This notebook shows how to connect to YugabyteDB (YCQL) from Azure Databricks. 
This demo covers the following scenarios:

1. Load CSV from Databricks Filestore into YugabyteDB 
2. Import Data into YugabyteDB
   2a. Import Parquet file into YugabyteDB
   2b. Import Avro file into YugabyteDB
3. Export YugabyteDB Table into Storage Folder
   3a.  Export YugabyteDB table into Parquet file
   3b.  Export YugabyteDB table into Avro file
4. Import Delta Table into YugabyteDB table  
5. Export YugabyteDB table into Delta Lake Table
6. Data Visualization and sample queries

In [0]:
%python
#1. Load Salesrecords CSV directly into YugabyteDB (YCQL) Table
import os

import pandas as pd
import pyspark
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.sql.functions import col

# Connection settings for the YCQL endpoint. Each value can be overridden with
# an environment variable so real credentials never have to be committed with
# the notebook; the fallbacks are the values this demo was originally run with.
YCQL_HOST = os.environ.get("YCQL_HOST", "10.14.16.7")
YCQL_PORT = os.environ.get("YCQL_PORT", "9042")
YCQL_USERNAME = os.environ.get("YCQL_USERNAME", "cassandra")
YCQL_PASSWORD = os.environ.get("YCQL_PASSWORD", "cassandra")
YCQL_LOCAL_DC = os.environ.get("YCQL_LOCAL_DC", "eastus")

# Configure the session BEFORE anything reads through `spark`, so this cell
# also works on a fresh kernel where no session has been predefined.
# NOTE(review): where a session already exists (e.g. Databricks), getOrCreate()
# presumably returns it, and static settings such as spark.sql.extensions may
# need to be set in the cluster's Spark config instead - confirm.
spark = (
    SparkSession.builder
    .appName("simple_etl")
    .config("spark.cassandra.connection.host", YCQL_HOST)
    .config("spark.cassandra.connection.port", YCQL_PORT)
    .config("spark.cassandra.auth.username", YCQL_USERNAME)
    .config("spark.cassandra.auth.password", YCQL_PASSWORD)
    .config("spark.sql.extensions", "com.datastax.spark.connector.CassandraSparkExtensions")
    .config("spark.cassandra.connection.localDC", YCQL_LOCAL_DC)
    .getOrCreate()
)

# Read the sales CSV from the Databricks FileStore, using the first row as the
# header and letting Spark infer the column types.
df1 = (
    spark.read
    .format("csv")
    .option("header", "true")
    .option("inferschema", "true")
    .load("dbfs:/FileStore/tables/1000_Sales_Records_1.csv")
)

# Expose the CSV data to Spark SQL and project the columns written to YCQL.
df1.createOrReplaceTempView("salesorder")
pqrdf = spark.sql("SELECT orderid, region,country,itemtype,saleschannel,orderpriority,orderdate,shipdate,unitssold, unitprice, unitcost, totalrevenue, totalcost , totalprofit  FROM salesorder ")

# Write to demo.salesprofit_test in overwrite mode; "confirm.truncate" is set
# on the write so the overwrite is explicitly acknowledged.
(
    pqrdf.write
    .format("org.apache.spark.sql.cassandra")
    .mode("overwrite")
    .option("confirm.truncate", "true")
    .options(table="salesprofit_test", keyspace="demo")
    .save()
)
# Alternative (not run): write df1 as-is to demo.salesprofit_test1 using mode("append").

In [0]:
%python
#2a. Read parquet file and load into YugabyteDB
# Read through the SparkSession (`spark`) like every other cell, instead of the
# legacy `sqlContext` handle, which this notebook never creates itself.
sourcedf = spark.read.parquet("/tmp/salesprofit")
sourcedf.createOrReplaceTempView("salesorder")

# Project the sales columns that are written to the YCQL table.
df = spark.sql("SELECT orderid, region,country,itemtype,saleschannel,orderpriority,orderdate,shipdate,unitssold, unitprice, unitcost, totalrevenue, totalcost , totalprofit  FROM salesorder ")

# Write to demo.salesprofit_test in overwrite mode with "confirm.truncate" set.
(
    df.write
    .format("org.apache.spark.sql.cassandra")
    .mode("overwrite")
    .option("confirm.truncate", "true")
    .options(table="salesprofit_test", keyspace="demo")
    .save()
)

In [0]:
%python
#2b. Read avro file and load into YugabyteDB
# Load the Avro export and register it as the "salesorder" temp view.
avro_reader = spark.read.format("avro")
avro_df = avro_reader.load("/tmp/salesprofit_avro_export")
avro_df.createOrReplaceTempView("salesorder")

# Project the sales columns that are written to the YCQL table.
df = spark.sql(
    "SELECT orderid, region,country,itemtype,saleschannel,orderpriority,orderdate,shipdate,unitssold, unitprice, unitcost, totalrevenue, totalcost , totalprofit  FROM salesorder "
)

# Write to demo.salesprofit_avro_test in overwrite mode with "confirm.truncate" set.
avro_writer = (
    df.write
    .format("org.apache.spark.sql.cassandra")
    .mode("overwrite")
    .option("confirm.truncate", "true")
    .options(table="salesprofit_avro_test", keyspace="demo")
)
avro_writer.save()

In [0]:
%python
#3a. Export YugabyteDB table into parquet file format.
# The DataFrame is named sales_df rather than `pd` so it does not shadow the
# pandas alias (`import pandas as pd`) from the first cell.
sales_df = (
    spark.read
    .format("org.apache.spark.sql.cassandra")
    .options(table="salesprofit_test", keyspace="demo")
    .load()
)

# Replace any previous export at this path with the current table contents.
sales_df.write.mode("overwrite").parquet("/tmp/export/salesprofit_test")

In [0]:
%python
#3b. Export YugabyteDB table into avro file format
# The DataFrame is named sales_avro_df rather than `pd` so it does not shadow
# the pandas alias (`import pandas as pd`) from the first cell.
sales_avro_df = (
    spark.read
    .format("org.apache.spark.sql.cassandra")
    .options(table="salesprofit_avro_test", keyspace="demo")
    .load()
)

# Replace any previous export at this path with the current table contents.
sales_avro_df.write.format('avro').mode('overwrite').save("/tmp/export/avro/salesprofit_test_avro")

In [0]:
%python
#4 Import data from Delta table to YugabyteDB (YCQL)
# Load the Delta table stored under the Hive warehouse path.
delta_reader = spark.read.format("delta")
delta_df = delta_reader.load("dbfs:/user/hive/warehouse/sample_table4")

# Write to demo.employee in overwrite mode with "confirm.truncate" set.
employee_writer = (
    delta_df.write
    .format("org.apache.spark.sql.cassandra")
    .mode("overwrite")
    .option("confirm.truncate", "true")
    .options(table="employee", keyspace="demo")
)
employee_writer.save()


In [0]:
%python
#5 Read a table from YugabyteDB (YCQL) using the Spark Cassandra connector and load into Delta table
# The DataFrame is named sales_details_df rather than `pd` so it does not
# shadow the pandas alias (`import pandas as pd`) from the first cell.
sales_details_df = (
    spark.read
    .format("org.apache.spark.sql.cassandra")
    .options(table="salesprofit_test", keyspace="demo")
    .load()
)

# Save as the managed Delta table "sales_details", replacing existing contents.
sales_details_df.write.format("delta").mode("overwrite").saveAsTable("sales_details")

In [0]:
%python
#6. Data Visualization using YugabyteDB
# Load demo.salesprofit_test through the Cassandra connector.
ycql_reader = spark.read.format("org.apache.spark.sql.cassandra")
df = ycql_reader.options(table="salesprofit_test", keyspace="demo").load()

# Print a preview, then register the data as the "profitanalysis" temp view
# that the SQL cells below query.
df.show()
df.createOrReplaceTempView("profitanalysis")

In [0]:
%sql
-- Show every column of the profitanalysis temp view.
SELECT
  *
FROM
  profitanalysis

orderid,country,itemtype,orderdate,orderpriority,region,saleschannel,shipdate,totalcost,totalprofit,totalrevenue,unitcost,unitprice,unitssold
905392587,Kiribati,Cereal,7/24/2012,L,Australia and Oceania,Offline,8/16/2012,543507.51,411146.19,954653.7,117.11,205.7,4641
613830459,Benin,Cereal,12/12/2016,C,Sub-Saharan Africa,Offline,1/16/2017,577118.08,436571.52,1013689.6,117.11,205.7,4928
927666509,Bahrain,Household,6/27/2012,H,Middle East and North Africa,Online,7/17/2012,3010214.6,992722.7,4002937.3,502.54,668.27,5990
736967885,Portugal,Cosmetics,2/27/2011,C,Europe,Offline,3/12/2011,1060956.57,700522.23,1761478.8,263.33,437.2,4029
432995069,Uzbekistan,Clothes,11/11/2010,C,Asia,Online,12/13/2010,61573.12,126169.92,187743.04,35.84,109.28,1718
247802054,Senegal,Household,8/27/2012,L,Sub-Saharan Africa,Offline,9/8/2012,4517332.06,1489746.97,6007079.03,502.54,668.27,8989
444604098,Belarus,Baby Food,10/26/2010,H,Europe,Offline,10/31/2010,1129968.96,679455.68,1809424.64,159.42,255.28,7088
221530139,Russia,Fruits,1/8/2011,L,Europe,Offline,1/26/2011,31458.32,10955.86,42414.18,6.92,9.33,4546
927232635,Bangladesh,Personal Care,10/15/2016,L,Asia,Online,11/24/2016,430521.99,190380.82,620902.81,56.67,81.73,7597
860886800,Sri Lanka,Snacks,11/13/2013,L,Asia,Offline,11/23/2013,427761.6,242064.6,669826.2,97.44,152.58,4390


In [0]:
%sql
-- Total revenue per sales region.
SELECT
  region,
  sum(TotalRevenue)
FROM
  profitanalysis
GROUP BY
  region

region,sum(CAST(TotalRevenue AS DOUBLE))
Middle East and North Africa,175106535.74
Australia and Oceania,105689572.6
Europe,353167462.93
Sub-Saharan Africa,356724250.12
Central America and the Caribbean,143997610.51
Asia,167674809.49000004
North America,24961598.94
