In [1]:
# Copy the Spark Cassandra Connector assembly jar from the work directory into /usr/local/spark/jars.
# NOTE(review): presumably this has to run before the SparkSession below is created so the connector classes are found - confirm.
! sudo cp /home/jovyan/work/jar/spark-cassandra-connector-assembly-3.1.0-11-g53f24ce9.jar /usr/local/spark/jars/spark-cassandra-connector-assembly-3.1.0-11-g53f24ce9.jar

In [45]:
import pyspark
from pyspark.sql import SparkSession

In [46]:
# Build (or reuse) a local Spark session that talks to the Cassandra host
# "cassandra0" and registers the connector's catalog under the name "cassandra".
# Optional setting, currently left disabled:
#     .config("spark.sql.extensions", "com.datastax.spark.connector.CassandraSparkExtensions")
spark = (
    SparkSession.builder
    .master("local")
    .appName('jupyter-pyspark')
    .config("spark.cassandra.connection.host", "cassandra0")
    .config("spark.sql.catalog.cassandra", "com.datastax.spark.connector.datasource.CassandraCatalog")
    .getOrCreate()
)

# Keep a handle on the SparkContext and only log errors.
sc = spark.sparkContext
sc.setLogLevel("ERROR")

Run this CQL code in CQLSH:
```
CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
CREATE TABLE test.words (word text , count int, primary key (word) );
INSERT INTO test.words(word, count) VALUES ('and', 50);
```

In [47]:
# Read test.words through the connector's DataFrame source and print it.
words_reader = (
    spark.read
    .format("org.apache.spark.sql.cassandra")
    .options(table="words", keyspace="test")
)
words_reader.load().show()

+----+-----+
|word|count|
+----+-----+
| and|   50|
+----+-----+



In [48]:
# Same table, this time addressed as <catalog>.<keyspace>.<table> through the
# "cassandra" catalog configured on the session.
words_via_catalog = spark.read.table("cassandra.test.words")
words_via_catalog.show()

+----+-----+
|word|count|
+----+-----+
| and|   50|
+----+-----+



In [49]:
# Spark doesn't care about Cassandra's query constraints: the filter below is
# expressed in Spark SQL rather than CQL.
# NOTE(review): presumably cpu_pct is not a key column of
# demo.system_utilization - confirm against the table definition.
system_utilization = spark.read.table("cassandra.demo.system_utilization")
system_utilization.filter("cpu_pct = 5").show()

+--------+-------------------+-------+-------+
|hostname|        measured_on|cpu_pct|     os|
+--------+-------------------+-------+-------+
|    mars|2018-07-19 09:00:00|      5|windows|
|  Saturn|2018-07-19 10:00:00|      5|windows|
|   venus|2018-07-19 09:00:00|      5|    osx|
+--------+-------------------+-------+-------+



In [51]:
# Load every 2016 UFO-sightings CSV (first row is the header, column types are
# inferred) and cache the result, since `df` is reused further down.
ufo_csv_glob = "/home/jovyan/datasets/ufo-sightings/ufo-sightings-2016-*.csv"
df = spark.read.csv(ufo_csv_glob, header=True, inferSchema=True).cache()

# Preview ten rows that have a State value, then print the inferred schema.
df.filter("State is not null").show(n=10)
df.printSchema()

+-------------+--------------------+-----+---------+-------------+--------------------+--------+
|  Date / Time|                City|State|    Shape|     Duration|             Summary|  Posted|
+-------------+--------------------+-----+---------+-------------+--------------------+--------+
|2/29/16 23:45|        Harbor Beach|   MI|    Light|     1 minute|Yellow/white ball...|3/4/2016|
|2/29/16 23:30|           Sebastian|   FL| Triangle|20-40 minutes|6 low flying craf...|3/4/2016|
|2/29/16 23:00|Salunga/Landisvil...|   PA| Triangle| 5-15 minutes|Pennsylvania tria...|3/4/2016|
|2/29/16 22:00|                York|   PA| Triangle|   30 minutes|Myself and 2 frie...|3/4/2016|
|2/29/16 21:35|              Joliet|   IL|  Unknown|   10 minutes|At approximately ...|3/4/2016|
|2/29/16 20:45|  Port Hope (Canada)|   ON|    Light|   20 minutes|Strange glowing l...|3/4/2016|
|2/29/16 20:29|       Warner Robins|   GA| Fireball|    2 minutes|There were severa...|3/4/2016|
|2/29/16 19:50|              A

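# CQL: create the target table in CQLSH before saving (run `USE test;` first, or name the table `test.ufos`)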
```
create table ufos ( 
    "Date / Time" timestamp, 
    State text, 
    City text, 
    Duration text, 
    Posted date, 
    Shape text, Summary text, 
    primary key ( State, "Date / Time")
);
```

In [63]:
# Save to Cassandra

# The primary key of the target table is (State, "Date / Time"), so rows with a
# null in either column must be dropped before writing.
# The columns are passed by name on purpose: inside a Spark SQL filter string,
# "Date / Time" in double quotes is a string literal (never null), so a filter
# written that way keeps every row. Backticks (`Date / Time`) would also work.
df2 = df.dropna(subset=["State", "Date / Time"])

# NOTE(review): the previews in this notebook show `Date / Time` and `Posted` as
# plain strings - confirm the target table's column types accept them.

# Write to the Cassandra table. 'overwrite' truncates the existing table, which
# the connector only allows when confirm.truncate is set.
(
    df2.write
    .format("org.apache.spark.sql.cassandra")
    .mode('overwrite')
    .option("confirm.truncate", "true")
    .options(table="ufos", keyspace="test")
    .save()
)

In [64]:
# Read the rows back from Cassandra through the "cassandra" catalog and show
# up to 100 of them.
cdf = spark.read.table("cassandra.test.ufos")
cdf.show(n=100)

+-----+-------------+--------------------+--------------------+---------+---------+--------------------+
|State|  Date / Time|                City|            Duration|   Posted|    Shape|             Summary|
+-----+-------------+--------------------+--------------------+---------+---------+--------------------+
|   VT|1/20/16 19:00|    Norwich/Thetford|          10 minutes|1/23/2016|    Other|Trio of very brig...|
|   VT|1/20/16 20:00|          Barre Town|          10 seconds|1/20/2016|   Circle|3 bright-warm yel...|
|   VT| 3/3/16 20:30|        West Topsham|          10 minutes| 3/4/2016| Changing|Several orange li...|
|   VT| 3/4/16 18:15|             Fairlee|           5 minutes|3/11/2016|     Oval|Three pairs of tw...|
|   VT| 3/4/16 18:25|            Thetford|            1 minute|3/11/2016|    Light|Two circular ball...|
|   CO| 1/1/16 00:15|            Longmont|                  20| 1/5/2016|  Diamond|4 or 5 red/orange...|
|   CO|1/10/16 16:53|              Denver|        5-10 