In [1]:
pip install pyspark



In [2]:
#!pip install pyspark

import pyspark

sc = pyspark.SparkContext(appName="songs_and_spark")

songs = [
        "Despacito",
        "Nice for what",
        "No tears left to cry",
        "Despacito",
        "Havana",
        "In my feelings",
        "Nice for what",
        "despacito",
        "All the stars"
]

# parallelize the log_of_songs to use with Spark
distributed_songs = sc.parallelize(songs)

In [3]:
distributed_songs

ParallelCollectionRDD[0] at readRDDFromFile at PythonRDD.scala:289

In [4]:
def convert_song_to_lowercase(song):
  return song.lower()

In [5]:
distributed_songs.map(convert_song_to_lowercase)

PythonRDD[1] at RDD at PythonRDD.scala:53

In [6]:
distributed_songs.map(convert_song_to_lowercase).collect()

['despacito',
 'nice for what',
 'no tears left to cry',
 'despacito',
 'havana',
 'in my feelings',
 'nice for what',
 'despacito',
 'all the stars']

In [7]:
def convert_song_to_uppercase(song):
  return song.upper()

distributed_songs.map(convert_song_to_uppercase)

PythonRDD[3] at RDD at PythonRDD.scala:53

In [8]:
distributed_songs.map(convert_song_to_uppercase).collect()

['DESPACITO',
 'NICE FOR WHAT',
 'NO TEARS LEFT TO CRY',
 'DESPACITO',
 'HAVANA',
 'IN MY FEELINGS',
 'NICE FOR WHAT',
 'DESPACITO',
 'ALL THE STARS']

In [9]:
distributed_songs.collect()

['Despacito',
 'Nice for what',
 'No tears left to cry',
 'Despacito',
 'Havana',
 'In my feelings',
 'Nice for what',
 'despacito',
 'All the stars']

In [11]:
# Reading and Writing Data with Spark
from pyspark import SparkConf
from pyspark.sql import SparkSession

In [12]:
spark = SparkSession.builder.appName("songs_and_spark").getOrCreate()

In [13]:
spark.sparkContext.getConf().getAll()

[('spark.driver.extraJavaOptions',
  '-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false'),
 ('spark.app.startTime', '1735586998266'),
 ('spark.executor.id', 'driver'),
 ('spark.driver.host', 'da

In [14]:
spark

In [17]:
path = "/content/pyspark"
songs_log = spark.read.json(path)

In [18]:
songs_log.printSchema()

root
 |-- artist: string (nullable = true)
 |-- auth: string (nullable = true)
 |-- firstName: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- itemInSession: long (nullable = true)
 |-- lastName: string (nullable = true)
 |-- length: double (nullable = true)
 |-- level: string (nullable = true)
 |-- location: string (nullable = true)
 |-- method: string (nullable = true)
 |-- page: string (nullable = true)
 |-- registration: long (nullable = true)
 |-- sessionId: long (nullable = true)
 |-- song: string (nullable = true)
 |-- status: long (nullable = true)
 |-- ts: long (nullable = true)
 |-- userAgent: string (nullable = true)
 |-- userId: string (nullable = true)



In [19]:
dir(songs_log)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_collect_as_arrow',
 '_ipython_key_completions_',
 '_jcols',
 '_jdf',
 '_jmap',
 '_joinAsOf',
 '_jseq',
 '_lazy_rdd',
 '_repr_html_',
 '_sc',
 '_schema',
 '_session',
 '_show_string',
 '_sort_cols',
 '_sql_ctx',
 '_support_repr_html',
 'agg',
 'alias',
 'approxQuantile',
 'artist',
 'auth',
 'cache',
 'checkpoint',
 'coalesce',
 'colRegex',
 'collect',
 'columns',
 'corr',
 'count',
 'cov',
 'createGlobalTempView',
 'createOrReplaceGlobalTempView',
 'createOrReplaceTempView',
 'createTempView',
 'crossJoin',
 'crosstab',
 'cube',
 'describe',
 'distinct',
 'drop',
 'dropDuplicates',
 'drop

In [20]:
songs_log.take(5)

[Row(artist='Showaddywaddy', auth='Logged In', firstName='Kenneth', gender='M', itemInSession=112, lastName='Matthews', length=232.93342, level='paid', location='Charlotte-Concord-Gastonia, NC-SC', method='PUT', page='NextSong', registration=1509380319284, sessionId=5132, song='Christmas Tears Will Fall', status=200, ts=1513720872284, userAgent='"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"', userId='1046'),
 Row(artist='Lily Allen', auth='Logged In', firstName='Elizabeth', gender='F', itemInSession=7, lastName='Chase', length=195.23873, level='free', location='Shreveport-Bossier City, LA', method='PUT', page='NextSong', registration=1512718541284, sessionId=5027, song='Cheryl Tweedy', status=200, ts=1513720878284, userAgent='"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"', userId='1000'),
 Row(artist='Cobra Starship Featuring Leighton Meester', auth='Logged In', firstNa

In [23]:
songs_log.show(n=13)

+--------------------+---------+---------+------+-------------+---------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|              artist|     auth|firstName|gender|itemInSession| lastName|   length|level|            location|method|    page| registration|sessionId|                song|status|           ts|           userAgent|userId|
+--------------------+---------+---------+------+-------------+---------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|       Showaddywaddy|Logged In|  Kenneth|     M|          112| Matthews|232.93342| paid|Charlotte-Concord...|   PUT|NextSong|1509380319284|     5132|Christmas Tears W...|   200|1513720872284|"Mozilla/5.0 (Win...|  1046|
|          Lily Allen|Logged In|Elizabeth|     F|            7|    Chase|195.23873| free|Shreveport-Bossie...|   PUT

In [24]:
songs_log.groupBy('artist').count().show()

+--------------------+-----+
|              artist|count|
+--------------------+-----+
|      The Black Keys|   40|
|        STRATOVARIUS|    1|
|      The Chameleons|    1|
|Dashboard Confess...|    3|
|      Jarabe De Palo|    3|
|        Ziggy Marley|    1|
|        Yann Tiersen|   10|
|  The Watts Prophets|    1|
|            Goldfish|    1|
|           Kate Nash|    3|
|              DJ Taz|    1|
|    Jane's Addiction|    1|
|         Eva Cassidy|    4|
|               Rufio|    1|
|           Los Lobos|    4|
|         Silverstein|    1|
|        Rhett Miller|    1|
|              Nebula|    1|
|Yonder Mountain S...|    1|
|     Groove Coverage|    1|
+--------------------+-----+
only showing top 20 rows



In [25]:
songs_log.drop_duplicates().show()

+--------------------+---------+---------+------+-------------+----------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|              artist|     auth|firstName|gender|itemInSession|  lastName|   length|level|            location|method|    page| registration|sessionId|                song|status|           ts|           userAgent|userId|
+--------------------+---------+---------+------+-------------+----------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|               Petra|Logged In|  Gabriel|     M|           10|      Koch|291.05587| paid|     Panama City, FL|   PUT|NextSong|1505820418284|     1060|         Whole World|   200|1513723045284|Mozilla/5.0 (Wind...|  1061|
|     Alliance Ethnik|Logged In|  Abigail|     F|           36|  Davidson|252.21179| paid|Lansing-East Lans...| 

In [26]:
songs_log.distinct().show()

+--------------------+---------+---------+------+-------------+----------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|              artist|     auth|firstName|gender|itemInSession|  lastName|   length|level|            location|method|    page| registration|sessionId|                song|status|           ts|           userAgent|userId|
+--------------------+---------+---------+------+-------------+----------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|               Petra|Logged In|  Gabriel|     M|           10|      Koch|291.05587| paid|     Panama City, FL|   PUT|NextSong|1505820418284|     1060|         Whole World|   200|1513723045284|Mozilla/5.0 (Wind...|  1061|
|     Alliance Ethnik|Logged In|  Abigail|     F|           36|  Davidson|252.21179| paid|Lansing-East Lans...| 

In [27]:
songs_log.dropDuplicates()

DataFrame[artist: string, auth: string, firstName: string, gender: string, itemInSession: bigint, lastName: string, length: double, level: string, location: string, method: string, page: string, registration: bigint, sessionId: bigint, song: string, status: bigint, ts: bigint, userAgent: string, userId: string]

In [28]:
songs_log.describe()

DataFrame[summary: string, artist: string, auth: string, firstName: string, gender: string, itemInSession: string, lastName: string, length: string, level: string, location: string, method: string, page: string, registration: string, sessionId: string, song: string, status: string, ts: string, userAgent: string, userId: string]

In [30]:
# Save the songs_log file to a csv file
out_path = "/content/pyspark/spark_log_small.csv"
songs_log.write.save(out_path, format="csv", header=True)

In [31]:
songs_log = spark.read.csv(out_path, header=True)

In [32]:
songs_log.printSchema()

root
 |-- artist: string (nullable = true)
 |-- auth: string (nullable = true)
 |-- firstName: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- itemInSession: string (nullable = true)
 |-- lastName: string (nullable = true)
 |-- length: string (nullable = true)
 |-- level: string (nullable = true)
 |-- location: string (nullable = true)
 |-- method: string (nullable = true)
 |-- page: string (nullable = true)
 |-- registration: string (nullable = true)
 |-- sessionId: string (nullable = true)
 |-- song: string (nullable = true)
 |-- status: string (nullable = true)
 |-- ts: string (nullable = true)
 |-- userAgent: string (nullable = true)
 |-- userId: string (nullable = true)



In [34]:
songs_log.take(12)

[Row(artist='Showaddywaddy', auth='Logged In', firstName='Kenneth', gender='M', itemInSession='112', lastName='Matthews', length='232.93342', level='paid', location='Charlotte-Concord-Gastonia, NC-SC', method='PUT', page='NextSong', registration='1509380319284', sessionId='5132', song='Christmas Tears Will Fall', status='200', ts='1513720872284', userAgent='"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"', userId='1046'),
 Row(artist='Lily Allen', auth='Logged In', firstName='Elizabeth', gender='F', itemInSession='7', lastName='Chase', length='195.23873', level='free', location='Shreveport-Bossier City, LA', method='PUT', page='NextSong', registration='1512718541284', sessionId='5027', song='Cheryl Tweedy', status='200', ts='1513720878284', userAgent='"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"', userId='1000'),
 Row(artist='Cobra Starship Featuring Leighton Meester', a

In [37]:
songs_log.select("userID").show(n=5)

+------+
|userID|
+------+
|  1046|
|  1000|
|  2219|
|  2373|
|  1747|
+------+
only showing top 5 rows



In [38]:
songs_log.select("level").show(n=7)

+-----+
|level|
+-----+
| paid|
| free|
| paid|
| paid|
| free|
| free|
| free|
+-----+
only showing top 7 rows



In [39]:
# Data Exploration
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import desc
from pyspark.sql.functions import asc
from pyspark.sql.functions import sum as Fsum

import datetime

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [41]:
path = "/content/pyspark/sparkify_log_small.json"
user_log = spark.read.json(path)

In [42]:
user_log.describe("registration", "sessionId").show()

+-------+--------------------+------------------+
|summary|        registration|         sessionId|
+-------+--------------------+------------------+
|  count|                9664|             10000|
|   mean|1.504695369588739...|         4436.7511|
| stddev| 8.473142521316586E9|2043.1281541827552|
|    min|       1463503881284|                 9|
|    max|       1513760702284|              7144|
+-------+--------------------+------------------+



In [43]:
user_log.count()

10000

In [44]:
user_log.select("page").dropDuplicates().sort("page").show()

+----------------+
|            page|
+----------------+
|           About|
|       Downgrade|
|           Error|
|            Help|
|            Home|
|           Login|
|          Logout|
|        NextSong|
|   Save Settings|
|        Settings|
|Submit Downgrade|
|  Submit Upgrade|
|         Upgrade|
+----------------+



In [45]:
user_log.select(
    ["userId", "firstname", "page", "song"]
    ).where(user_log.userId == "1000").collect()

[Row(userId='1000', firstname='Elizabeth', page='NextSong', song='Cheryl Tweedy')]

In [46]:
user_log.select(["userId", "firstname"]).groupBy("userId").count().collect()

#user_log.select("userId").sort("userId").dropDuplicates().show()

[Row(userId='2904', count=1),
 Row(userId='691', count=4),
 Row(userId='2294', count=62),
 Row(userId='2162', count=39),
 Row(userId='1436', count=2),
 Row(userId='2088', count=14),
 Row(userId='2275', count=5),
 Row(userId='2756', count=8),
 Row(userId='800', count=3),
 Row(userId='1394', count=28),
 Row(userId='926', count=7),
 Row(userId='1746', count=2),
 Row(userId='2696', count=2),
 Row(userId='1280', count=2),
 Row(userId='870', count=2),
 Row(userId='7', count=2),
 Row(userId='1903', count=7),
 Row(userId='591', count=1),
 Row(userId='613', count=4),
 Row(userId='574', count=7),
 Row(userId='307', count=1),
 Row(userId='577', count=2),
 Row(userId='2402', count=4),
 Row(userId='2644', count=1),
 Row(userId='1043', count=3),
 Row(userId='1349', count=12),
 Row(userId='2964', count=1),
 Row(userId='462', count=122),
 Row(userId='1418', count=4),
 Row(userId='647', count=1),
 Row(userId='2392', count=1),
 Row(userId='2200', count=1),
 Row(userId='2980', count=9),
 Row(userId='442'

In [47]:
user_log.select(
    ["userId", "firstname", "page", "song"]
    ).where(user_log.userId == "1351").collect()

[Row(userId='1351', firstname='Zachary', page='NextSong', song='Godine prolaze'),
 Row(userId='1351', firstname='Zachary', page='NextSong', song='Your Decision'),
 Row(userId='1351', firstname='Zachary', page='NextSong', song='Un Mundo Raro'),
 Row(userId='1351', firstname='Zachary', page='NextSong', song='Neighborhoods'),
 Row(userId='1351', firstname='Zachary', page='NextSong', song='The Way I Loved You'),
 Row(userId='1351', firstname='Zachary', page='NextSong', song='I Gotta Feeling')]

In [48]:
user_log.select(
    ["userId", "firstname", "page", "song"]
    ).where(user_log.userId == "974").collect()

[Row(userId='974', firstname='Abegail', page='Home', song=None),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Gunn Clapp'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Sweet Nothings Gone Forever'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Take The "A" Train'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Chaplin'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Emily Dickinson'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Almaz'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Peace Of Mind'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Here I Am (Come And Take Me)'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Superstition'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='A Certain Romance'),
 Row(userId='974', firstname='Abegail', page='NextSong', song='Me About You'),
 Row(userId='974', firstname='Abeg

In [49]:
user_log.describe().show()

+-------+-----------------+----------+---------+------+------------------+--------+------------------+-----+------------+------+-------+--------------------+------------------+--------+------------------+--------------------+--------------------+------------------+
|summary|           artist|      auth|firstName|gender|     itemInSession|lastName|            length|level|    location|method|   page|        registration|         sessionId|    song|            status|                  ts|           userAgent|            userId|
+-------+-----------------+----------+---------+------+------------------+--------+------------------+-----+------------+------+-------+--------------------+------------------+--------+------------------+--------------------+--------------------+------------------+
|  count|             8347|     10000|     9664|  9664|             10000|    9664|              8347|10000|        9664| 10000|  10000|                9664|             10000|    8347|             1000

In [50]:
# Drop Rows with Missing Values
user_log_valid = user_log.dropna(how = "any", subset = ["userId", "sessionId"])
user_log_valid.count()

10000

In [51]:
user_log.select("userId").dropDuplicates().sort("userId").show(n=9)

+------+
|userId|
+------+
|      |
|    10|
|   100|
|  1000|
|  1003|
|  1005|
|  1006|
|  1017|
|  1019|
+------+
only showing top 9 rows



In [55]:
user_log_valid.select("page").distinct().show()

+----------------+
|            page|
+----------------+
|Submit Downgrade|
|            Home|
|       Downgrade|
|          Logout|
|   Save Settings|
|           About|
|        Settings|
|           Login|
|        NextSong|
|            Help|
|         Upgrade|
|           Error|
|  Submit Upgrade|
+----------------+



In [61]:
# Submit Downgrade

user_log_valid.filter("page = 'Submit Downgrade'").show()

+------+---------+---------+------+-------------+--------+------+-----+--------------------+------+----------------+-------------+---------+----+------+-------------+--------------------+------+
|artist|     auth|firstName|gender|itemInSession|lastName|length|level|            location|method|            page| registration|sessionId|song|status|           ts|           userAgent|userId|
+------+---------+---------+------+-------------+--------+------+-----+--------------------+------+----------------+-------------+---------+----+------+-------------+--------------------+------+
|  NULL|Logged In|    Kelly|     F|           24|  Newton|  NULL| paid|Houston-The Woodl...|   PUT|Submit Downgrade|1513283366284|     5931|NULL|   307|1513768454284|Mozilla/5.0 (Wind...|  1138|
+------+---------+---------+------+-------------+--------+------+-----+--------------------+------+----------------+-------------+---------+----+------+-------------+--------------------+------+



In [60]:
# Save Settings

user_log_valid.filter("page = 'Save Settings'").show()

+------+---------+---------+------+-------------+--------+------+-----+--------------------+------+-------------+-------------+---------+----+------+-------------+--------------------+------+
|artist|     auth|firstName|gender|itemInSession|lastName|length|level|            location|method|         page| registration|sessionId|song|status|           ts|           userAgent|userId|
+------+---------+---------+------+-------------+--------+------+-----+--------------------+------+-------------+-------------+---------+----+------+-------------+--------------------+------+
|  NULL|Logged In|  Enrique|     M|           17|  Miller|  NULL| paid|Chicago-Napervill...|   PUT|Save Settings|1500182660284|     5641|NULL|   307|1513739372284|"Mozilla/5.0 (Win...|    58|
|  NULL|Logged In|  Damarco|     M|            2|  Cooper|  NULL| paid|   Winston-Salem, NC|   PUT|Save Settings|1510023739284|     5104|NULL|   307|1513751955284|Mozilla/5.0 (Wind...|  1647|
|  NULL|Logged In|  Michael|     M|     

In [59]:
# NextSong

user_log_valid.filter("page = 'NextSong'").show()

+--------------------+---------+---------+------+-------------+---------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|              artist|     auth|firstName|gender|itemInSession| lastName|   length|level|            location|method|    page| registration|sessionId|                song|status|           ts|           userAgent|userId|
+--------------------+---------+---------+------+-------------+---------+---------+-----+--------------------+------+--------+-------------+---------+--------------------+------+-------------+--------------------+------+
|       Showaddywaddy|Logged In|  Kenneth|     M|          112| Matthews|232.93342| paid|Charlotte-Concord...|   PUT|NextSong|1509380319284|     5132|Christmas Tears W...|   200|1513720872284|"Mozilla/5.0 (Win...|  1046|
|          Lily Allen|Logged In|Elizabeth|     F|            7|    Chase|195.23873| free|Shreveport-Bossie...|   PUT

In [58]:
# Upgrade

user_log_valid.filter("page = 'Upgrade'").show()

+------+---------+---------+------+-------------+---------+------+-----+--------------------+------+-------+-------------+---------+----+------+-------------+--------------------+------+
|artist|     auth|firstName|gender|itemInSession| lastName|length|level|            location|method|   page| registration|sessionId|song|status|           ts|           userAgent|userId|
+------+---------+---------+------+-------------+---------+------+-----+--------------------+------+-------+-------------+---------+----+------+-------------+--------------------+------+
|  NULL|Logged In|   Easton|     M|            1|   Harris|  NULL| free|Washington-Arling...|   GET|Upgrade|1511647902284|     4748|NULL|   200|1513727963284|"Mozilla/5.0 (Win...|   553|
|  NULL|Logged In|   Ashton|     M|            3|   Nelson|  NULL| free|Los Angeles-Long ...|   GET|Upgrade|1510608931284|      511|NULL|   200|1513730196284|Mozilla/5.0 (Wind...|   512|
|  NULL|Logged In|   Dreden|     M|            2|Rodriguez|  NULL

In [62]:
# Error

user_log_valid.filter("page = 'Error'").show()

+------+---------+---------+------+-------------+---------+------+-----+--------------------+------+-----+-------------+---------+----+------+-------------+--------------------+------+
|artist|     auth|firstName|gender|itemInSession| lastName|length|level|            location|method| page| registration|sessionId|song|status|           ts|           userAgent|userId|
+------+---------+---------+------+-------------+---------+------+-----+--------------------+------+-----+-------------+---------+----+------+-------------+--------------------+------+
|  NULL|Logged In|     Zoie|     F|           87|   Wright|  NULL| paid|Chicago-Napervill...|   GET|Error|1512172030284|     2163|NULL|   404|1513734618284|"Mozilla/5.0 (Mac...|  2164|
|  NULL|Logged In|   Sophia|     F|           13|Gutierrez|  NULL| paid|Birmingham-Hoover...|   GET|Error|1513249725284|     4678|NULL|   404|1513744733284|"Mozilla/5.0 (Mac...|  1037|
|  NULL|Logged In|    Diego|     M|            5|   Thomas|  NULL| paid|   

In [65]:
user_log.select(["userId", "firstname", "page", "level", "song"]).where(user_log.userId == "1138").collect()

[Row(userId='1138', firstname='Kelly', page='Home', level='paid', song=None),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Everybody Everybody'),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Gears'),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Use Somebody'),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Love Of My Life (1993 Digital Remaster)'),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Down In The Valley Woe'),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Treat Her Like A Lady'),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song="Everybody Thinks You're An Angel"),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Fourteen Wives'),
 Row(userId='1138', firstname='Kelly', page='NextSong', level='paid', song='Love On The Rocks'),
 Row(userId='1138', f