### Examining Twitter Ratio Metrics Using PySpark
##### Michele Waters


In [2]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Create connection to cluster/instance of SparkContext class
sc=pyspark.SparkContext.getOrCreate()
print("Version:", sc.version)

Version: 3.0.1


In [4]:
#Create SparkSession object/ interface with SparkContext
spark = SparkSession.builder.getOrCreate()

In [5]:
def init_sql_ctx(db_path, table_name, sc):
    '''
    Initialize SQL Context given SQL database file name, table name and Spark Context
    '''
    query=f'SELECT * FROM {table_name}' #SQL query
    conn = sqlite3.connect(db_path) #SQL Connection
    df = pd.read_sql_query(query, conn) #Get table from SQL query
    sqlCtx = SQLContext(sc) #Create SQL table
    spark_df = sqlCtx.createDataFrame(df) #Create SQL dataframe 
    spark_df.registerTempTable(table_name) #Register dataframe as SQL table
    #spark_df.createOrReplaceTempView(table_name) # Add table to the catalog
    return sqlCtx

In [6]:
#Initialize SQL Context with Twitter data
sqlCtx=init_sql_ctx(db_path='../tweet_data/merged_tweets_2019_2020.db',table_name= 'merged_twitter_data_gated', sc=sc)

In [7]:
#Display table names
tables=sqlCtx.tableNames()
print(tables)

['merged_twitter_data_gated']


In [8]:
print(spark.catalog.listTables())

[Table(name='merged_twitter_data_gated', database=None, description=None, tableType='TEMPORARY', isTemporary=True)]


In [9]:
#Show ~first 20 lines
sqlCtx.sql('SELECT * FROM merged_twitter_data_gated').show()

+---+-----------+--------------------+--------------------+---------------+---------------+--------------------+--------------------+---------------+--------------------+-------------------+--------+---------+-----------+---------+--------------------+---------------------+-------------------+-------------------+----------------------+-----------+-------------------+--------------------+-----------------------+----------+--------------+------------+--------------------+--------------------+--------------------+--------+---------------+-------------+------------+----------------+--------------+--------+---+----+----+------+------+--------------------+--------------------+------------+-----------+---------------+---------------+------------+-----------+------------+--------------+
| id|   username|                text|        url_attached|       hashtags|       reply_to|                date|      formatted_date|             to|       link_to_tweet|           tweet_id|retweets|favorites|r

In [10]:
#Convert spark tables to pandas
def sql_query_df(query='SELECT * FROM merged_twitter_data_gated LIMIT 5', sqlCtx=sqlCtx):
    '''Returns result of SQL query from SQLContext object and displays as pandas dataframe'''
    return sqlCtx.sql(query).toPandas()

In [11]:
#View First 5 rows of confirmed_global table, i.e. confirmed COVID-19 cases
user_df=sql_query_df('SELECT DISTINCT username FROM merged_twitter_data_gated')

In [12]:
users=sorted(list(user_df.username))
users

['50cent',
 'ac360',
 'alexisohanian',
 'ambassadorrice',
 'arianagrande',
 'barackobama',
 'bariweiss',
 'bchesky',
 'berniesanders',
 'beyonce',
 'billgates',
 'billmaher',
 'billoreilly',
 'breitbartnews',
 'brianstelter',
 'britneyspears',
 'chrisbrown',
 'chrissyteigen',
 'cillizzacnn',
 'comey',
 'cristiano',
 'crossfitceo',
 'cthagod',
 'ddlovato',
 'donlemon',
 'drdrew',
 'drewbrees',
 'droz',
 'drphil',
 'elonmusk',
 'equifax',
 'equinox',
 'facebook',
 'fedex',
 'foxnews',
 'gary_kelly',
 'govchristie',
 'hillaryclinton',
 'ingrahamangle',
 'jakepaul',
 'jeffbezos',
 'jerryfalwelljr',
 'joebiden',
 'joenbc',
 'joerogan',
 'jtimberlake',
 'justinamash',
 'justinbieber',
 'kaepernick7',
 'kamalaharris',
 'kanyewest',
 'katyperry',
 'kevinhart4real',
 'kimkardashian',
 'kyliejenner',
 'lancearmstrong',
 'leamichele',
 'lisamurkowski',
 'loganpaul',
 'maddow',
 'mcuban',
 'michelleobama',
 'mileycyrus',
 'mittromney',
 'morningmika',
 'natesilver538',
 'nba',
 'nfl',
 'nike',
 'n

In [13]:
column_names=sql_query_df('SELECT * FROM merged_twitter_data_gated LIMIT 1').columns
column_names

Index(['id', 'username', 'text', 'url_attached', 'hashtags', 'reply_to',
       'date', 'formatted_date', 'to', 'link_to_tweet', 'tweet_id', 'retweets',
       'favorites', 'reply_count', 'author_id', 'ratio_comment_like',
       'ratio_comment_retweet', 'log_ratio', 'ratio_richter_likes',
       'ratio_richter_retweets', 'got_ratioed', 'got_richter_ratioed',
       'd_richter_like_ratio', 'd_richter_retweet_ratio', 'account_id',
       'location', 'name', 'description', 'url', 'created_at', 'verified',
       'followers_count', 'friends_count', 'listed_count', 'favourites_count',
       'statuses_count', 'category', 'doy', 'year', 'hour', 'minute', 'second',
       'text_processed', 'cleaned_text', 'encoded_urls', 'url_present',
       'encoded_hashtag', 'hashtag_present', 'num_hashtags', 'd_log_ratio',
       'ratio_target', 'cluster_labels'],
      dtype='object')

In [14]:
sql_query_df("SELECT text_processed FROM merged_twitter_data_gated LIMIT 5")

Unnamed: 0,text_processed
0,as has always been true at key moments in his...
1,we have seen the power that our voices have w...
2,on national gun violence awareness day we w...
3,third every city in this country should be a...
4,second every mayor should review their use o...


* Check for null text values

In [15]:
sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")

----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 62097)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/socketserver.py", line 316, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/Users/michelewaters/anaconda3/lib/python3.7/socketserver.py", line 347, in process_request
    self.finish_request(request, client_address)
  File "/Users/michelewaters/anaconda3/lib/python3.7/socketserver.py", line 360, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/Users/michelewaters/anaconda3/lib/python3.7/socketserver.py", line 720, in __init__
    self.handle()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/accumulators.py", line 268, in handle
    poll(accum_updates)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/accumulators.py", line 241, in poll
    if func():
  File "/U

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:62067)
Traceback (most recent call last):
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ec02a6eecccc>", line 1, in <module>
    sql_query_df("SELECT COUNT(*) FROM merged_twitter_data_gated WHERE text=' ' ")
  File "<ipython-input-10-bbf28c7cf9f5>", line 4, in sql_query_df
    return sqlCtx.sql(query).toPandas()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/pandas/conversion.py", line 138, in toPandas
    pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/pyspark/sql/dataframe.py", line 596, in collect
    sock_info = self._jdf.collectToPython()
  File "/Users/michelewaters/anaconda3/lib/python3.7/site-packages/py4j/jav

Py4JNetworkError: An error occurred while trying to connect to the Java server (127.0.0.1:62067)

* Look at tweets with missing text

In [None]:
sql_query_df("SELECT * FROM merged_twitter_data_gated \
             WHERE text=' ' \
             LIMIT 10")

* Look at tweet frequency of accounts (removing tweets without text)

In [None]:
user_freq_df=sql_query_df("SELECT username, COUNT(*) as frequency \
             FROM merged_twitter_data_gated \
             WHERE text!=' '\
             GROUP BY username \
             ORDER BY frequency \
             DESC")

In [None]:
user_freq_df.head()

In [None]:
user_freq_df.sort_values(by='frequency').plot.barh(figsize=(10, 20), x='username', y='frequency')
plt.title("User Tweet Frequency")
plt.xlabel('Frequency')
plt.ylabel('User Name')
plt.show()

In [None]:
ratio_freq_df=sql_query_df("SELECT username, ratio_target, COUNT(*) as frequency \
             FROM merged_twitter_data_gated \
             WHERE text!=' '\
             GROUP BY username, ratio_target \
             ORDER BY username, frequency, ratio_target \
             DESC")

In [None]:
ratio_freq_df

* Accounts with most ratios:

In [None]:
ratio_freq_df[ratio_freq_df.ratio_target==1].sort_values(by='frequency').plot.barh(figsize=(10, 20), x='username', y='frequency')
plt.title("User Tweet Frequency")
plt.xlabel('Frequency')
plt.ylabel('User Name')
plt.show()

In [None]:
sql_query_df('SELECT username, COUNT(*) AS ratio_num \
                            FROM merged_twitter_data_gated \
                            WHERE ratio_target=1  \
                            GROUP BY username \
                            ORDER BY ratio_num \
                            DESC')

In [None]:
sql_query_df("SELECT * FROM merged_twitter_data_gated \
                            WHERE ratio_target=1 AND text!=' ' \
                            ORDER BY log_ratio \
                            DESC ")

In [None]:
sql_query_df(f'SELECT username, COUNT(*) as tweet_count, \
             
              FROM merged_twitter_data_gated \
                            GROUP BY username \
                            ORDER BY tweet_count \
                            DESC')

In [None]:
ratio_freq_df=sql_query_df("SELECT username, ratio_target, \
                           COUNT(SELECT ratio_target FROM merged_twitter_data_gated \
                            WHERE ratio_target==1)/ COUNT(*)
                           as percent_ratio \
             FROM merged_twitter_data_gated \
             WHERE text!=' '\
             GROUP BY username, ratio_target \
             ORDER BY username, frequency, ratio_target \
             DESC")