In [None]:
# We import the necessary packages 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the GeoJSON file holding the state geometries into `states`.

import json

with open('gz_2010_us_040_00_500k.json') as json_file:
    # json.load(fp) is shorthand for parsing the text read from fp.
    states = json.loads(json_file.read())

In [None]:
# Give every GeoJSON feature an integer `id` equal to its position in the
# feature list; the choropleth uses it to match rows to shapes.

for feature_id, feature in enumerate(states['features']):
    feature['id'] = feature_id

In [None]:
# Start with an empty frame that will hold one row per state:
# its name and the feature id assigned above.

columns = ['State Name', 'id']
state_ids_df = pd.DataFrame(columns=columns)

In [None]:
# Collect the name and id of every feature, in file order, as two parallel lists.

state_names = [feature['properties']['NAME'] for feature in states['features']]
state_ids = [feature['id'] for feature in states['features']]

In [None]:
# Fill the two columns from the parallel lists, then display the result.
for column_name, column_values in (('State Name', state_names), ('id', state_ids)):
    state_ids_df[column_name] = column_values

state_ids_df

In [None]:
# Load the classified tweet data set into `df`.
# Later cells read its 'state', 'date' and 'sentiment' columns.

df = pd.read_csv('classified_data1.csv')

In [None]:
# Preview the first rows of the freshly loaded tweet data.
df.head()

In [None]:
# Remove the leftover index columns and the start/end date columns,
# none of which are used in the rest of the notebook.

unneeded_columns = ['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'start date', 'end date']
df = df.drop(columns=unneeded_columns)

In [None]:
# Count the tweets that have no state attached.

print(df['state'].isna().sum())

In [None]:
# We drop the tweets where the state is nan

df = df.dropna(subset=['state'])

# And reset the index so the rows are labelled 0..n-1 again.
# drop=True discards the old labels instead of inserting them as an extra
# 'index' column, and keeps this cell safe to re-run.

df = df.reset_index(drop=True)

In [None]:
# Now we want to add an extra column containing the state id from the coordinates

# Build a name -> position lookup once, so each tweet costs a dictionary
# lookup instead of a linear scan of `state_names`.
# Iterating in reverse keeps the FIRST position of a repeated name, which is
# what list.index() returned.
state_id_by_name = {
    name: position
    for position, name in reversed(list(enumerate(state_names)))
}

# Fail early, and name the offending values, if a tweet refers to a state the
# GeoJSON file does not contain (list.index() raised ValueError here as well).
unknown_states = sorted(set(df['state']) - set(state_id_by_name), key=str)
if unknown_states:
    raise ValueError(f"States not found in the GeoJSON file: {unknown_states}")

# Iterate the column positionally so the result lines up with the rows of `df`
# regardless of how the frame is currently indexed.
state_ids_list = [state_id_by_name[state] for state in df['state']]

In [None]:
# Attach the ids computed above as a new 'state_id' column.
# The list is assigned positionally, one entry per row of `df`.

df['state_id'] = state_ids_list


In [None]:
# Check that the new 'state_id' column is present.
df.head()

In [None]:
# Reduce the 'date' column to calendar days: parse it as datetimes and keep
# only the date part, discarding the time of day.

df['date'] = pd.to_datetime(df['date']).dt.date

In [None]:
# Check the 'date' column after the conversion.
df.head()

In [None]:
# Inspect the column dtypes of the cleaned frame.
df.dtypes

In [None]:
# Puerto Rico and the District of Columbia are left out of the state map.
# We first print how many tweets each of them has (expected: none).
# The names are spelled exactly as in the GeoJSON file, because the tweet
# states were matched against those names when 'state_id' was built.

excluded_states = ['Puerto Rico', 'District of Columbia']

for excluded_state in excluded_states:
    print(df[df['state'] == excluded_state].shape[0])

# Filter with a boolean mask rather than drop(<single index label>): the row
# labels of the remaining states are preserved, and re-running the cell is a
# no-op instead of an error. The copy() avoids SettingWithCopyWarning when
# columns are added to the frame later.
state_ids_df = state_ids_df[~state_ids_df['State Name'].isin(excluded_states)].copy()

In [None]:
# Display the remaining states and their ids.
state_ids_df

In [None]:
# Average sentiment per state over the whole data set, in the row order of
# `state_ids_df`. A state without tweets yields NaN.

avg_sentiments = [
    df.loc[df['state_id'] == state_id, 'sentiment'].mean()
    for state_id in state_ids_df['id']
]

In [None]:
# Attach the per-state averages as a new 'avg_sentiment' column.
# `avg_sentiments` was built in the row order of `state_ids_df`, so the
# positional assignment lines up.

state_ids_df['avg_sentiment'] = avg_sentiments

state_ids_df

In [None]:
# Choropleth of the all-time average sentiment per state.
# Rows are matched to GeoJSON features through the 'id' column.

import plotly.express as px

fig = px.choropleth_mapbox(
    state_ids_df,
    geojson=states,
    locations='id',
    color='avg_sentiment',
    color_continuous_scale="hot",
    range_color=(-0.5, 0),          # bounds of the colour scale
    hover_name='State Name',
    mapbox_style="carto-positron",
    zoom=3,
    center=dict(lat=37.0902, lon=-95.7129),
    opacity=0.5,
    labels={'avg_sentiment': 'Average Sentiment'},
)
# Remove the figure margins.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# Next: the same map, broken down over time.
# Start by printing the first and the last day covered by the tweets.

print(df['date'].min(), df['date'].max(), sep='\n')

In [None]:
# Order the tweets chronologically.

df = df.sort_values('date')

In [None]:
# Display the date-sorted tweet frame.
df

In [None]:
# We make a duplicate of df so that we can change the date column to only months.
# copy() is required: a plain `df_months = df` only creates a second name for
# the same frame, so the assignment below would overwrite df['date'] as well.

df_months = df.copy()

# Keep only the month number (1-12). The year is discarded, so this is only
# unambiguous while the data spans at most twelve consecutive months.
df_months['date'] = pd.to_datetime(df_months['date']).dt.month

In [None]:
# Display the frame whose 'date' column now holds month numbers.
df_months

In [None]:
# Spot check on a single state and month: California in March (month 3),
# the first month of the data.
# NOTE(review): state_id 22 is assumed to be California's id - confirm against state_ids_df.

cali_df = df_months[df_months['state_id'] == 22]

is_first_month = cali_df['date'] == 3
cali_fm_df = cali_df[is_first_month].reset_index()

# Number of tweets in that month, then their mean sentiment.
print(len(cali_fm_df))
print(cali_fm_df['sentiment'].mean())

In [None]:
# Now we make a dataframe doing this for each month for each state 

state_names = []
state_ids = []
months = []
avg_senitments_month = []
no_tweets_month = []

months_iterate = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2]

for m in months_iterate:
    for i in state_ids_df['id']:
        
        state_df = df_months[df_months['state_id'] == i]
        state_month_df = state_df[state_df['date'] == m]
        month_avg_sentiment = state_month_df['sentiment'].mean()
        no_tweets_ = state_month_df.shape[0]
        
        state_names.append(state_ids_df['State Name'][i])
        state_ids.append(i)
        months.append(m)
        avg_senitments_month.append(month_avg_sentiment)
        no_tweets_month.append(no_tweets_)

In [None]:
# Put it into a dataframe

columns = ['State Name', 'id', 'Month', 'Average Sentiment', 'Number Of Tweets']
month_avg_sentiment_df = pd.DataFrame(columns = columns)

month_avg_sentiment_df['State Name'] = state_names
month_avg_sentiment_df['id'] = state_ids
month_avg_sentiment_df['Month'] = months
month_avg_sentiment_df['Average Sentiment'] = avg_senitments_month
month_avg_sentiment_df['Number Of Tweets'] = no_tweets_month

In [None]:
# Display the per-month, per-state averages.
month_avg_sentiment_df

In [None]:
# We want to change the month column to a string for the purpose of the graph

import calendar

# drop=True gives a clean 0..n-1 index without inserting the old one as an
# extra 'index' column, so the cell can be re-run safely.
month_avg_sentiment_df = month_avg_sentiment_df.reset_index(drop=True)

# Replace the whole column in one assignment. Writing element by element via
# df['Month'][i] = ... is chained assignment: it raises SettingWithCopyWarning
# and has no effect when pandas copy-on-write is enabled.
# Values that are already names are left untouched, which keeps the cell
# idempotent.
month_avg_sentiment_df['Month'] = [
    month if isinstance(month, str) else calendar.month_name[month]
    for month in month_avg_sentiment_df['Month']
]

month_avg_sentiment_df

In [None]:
# Animated choropleth: one frame per value of the 'Month' column, coloured by
# that month's average sentiment per state.

fig = px.choropleth_mapbox(
    month_avg_sentiment_df,
    geojson=states,
    locations='id',
    color='Average Sentiment',
    color_continuous_scale="hot",
    range_color=(-1, 1),            # bounds of the colour scale
    hover_name='State Name',
    mapbox_style="carto-positron",
    zoom=3,
    center=dict(lat=37.0902, lon=-95.7129),
    opacity=0.5,
    labels={'Average Sentiment': 'Average Sentiment'},
    animation_frame="Month",
)
# Remove the figure margins.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()