In [1]:
import pandas as pd
import numpy as np
import glob
import functools
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
import seaborn as sns

In [2]:
%matplotlib inline
%precision 4
warnings.filterwarnings('ignore')
plt.style.use('seaborn')
np.set_printoptions(suppress=True)
pd.set_option("display.precision", 15)

In [3]:
def _window_bound(bound, tz):
    """Parse a window bound and, for a tz-aware column, localize a naive bound to `tz`."""
    stamp = pd.Timestamp(bound)
    if tz is not None and stamp.tzinfo is None:
        return stamp.tz_localize(tz)
    return stamp


def sentiment_per_day(dataframe, fromDate, toDate):
    """Average the per-tweet sentiment scores for each calendar day in a time window.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide ``created_at`` (timestamps, as strings or datetimes, used for
        the window filter), ``date`` (datetime dtype; its ``.dt.date`` is the
        grouping key), ``tweet`` (non-null values are counted) and the score
        columns ``positive``, ``negative`` and ``neutral``.
    fromDate, toDate : str or datetime-like
        Inclusive lower and upper bounds of the window. A timezone-naive bound is
        interpreted in the timezone of ``created_at`` when that column is tz-aware.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` with columns ``tweets_per_day``, ``positive``,
        ``negative`` and ``neutral``; each score is the day's sum divided by
        ``tweets_per_day``, rounded to 6 decimals.
    """
    # Compare parsed timestamps rather than raw values: on string columns `>=`/`<=`
    # is lexicographic, so a 'T' vs ' ' separator mismatch between the data and the
    # bounds would silently drop (or keep) an entire boundary day.
    created = pd.to_datetime(dataframe['created_at'])
    tz = created.dt.tz
    lower = _window_bound(fromDate, tz)
    upper = _window_bound(toDate, tz)
    in_window = dataframe.loc[(created >= lower) & (created <= upper)]

    # One pass over the groups: tweet count plus the summed scores for each day.
    per_day = in_window.groupby(in_window['date'].dt.date).agg(
        tweets_per_day=('tweet', 'count'),
        positive=('positive', 'sum'),
        negative=('negative', 'sum'),
        neutral=('neutral', 'sum'),
    )

    # Turn the daily sums into per-tweet averages.
    score_columns = ['positive', 'negative', 'neutral']
    per_day[score_columns] = (
        per_day[score_columns].div(per_day['tweets_per_day'], axis=0).round(6)
    )

    return per_day

In [4]:
# Inclusive bounds of the two analysis windows: before and during COVID.
# NOTE(review): the bounds mix 'T' and ' ' as the date/time separator; that matters
# wherever they are compared as plain strings -- confirm against the data's format.
pre_covid_from, pre_covid_to = '2017-01-01T00:00:00', '2020-02-26 23:59:59'
during_covid_from, during_covid_to = '2020-02-27 00:00:00', '2021-09-30T23:59:59'

In [5]:
user_folder_path = '../../data/twitter/'

# Stack every CSV under "pharma companies" into a single frame with a fresh row
# index, then parse `created_at` into a datetime `date` column.
# For a quick single-company run, read just one file instead, e.g.
# pd.read_csv(user_folder_path+'pharma companies/biogen.csv')
pharma_df = pd.concat(
    (
        pd.read_csv(csv_path, sep=',')
        for csv_path in glob.glob(user_folder_path + "/pharma companies/*.csv")
    ),
    ignore_index=True,
).assign(date=lambda tweets: pd.to_datetime(tweets['created_at']))

In [6]:
# Per-day average sentiment of the pharma-company tweets inside the pre-COVID window.
pre_pharma_companies_average_sentiments_per_day = sentiment_per_day(
    pharma_df, pre_covid_from, pre_covid_to
)

# One line per sentiment class over time; remove 'neutral' from `y` to plot only
# the positive and negative series.
fig = px.line(
    pre_pharma_companies_average_sentiments_per_day,
    x=pre_pharma_companies_average_sentiments_per_day.index,
    y=['positive', 'negative', 'neutral'],
    title='Average Change in Sentiments-Pharma Companies',
    labels={
        'variable': 'sentiment',
        'date': 'Date',
        'value': 'Average sentiment per day',
    },
)
fig.show()

In [7]:
# Show the per-day averages that feed the line chart (long frames are displayed truncated).
pre_pharma_companies_average_sentiments_per_day

Unnamed: 0_level_0,tweets_per_day,positive,negative,neutral
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-02,4,0.446106,0.141819,0.412075
2017-01-03,13,0.361201,0.098578,0.540221
2017-01-04,11,0.445351,0.067965,0.486685
2017-01-05,14,0.473588,0.059000,0.467412
2017-01-06,15,0.550445,0.012949,0.436606
...,...,...,...,...
2020-02-22,3,0.754744,0.018287,0.226969
2020-02-23,1,0.658171,0.001189,0.340640
2020-02-24,9,0.536235,0.165669,0.298096
2020-02-25,14,0.579030,0.070099,0.350872
