In [1]:
import pandas as pd

In [2]:
# Load the preprocessed Biden and Trump tweet tables from the data directory.
df_biden = pd.read_csv(filepath_or_buffer="../data/biden_tweets_preprocessed.csv")
df_trump = pd.read_csv(filepath_or_buffer="../data/trump_tweets_preprocessed.csv")

In [3]:
# Parse each tweet's creation time and render it at minute resolution (seconds dropped).
# Note: strftime returns *strings* of the form 'YYYY-MM-DD HH:MM', not datetime objects,
# so the new 'timestamp' column is textual.
minute_format = '%Y-%m-%d %H:%M'
for tweets in (df_biden, df_trump):
    created = pd.to_datetime(tweets['created_at'])
    tweets['timestamp'] = created.dt.strftime(minute_format)

In [4]:
# Aggregate the sentiment score per minute: number of tweets plus mean / max / min.
# agg() computes exactly these four statistics; describe() would additionally compute
# the standard deviation and quartiles for every minute even though only these four
# are needed. The result keeps the two-level column layout
# ('sentiment_score' -> statistic), so indexing it with ['sentiment_score'] still works.
# NOTE: 'count' is an integer here, whereas describe() reports it as a float.
per_minute_stats = ['count', 'mean', 'max', 'min']
df_biden_per_min = df_biden[['timestamp','sentiment_score']].groupby("timestamp").agg(per_minute_stats)
df_trump_per_min = df_trump[['timestamp','sentiment_score']].groupby("timestamp").agg(per_minute_stats)

In [5]:
# Keep only the tweet count and the mean, max and min sentiment for each minute.
kept_stats = ['count','mean','max','min']
trump_sentiment_min = df_trump_per_min['sentiment_score'].loc[:, kept_stats]
biden_sentiment_min = df_biden_per_min['sentiment_score'].loc[:, kept_stats]

In [6]:
# Give the statistic columns self-explanatory names, then move the index back into
# an ordinary column so it is written out with the rest of the data.
explicit_names = {
    "count": "tweets",
    "mean": "mean_sentiment",
    "max": "max_sentiment",
    "min": "min_sentiment",
}
biden_sentiment_min = biden_sentiment_min.rename(columns=explicit_names).reset_index()
trump_sentiment_min = trump_sentiment_min.rename(columns=explicit_names).reset_index()

In [7]:
# Persist the per-minute sentiment tables as CSV files (row index not written).
biden_sentiment_min.to_csv(path_or_buf="../data/biden_tweets_per_min.csv", index=False)
trump_sentiment_min.to_csv(path_or_buf="../data/trump_tweets_per_min.csv", index=False)

## New: final per-minute CSVs

In [8]:
# Reload the per-minute tables, parsing 'timestamp' into datetimes this time.
per_minute_read_options = {"parse_dates": ['timestamp']}
biden = pd.read_csv("../data/biden_tweets_per_min.csv", **per_minute_read_options)
trump = pd.read_csv("../data/trump_tweets_per_min.csv", **per_minute_read_options)

In [9]:
# Split each per-minute table at a fixed cutoff: rows strictly earlier go to *_before,
# rows at or after the cutoff go to *_after (re-indexed from 0).
split_at = pd.Timestamp('2020-10-22 21:00')

trump_before = trump.loc[trump['timestamp'].lt(split_at)]
trump_after = trump.loc[trump['timestamp'].ge(split_at)].reset_index(drop=True)

biden_before = biden.loc[biden['timestamp'].lt(split_at)]
biden_after = biden.loc[biden['timestamp'].ge(split_at)].reset_index(drop=True)

In [10]:
interrupt = pd.read_csv('../data/interruptions_per_minute_adjusted.csv', parse_dates=['timestamp'])
interrupt = interrupt.drop(['minute', 'part', 'timestamp'], axis = 1)

In [11]:
trump_final = pd.concat([trump_after, interrupt], axis = 1)
biden_final = pd.concat([biden_after, interrupt], axis = 1)

In [12]:
# Write the combined sentiment + interruption tables to disk (row index not written).
biden_final.to_csv(path_or_buf="../data/biden_sentiment_interruption_per_minute.csv", index=False)
trump_final.to_csv(path_or_buf="../data/trump_sentiment_interruption_per_minute.csv", index=False)