In [None]:
import io
import os
import glob
import re
import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.signal import savgol_filter

In [None]:
# Tweets of the single account analysed in this notebook, plus the per-user
# metadata table from which topicWiseEngagement reads 'user_impact_scaled'.
user_df = pd.read_csv('../../../data/twitter/raw/users/republicans/required/realDonaldTrump.csv')
user_info_df = pd.read_csv('../../../data/twitter/raw/user_info_reframe.csv')

# Span of the exponential moving average applied to the daily engagement series.
SPAN = 300
# Default window length and polynomial order passed to scipy's savgol_filter.
WINDOW_LENGTH = 21
POLY_ORDER = 8

In [None]:
# Remove, in place, the retweet bookkeeping columns and the alternative
# engagement/URL/reply columns that the rest of the notebook does not read.
user_df.drop(
    columns=[
        'user_rt_id', 'user_rt', 'retweet_id', 'retweet_date',
        'like_count', 'reply_count', 'retweet_count', 'quote_count',
        'expanded_url', 'reply_to',
    ],
    inplace=True,
)

In [None]:
# Treat missing engagement counts as zero so they can later be cast to int.
# The fill is assigned back to the frame: calling fillna(..., inplace=True) on a
# column selection is a chained assignment, which warns on pandas 2.x and leaves
# user_df unchanged once Copy-on-Write is enabled.
count_columns = ['replies_count', 'retweets_count', 'likes_count']
user_df[count_columns] = user_df[count_columns].fillna(0)

In [None]:
# Per-column count of missing values in user_df (shown as the cell output).
user_df.isna().sum()

In [None]:
# Keyword phrases grouped by topic: each inner list is one topic, and
# topicWiseEngagement assigns a tweet to a topic when isPhraseIn() finds any of
# that topic's phrases in the tweet text. A phrase may appear under several
# topics (e.g. 'covid crisis', 'wear masks').
# NOTE(review): several phrases look misspelled (e.g. 'packing the cohort',
# 'amy coney barett', 'judge vaccancies', 'foreign entaglements',
# 'fighiting for our future', 'amtrack train tour', 'redducing health care costs')
# and would then never match correctly spelled tweets - confirm the intended
# wording before relying on these topics.
topicList = [
             ['recovering economy','dropped unemployment rate','v shaped recovery','k shaped recovery','country shutdown',
              'fewer jobs','covid crisis','heated trade war','great recession','buy american','eliminate Trump tax cuts',
              'student loan debt'],
             ['elections have consequences', 'fill US Supreme court seat','supreme court nominee','get rid of affordable care act',
              'amy coney barett','supreme court appointments', 'ending the filibuster','packing the cohort'],
             ['covid crisis','deadly disease','trump panicked','save lives','economy shutdown','wear masks','vaccine','china plague',
              'reopening plan','big rallies','smaller rallies', 'dc lockdown'],
             ['race issues','equity in equality','decency','floyd murder','peaceful protest','black lives matter','generate racist hatred',
              '1994 crime bill super-predators','law enforcement','demand law and order','systemic injustice','violence is inappropriate',
              'end racial senstivity training','increase in homicides','reimagining police','community police','prosecute violence', 'weapon ban'],
             ['vote','validate counting ballots','solicited ballot','irregularity in ballot','justice ballot by supreme court','testify votes','fair election',
              'manipulating ballots','pledge to not declare victory before ballots are independenly certified','urge supporters to stay calm while counting',
              'transparency','voter fraud','voter suppression','swing vote','gerrymandering','interventionism'],
             ['impeachment hoax','con job','rebuilt military','judge vaccancies','128 openings','more divided','more violent','caused recession','weaker','putins puppy',
              'hunter','fortune in moscow, ukraine, china','federal judges','veteran affairs','bronze star'],
             ['cut drug prices','public option to obamacare','government takeover of healthcare','socialize medicine','end obamacare'],
             ['fight pandemic','lower mortality rate','vaccine','operation warp speed','distributing ventilators','wear masks','rapid testing','national standards for reopening',
              'financial resources for reopening','increase in unemployment rate','committing suicide','depression','social distancing','plexi-glass separators','protect seniors',
              'trump panicked','sell short','remdesivir','self quarantine','sanitize','exercise','eat healthy','meditate'],
             ['healthcare','health insurance','affordable care act','terminated individual mandate','end obamacare','better healthcare','building on obamacare','bidencare',
              'obamacare with public option','eliminate private insurance','affordable healthcare','healthcare is right','socialized medicine','fracking','destroying medicare',
              'destroying social security','cut medicare','poverty','hunger','raise minimum wage','bail out small businesses','immigration','zero tolerance policy',
              'reunion of families'],
             ['institutional racism','free from violence','super predators','1994 crime bill','criminal justice reform','prison reform','eliminate minimum mandatories',
              'black lives matter movement is hate','climate of hate','banned muslims','bill on drug use','no jail for drug offense'],
             ['opportunity for jobs','best carbon emission standards','climate change','global warming','health and jobs are at stake','economic growth',
              'energy independent','fracking','zero emissions','sustainable','keep frontline communities safe','renewable energy','federal subsidy to oil industry',
              'forest fires in west','paris climate accord','climate change','lowest carbon','billion tree project','forest management','maintain forests','obama plain power plan',
              'green jobs','renewable energy','electric vehicles','net zero emission by 2035','global warming','green new deal'],
             ['security of elections','iran & russia influencing elections','iran sent messages to voters','intruders will pay a price','american sovereignity','russian pawn',
              'bounties to kill soldiers in afghanistan', 'interference from foreign adversaries','biden should lose','election security','business in china','pays tax in china',
              'tax returns of trump','corruption','big man','release tax return statements','foreign entaglements','bribe ukranian','bank account in china','play by rules','denuclearization',
              'north korea was a mess','nationalism'],
             ['inauguration day','rebuilding america','best black unemployment numbers','road to success','cut taxes','new regulations','together with success','depression','401ks will go to hell',
              'inaugural address','hope over fear','chose to move forward','grow economy','deal with systemic racism','motivated by clean energy','create millions of jobs','character of the country',
              'everyone has an even chance'],
             ['black lives matter','george floyd','blm','justice for floyd','floyd protest','colorism','defunding the police','no justice no peace','solidarity','performative activism','microagression',
              'black lives matter movement','police brutality','post-racial','racially motivated violence','george floyd protests','murder of George Floyd'],
             ['capitol hill','capitol riot','capitol attack','attempted violent overthrow','armed insurrection','assault on our democracy', 'mob riot','mob rule','insurrection','capitol storming',
              'capitol crowd','capitol hill attack','capitol incident','capitol hill incident','capitol','riot','peaceful protest'],
             ['US Elections','US Elections 2020','vote for biden','vote for trump','make america great again','promises made, promises kept','our best days still lie ahead',
              'building opportunity together','working people first','fighiting for our future','we rise','win the era','a fair shot for everyone','one nation, one destiny',
              'lead with love','no more wars','sleepy joe','vote blue to save america','trump is losing','biden harris','debate 2020','election 2020','trump vs biden','voting',
              'voting rights','elections','voter fraud','super tuesday','referendum','silent majority','democratic socialist','interventionism','nationalism'],
             ['inauguration','biden harris inauguration','celebrate america','inauguration day','inauguration day 2021','celebrate with joe','vice president harris','unity','diversity','democracy'],
            #  ['medical marijuana','legalize marijuana','marijuana liberalization policies','marijuana decriminalization','marijuana liberalization','recreational marijuana','marijuana policy'],
             ['LGBTQ', 'community', 'equality rights', 'racial', 'ethnic', 'black', 'inequalities', 'dignity', 'reject', 'disabilities','lgbt rights','gender neutrality','inclusiveness','fair and equal treatment'],
             ['weapon ban','boycott','curtail','guns','violence', 'magazines', 'high capacity','gun control','gun violence','mass shootings','gun control laws','reduce gun violence','gun control act',
              'firearm owners protection act','assault weapons ban','march for our lives','international gun control','brady law','open carry','background checks'],
             ['express tour','trains','labour unions','build back express tour','backbone of america','amtrack train tour','dignity of work','union rights','i will fight for you','middle class built america'],
             ['maternal', 'healthcare', 'obamacare', 'medicare', 'affordable', 'clean', 'health', 'care','health coverage','medicaid eligibility','insurance coverage','US citizens','legal residents',
              'reform private insurance','MMA','medicare modernization act','quality of health care','merit based incentive payment system','redducing health care costs']]

In [None]:
def isPhraseIn(phrase, text):
    """Return True if ``phrase`` occurs in ``text`` as a whole word or phrase.

    The search is case-insensitive. ``phrase`` is matched as literal text: it is
    escaped before being placed in the pattern, so characters such as ``.``,
    ``+`` or ``(`` in a keyword can neither change what is matched nor make the
    pattern fail to compile.

    Parameters
    ----------
    phrase : str
        Keyword or multi-word phrase to look for.
    text : str
        Text to search.

    Returns
    -------
    bool
        True when ``phrase`` is found and is not directly preceded or followed
        by a word character.
    """
    # Lookarounds rather than \b: for phrases that start and end with a word
    # character they accept exactly the same positions, but they also work when
    # the phrase starts or ends with punctuation, where \b would demand an
    # adjacent word character.
    pattern = r"(?<!\w){}(?!\w)".format(re.escape(phrase))
    return re.search(pattern, text, re.IGNORECASE) is not None

In [None]:
def calculateAverageEngagementsPerDay(dataframe):
    """Aggregate tweet engagement per ``created_at`` value.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``created_at``, ``tweet``, ``likes_count``,
        ``replies_count`` and ``retweets_count``. The count columns must be
        castable to ``int`` (no missing values). The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per ``created_at`` value with the columns ``tweets_per_day``
        (number of non-null tweets), ``engagement_rate`` (sum of likes, replies
        and retweets) and ``average_engagement_per_day``
        (``engagement_rate / (3 * tweets_per_day)`` rounded to 2 decimals, i.e.
        the mean over the three metrics and the tweets of that day), together
        with the ``created_at`` key.
    """
    # Build the per-tweet total on a derived frame so the caller's dataframe
    # does not silently gain an 'engagement_rate' column.
    scored = dataframe.assign(
        engagement_rate=(
            dataframe['likes_count'].astype(int)
            + dataframe['replies_count'].astype(int)
            + dataframe['retweets_count'].astype(int)
        )
    )

    engagements_per_day = scored.groupby(['created_at']).agg({'engagement_rate': 'sum'}).reset_index()
    tweets_per_day = (scored.groupby(['created_at'])['tweet'].count()).to_frame('tweets_per_day')

    average_engagements_per_day = tweets_per_day.merge(engagements_per_day, how='inner', on='created_at')
    average_engagements_per_day['average_engagement_per_day'] = np.round(
        average_engagements_per_day['engagement_rate']
        / (3 * average_engagements_per_day['tweets_per_day']),
        2,
    )

    return average_engagements_per_day

In [None]:
def _selectTopicTweets(user_df, topic_keywords):
    """Return a copy of the rows of ``user_df`` whose tweet contains any topic phrase."""
    # Stringify first: a missing tweet is a float NaN, which the regex search
    # inside isPhraseIn cannot handle.
    tweet_texts = user_df['tweet'].astype(str)
    matches = tweet_texts.map(
        lambda text: any(isPhraseIn(phrase, text) for phrase in topic_keywords)
    ).astype(bool)
    # Copy so that columns added downstream never touch user_df.
    return user_df[matches.to_numpy()].copy()


def _smoothEma(ema_values):
    """Savitzky-Golay smooth a non-empty 1-D series, shrinking window/order for short input."""
    # The window may not exceed the number of points being filtered and is kept odd.
    win_len = min(WINDOW_LENGTH, len(ema_values))
    if win_len % 2 == 0:
        win_len -= 1
    if win_len != WINDOW_LENGTH:
        print(win_len)
    # polyorder must stay below the window length; capping at POLY_ORDER keeps
    # the filter smoothing instead of interpolating every point.
    poly_order = min(POLY_ORDER, win_len - 1)
    return savgol_filter(ema_values, win_len, poly_order)


def topicWiseEngagement(user_df, topicList):
    """Print the impact-weighted average engagement of one user for each topic.

    For every keyword list in ``topicList`` the tweets mentioning any of its
    phrases are selected, aggregated per ``created_at`` value with
    calculateAverageEngagementsPerDay, turned into an exponential moving average
    (span ``SPAN``), stripped of points whose z-score lies outside [-3, 3],
    multiplied by the user's ``user_impact_scaled`` value and averaged. The
    result is printed; nothing is returned.

    Reads the notebook-level names ``user_info_df``, ``SPAN``, ``WINDOW_LENGTH``
    and ``POLY_ORDER``.

    Parameters
    ----------
    user_df : pandas.DataFrame
        Tweets of a single user; needs ``username``, ``tweet``, ``created_at``
        and the three engagement count columns. Not modified.
    topicList : list of list of str
        One list of keyword phrases per topic.

    Raises
    ------
    IndexError
        If ``user_df`` is empty or the user has no row in ``user_info_df``.
    """
    print(user_df.shape)

    # The user and their impact score are the same for every topic: look them up once.
    username = user_df['username'].unique()[0]
    impact_values = user_info_df.loc[
        user_info_df['username'] == username, 'user_impact_scaled'
    ].unique()
    if len(impact_values) == 0:
        raise IndexError(
            "no 'user_impact_scaled' entry for user {!r} in user_info_df".format(username)
        )
    user_impact = impact_values[0]

    separator = '============================================================================================================='

    # Calculate engagement over each topic
    for topic_keywords in topicList:
        print(topic_keywords)

        # drop_duplicates returns a new frame; its result has to be kept.
        topic_df = _selectTopicTweets(user_df, topic_keywords).drop_duplicates()

        user_topic_engagement = (
            None if topic_df.empty else calculateAverageEngagementsPerDay(topic_df)
        )
        if user_topic_engagement is None or user_topic_engagement.empty:
            # Nothing to average or smooth for this topic.
            print('No tweets matched this topic; skipping.')
            print(separator)
            continue

        # Select the column by name rather than by position.
        user_topic_engagement['EMA'] = (
            user_topic_engagement['average_engagement_per_day']
            .ewm(span=SPAN, adjust=False)
            .mean()
        )
        user_topic_engagement['user'] = username
        user_topic_engagement['user_impact'] = user_impact

        # Calculate z-score & Remove outliers
        with np.errstate(invalid='ignore', divide='ignore'):
            zscores = np.asarray(stats.zscore(user_topic_engagement['EMA']), dtype=float)
        # A constant (or single-point) series has zero spread and yields NaN
        # z-scores; such points are not outliers and must not be dropped.
        user_topic_engagement['zscore'] = np.where(np.isnan(zscores), 0.0, zscores)
        user_topic_engagement = user_topic_engagement[
            user_topic_engagement['zscore'].between(-3, 3)
        ].copy()

        # Curve Smoothing (window sized from the series that is actually filtered)
        user_topic_engagement['EMA:Degree8'] = _smoothEma(user_topic_engagement['EMA'].to_numpy())

        # Add user-impact to EMA
        user_topic_engagement['EMA*user_impact'] = user_topic_engagement['EMA'].mul(user_topic_engagement['user_impact'])

        avg_user_topic_engagement = np.round(user_topic_engagement['EMA*user_impact'].mean(), 3)

        print('Average engagement for topic:', avg_user_topic_engagement)
        print(separator)
       

In [None]:
# Run the per-topic engagement report for the loaded user over every topic in topicList.
topicWiseEngagement(user_df, topicList)