In [None]:
# !pip install kaleido -q

In [None]:
import pandas as pd
import tweepy
import math
import datetime
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, minmax_scale
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the per-account table. Later cells read the columns tweet_count,
# created_at, followers_count, listed_count, following_count and username.
user_info_df = pd.read_csv('../../data/user_info_updated.csv')

In [None]:
data_collection_end_time = '2021-12-31 23:59:59'
def user_impact(tweet_count, created_at, followers_count, listed_count, following_count):
    """Compute the impact score of a single account.

    The score is::

        followers * listed * log10(followers / following + 1)
        -----------------------------------------------------
                       tweets * profile_age_days

    rounded to 7 decimal places, where ``profile_age_days`` is the number of
    whole days between ``created_at`` and ``data_collection_end_time``.

    Parameters
    ----------
    tweet_count, followers_count, listed_count, following_count : number
        Account counters.
    created_at : str or datetime-like
        Account creation time. Its string form must start with
        ``YYYY-MM-DD HH:MM:SS``; anything after those 19 characters (for
        example a ``+00:00`` offset) is ignored.

    Returns
    -------
    float
        The rounded score, or NaN when the score is undefined because
        ``following_count``, ``tweet_count`` or the profile age is zero.

    Raises
    ------
    ValueError
        If ``created_at`` does not start with a ``YYYY-MM-DD HH:MM:SS`` timestamp.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    # Keep only the 19-character date-time prefix. This drops a trailing UTC
    # offset exactly as slicing off the last 6 characters did, but also works
    # for timestamps that carry no offset at all.
    created_at = str(created_at)[:19]

    collection_end = datetime.datetime.strptime(data_collection_end_time, timestamp_format)
    profile_age = (collection_end - datetime.datetime.strptime(created_at, timestamp_format)).days

    # Each of these values is a divisor below. Return NaN instead of raising
    # ZeroDivisionError (Python ints) or producing inf (numpy ints).
    if following_count == 0 or tweet_count == 0 or profile_age == 0:
        return np.nan

    impact = np.round(((followers_count * listed_count * math.log10((followers_count/following_count) + 1)) / (tweet_count * profile_age)), 7)

    return impact

In [None]:
# Score every account row by row with user_impact. The result is stored as
# 'user_impact', the column name that the scaling and plotting cells read.
impact_input_columns = ['tweet_count', 'created_at', 'followers_count', 'listed_count', 'following_count']
user_info_df['user_impact'] = user_info_df[impact_input_columns].apply(lambda x: user_impact(*x), axis=1)

# Keep the previous column name as an alias so anything that still reads
# 'impact' (including the exported CSV) continues to find it.
user_info_df['impact'] = user_info_df['user_impact']

In [None]:
# Display the frame to inspect the column added by the previous cell.
user_info_df

In [None]:
# Min-max scale the impact score over all accounts together, using the
# scaler's default settings, and store it as 'user_impact_scaled'.
minMaxScaler = MinMaxScaler()
impact_column = user_info_df[['user_impact']]
minMaxScaler.fit(impact_column)
user_info_df[['user_impact_scaled']] = minMaxScaler.transform(impact_column)

In [None]:
# Label accounts by row position: the first LEADER_ROW_COUNT rows are
# 'Leaders', every remaining row is 'Health Organizations'.
#
# The labels are written in a single column assignment. The earlier form,
# user_info_df.iloc[:10]['group'] = ..., is chained indexing: it may assign
# into a temporary copy and leave user_info_df unchanged.
LEADER_ROW_COUNT = 10

row_positions = np.arange(len(user_info_df))
user_info_df['group'] = np.where(row_positions < LEADER_ROW_COUNT, 'Leaders', 'Health Organizations')

In [None]:
# Encode the group label as an integer category.
labelEncoder = LabelEncoder()
group_codes = labelEncoder.fit_transform(user_info_df['group'])
user_info_df['group_category'] = group_codes

# Min-max scale the impact score separately within each group.
impact_by_group = user_info_df.groupby('group_category')['user_impact']
user_info_df['user_impact_scaled_by_group'] = impact_by_group.transform(
    lambda group_values: minmax_scale(group_values.astype(float))
)

# Min-max scale the impact score over all accounts together. This repeats the
# computation of the earlier scaling cell, so 'user_impact_scaled' is simply
# recomputed with a fresh scaler.
minMaxScaler = MinMaxScaler()
impact_column = user_info_df[['user_impact']]
minMaxScaler.fit(impact_column)
user_info_df[['user_impact_scaled']] = minMaxScaler.transform(impact_column)

### Plots

In [None]:
# Bar chart of the raw impact score per username, coloured by group.
fig = px.bar(
    user_info_df,
    x='username',
    y='user_impact',
    color='group',
    width=1200,
    height=500,
)

# Axis titles, black size-20 text, an untitled legend placed at
# (x=0.01, y=0.98), and margins of 10 on every side.
fig.update_layout(
    xaxis_title='Username',
    yaxis_title='User Impact',
    font={'size': 20, 'color': '#000000'},
    legend={
        'x': 0.01,
        'y': 0.98,
        'title_text': '',
        'traceorder': 'normal',
        'font': {'size': 20},
    },
    margin={'l': 10, 'b': 10, 'r': 10, 't': 10},
)

fig.show()

# Export the figure as PDF and PNG through the kaleido engine.
for export_path in ('user-impact.pdf', 'user-impact.png'):
    fig.write_image(export_path, engine='kaleido')

In [None]:
# Bar chart of the globally min-max-scaled impact score per username,
# coloured by group.
fig = px.bar(
    user_info_df,
    x='username',
    y='user_impact_scaled',
    color='group',
    width=1200,
    height=500,
)

# Axis titles, black size-20 text, an untitled legend placed at
# (x=0.01, y=0.98), and margins of 10 on every side.
fig.update_layout(
    xaxis_title='Username',
    yaxis_title='User Impact (Scaled)',
    font={'size': 20, 'color': '#000000'},
    legend={
        'x': 0.01,
        'y': 0.98,
        'title_text': '',
        'traceorder': 'normal',
        'font': {'size': 20},
    },
    margin={'l': 10, 'b': 10, 'r': 10, 't': 10},
)

fig.show()

# Export the figure as PDF and PNG through the kaleido engine.
for export_path in ('user-impact-scaled.pdf', 'user-impact-scaled.png'):
    fig.write_image(export_path, engine='kaleido')

In [None]:
# Bar chart of the per-group min-max-scaled impact score per username,
# coloured by group.
fig = px.bar(
    user_info_df,
    x='username',
    y='user_impact_scaled_by_group',
    color='group',
    width=1250,
    height=500,
)

# Axis titles, black size-19 text, an untitled legend with size-16 text
# placed at (x=0.01, y=0.98), and margins of 10 on every side.
fig.update_layout(
    xaxis_title='Username',
    yaxis_title='User Impact (Scaled by Group)',
    font={'size': 19, 'color': '#000000'},
    legend={
        'x': 0.01,
        'y': 0.98,
        'title_text': '',
        'traceorder': 'normal',
        'font': {'size': 16},
    },
    margin={'l': 10, 'b': 10, 'r': 10, 't': 10},
)

# Export the figure as PDF and PNG through the kaleido engine, then display it.
for export_path in ('user-impact-scaled-by-group.pdf', 'user-impact-scaled-by-group.png'):
    fig.write_image(export_path, engine='kaleido')

fig.show()

In [None]:
# Write the frame, including the columns added in this notebook, to CSV
# without the index column.
# NOTE(review): this path climbs three directory levels ('../../../data/'),
# while the input CSV was read from two levels up ('../../data/') - confirm
# that the different output location is intended.
user_info_df.to_csv('../../../data/user_info_reframe.csv', index=False)