In [5]:

# One-time setup (uncomment to run, then restart the kernel): %pip install tweepy==4.13.0

Collecting tweepy
  Downloading tweepy-4.13.0-py3-none-any.whl (102 kB)
Note: you may need to restart the kernel to use updated packages.
Installing collected packages: tweepy
Successfully installed tweepy-4.13.0


In [1]:
import pandas as pd
import tweepy
import math
import datetime
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, minmax_scale
import plotly.express as px

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation warnings and pandas
# chained-assignment warnings are hidden as well; consider narrowing it to the
# specific categories that are known to be noise.
warnings.filterwarnings('ignore')

In [2]:
# Load the per-account profile table used by every later cell.
user_info_df = pd.read_csv(filepath_or_buffer='../../data/user_info.csv')

In [3]:
data_collection_end_time = '2020-12-31 23:59:59'
def user_impact(tweet_count, created_at, followers_count, listed_count, following_count):
    """Compute the impact score of one account as of ``data_collection_end_time``.

    The score is::

        tweet_count * listed_count
            * log10(sqrt(followers_count / (following_count + 1)) + 1)
            / profile_age ** 2

    where ``profile_age`` is the whole number of days between ``created_at``
    and ``data_collection_end_time``.

    Parameters
    ----------
    tweet_count, followers_count, listed_count, following_count : number
        Counters of the account.
    created_at : str or object whose ``str()`` starts with ``YYYY-MM-DD HH:MM:SS``
        Creation timestamp. Everything after the seconds field (a UTC offset
        such as ``+00:00``, fractional seconds) is ignored, so the value is
        compared with the end time as a naive wall-clock time.

    Returns
    -------
    float
        The score rounded to 7 decimals, or NaN when the profile age is not
        positive (the score divides by ``profile_age ** 2`` and is undefined
        for an account created on or after the end of data collection).

    Raises
    ------
    ValueError
        If ``created_at`` does not start with a ``YYYY-MM-DD HH:MM:SS`` timestamp.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    # 'YYYY-MM-DD HH:MM:SS' is exactly 19 characters. Slicing from the front
    # (instead of chopping 6 characters off the end) also accepts timestamps
    # that carry no UTC offset or that include fractional seconds.
    created_at = str(created_at)[:19]

    end_time = datetime.datetime.strptime(data_collection_end_time, timestamp_format)
    creation_time = datetime.datetime.strptime(created_at, timestamp_format)
    profile_age = (end_time - creation_time).days

    # Avoid ZeroDivisionError / inf for zero-day-old profiles and meaningless
    # scores for profiles dated after the end of data collection.
    if profile_age <= 0:
        return np.nan

    # +1 in the denominator guards against following_count == 0; +1 inside the
    # logarithm keeps the factor non-negative when followers_count == 0.
    follower_factor = math.log10((followers_count / (following_count + 1)) ** 0.5 + 1)
    impact = np.round((tweet_count * listed_count * follower_factor) / (profile_age ** 2), 7)

    return impact

In [4]:
# Columns are listed in the positional order expected by user_impact().
impact_inputs = ['tweet_count', 'created_at', 'followers_count', 'listed_count', 'following_count']
user_info_df['impact'] = user_info_df[impact_inputs].apply(
    lambda row: user_impact(*row),
    axis=1,
)

In [5]:
# Display the frame to inspect the freshly added 'impact' column.
user_info_df

Unnamed: 0,created_at,name,username,followers_count,following_count,tweet_count,listed_count,description,location,verified,impact
0,2015-09-01 23:37:11+00:00,PIENSAPRENSA 327 mil Seguidores,PiensaPrensa,327907,39521,72612,767,"Chilenos, autogestionados, Independientes, com...","Santiago, Chile ±56987524113",True,8.642746
1,2009-02-28 23:47:11+00:00,Felipe Parada 🔻,FelipeParadaM,79830,3840,197773,144,"Hijo de obrero. Rebeldía, Resistencia y Amor |...","Providencia, Chile",False,1.134771
2,2010-07-19 10:52:44+00:00,Vagabundo ilustrado,vagoilustrado,125296,271,15847,168,"Otoñista, Yournalista, analista y maestro tier...",@vagoilustrado@seda.social,False,0.246825
3,2013-06-01 03:31:15+00:00,Carlos Santander,csantander23,2431,256,7338,3,San Antonio — Melipilla,,False,0.001751
4,2016-08-09 00:12:48+00:00,Andrés 💪🕊 🍁✊ #YoAnulo,andres20ad,41891,13171,46890,57,"Uno de muchos, que luchan por mejorar este mun...",,False,0.461256
5,2012-11-24 01:51:19+00:00,Chileokulto,Chileokulto,236743,58970,248120,335,@Chileokulto El primer Panfleto virtual de Chi...,En el Chile que nadie ve.,False,4.534446
6,2009-04-24 18:06:25+00:00,Hernán,hernan_sr,30977,11302,134503,90,tweets con ají incluido. 🌶️,,True,0.28173
7,2018-06-13 14:18:58+00:00,HonorYGloria ❤🔥,JoviNomas,34594,1694,113660,37,"Bajo el cielo de Moscú.\nMilitante del dolor, ...",Constelación de Orión ⭐,False,3.591195
8,2008-04-19 17:43:02+00:00,El Ciudadano,El_Ciudadano,581822,71419,240898,1785,https://t.co/hF7jqSE6Tk medio de comunicación ...,Santiago de Chile,True,11.70771
9,2010-07-19 00:03:03+00:00,GAMBA,GAMBA_CL,213205,137,81389,384,El medio más carismático de la historia. Centr...,,False,3.441916


In [6]:
# Min-max rescale the impact score over the whole table (all accounts together).
minMaxScaler = MinMaxScaler()
impact_values = user_info_df[['impact']]
minMaxScaler.fit(impact_values)
user_info_df[['user_impact_scaled']] = minMaxScaler.transform(impact_values)

In [36]:
# Label every account with its group purely by row position: the first
# `n_leaders` rows are 'Leaders', all remaining rows are 'Health Organizations'.
# NOTE(review): this relies on the row order of the input CSV - confirm it is stable.
#
# The column is built in a single assignment instead of chained indexing
# (df['group'][:11] = ...), which raises SettingWithCopyWarning and silently
# does nothing when pandas Copy-on-Write is enabled.
n_leaders = 11
row_positions = np.arange(len(user_info_df))
user_info_df['group'] = np.where(row_positions < n_leaders, 'Leaders', 'Health Organizations')

In [37]:
# Encode the textual group label as an integer category usable as a grouping key.
labelEncoder = LabelEncoder()
user_info_df['group_category'] = labelEncoder.fit_transform(user_info_df['group'])

# Min-max rescale the impact score separately inside each group, so an account
# is compared only with the other accounts of its own group.
user_info_df['user_impact_scaled_by_group'] = (
    user_info_df
    .groupby('group_category')['impact']
    .transform(lambda group_impact: minmax_scale(group_impact.astype(float)))
)

# The table-wide 'user_impact_scaled' column is intentionally not recomputed
# here: it is already produced right after the 'impact' column is created, and
# refitting the same scaler on the same column would yield identical values.

In [38]:
# Spot-check the derived columns for a single account.
user_info_df.loc[user_info_df['username'].eq('HSELive')]

Unnamed: 0,created_at,name,username,followers_count,following_count,tweet_count,listed_count,description,location,verified,impact,user_impact_scaled,group,group_category,user_impact_scaled_by_group,impact2
13,2009-08-27 14:03:51+00:00,HSE Ireland,HSELive,258182,506,35904,606,Your official guide to what's happening in the...,Ireland,False,1.468591,0.027521,Health Organizations,0,0.084849,1.468591


### Plots

In [40]:
# Bar chart of the raw impact score: one bar per username, colored by group.
fig = px.bar(
    user_info_df,
    x='username',
    y='impact',
    color='group',
    width=1200,
    height=500,
)
fig.update_layout(
    yaxis_title='User Impact',
    xaxis_title='Username',
    font={'size': 20, 'color': '#000000'},
    # Untitled legend positioned at (0.01, 0.98), i.e. near the top-left corner.
    legend={
        'x': 0.01,
        'y': 0.98,
        'title_text': '',
        'traceorder': 'normal',
        'font': {'size': 20},
    },
    # Small uniform margins so the figure fills the exported canvas.
    margin={'l': 10, 'b': 10, 'r': 10, 't': 10},
)
fig.show()
# Optional static export (uses the kaleido engine):
# fig.write_image('user-impact.pdf', engine='kaleido')
# fig.write_image('user-impact.png', engine='kaleido')
# For an HTML export use fig.write_html('user-impact.html') rather than write_image.

In [41]:
# Bar chart of the table-wide min-max scaled impact: one bar per username, colored by group.
fig = px.bar(
    user_info_df,
    x='username',
    y='user_impact_scaled',
    color='group',
    width=1200,
    height=500,
)
fig.update_layout(
    yaxis_title='User Impact (Scaled)',
    xaxis_title='Username',
    font={'size': 20, 'color': '#000000'},
    # Untitled legend positioned at (0.01, 0.98), i.e. near the top-left corner.
    legend={
        'x': 0.01,
        'y': 0.98,
        'title_text': '',
        'traceorder': 'normal',
        'font': {'size': 20},
    },
    # Small uniform margins so the figure fills the exported canvas.
    margin={'l': 10, 'b': 10, 'r': 10, 't': 10},
)
fig.show()
# Optional static export (uses the kaleido engine):
# fig.write_image('user-impact-scaled.pdf', engine='kaleido')
# fig.write_image('user-impact-scaled.png', engine='kaleido')

In [42]:
# Bar chart of the per-group min-max scaled impact: one bar per username, colored by group.
# This figure is slightly wider and uses smaller fonts than the other two charts.
fig = px.bar(
    user_info_df,
    x='username',
    y='user_impact_scaled_by_group',
    color='group',
    width=1250,
    height=500,
)
fig.update_layout(
    yaxis_title='User Impact (Scaled by Group)',
    xaxis_title='Username',
    font={'size': 19, 'color': '#000000'},
    # Untitled legend positioned at (0.01, 0.98), i.e. near the top-left corner.
    legend={
        'x': 0.01,
        'y': 0.98,
        'title_text': '',
        'traceorder': 'normal',
        'font': {'size': 16},
    },
    # Small uniform margins so the figure fills the exported canvas.
    margin={'l': 10, 'b': 10, 'r': 10, 't': 10},
)
fig.show()
# Optional static export (uses the kaleido engine):
# fig.write_image('user-impact-scaled-by-group.pdf', engine='kaleido')
# fig.write_image('user-impact-scaled-by-group.png', engine='kaleido')


In [39]:
# Persist the table together with the derived columns; the row index is not written.
user_info_df.to_csv(path_or_buf='../../data/user_info_updated.csv', index=False)