In [1]:
import datetime
import math
import os

import numpy as np
import pandas as pd
import plotly.express as px
import tweepy
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Read the Twitter API bearer token from the environment instead of
# hard-coding it, so the credential never ends up in the saved notebook.
# Falls back to an empty string (the previous hard-coded value) when unset.
bearer_token = os.environ.get("TWITTER_BEARER_TOKEN", "")
client = tweepy.Client(bearer_token=bearer_token)

# ISO-8601 UTC bounds; presumably the tweet collection window (not used in
# the cells visible here -- confirm against the collection code).
start_time = '2017-01-21T00:00:00Z'
end_time = '2021-01-27T23:59:59Z'

In [None]:
# Screen names to look up; passed to client.get_users as `usernames`.
usernames = ['GOP']

In [None]:
# Fetch the profiles for `usernames`, asking the API for the extra fields
# that are copied into the user table further down.
user_info = client.get_users(
    usernames=usernames,
    user_fields=[
        'created_at',
        'public_metrics',
        'description',
        'location',
        'verified',
    ],
)

In [None]:
# Empty frame that fixes the column layout of the user table.
user_info_df = pd.DataFrame(
    columns=[
        'created_at',
        'name',
        'username',
        'followers_count',
        'following_count',
        'tweet_count',
        'listed_count',
        'description',
        'location',
        'verified',
    ]
)

In [None]:
# Turn every returned user into one flat record. DataFrame.append was removed
# in pandas 2.0 (and was quadratic when called in a loop), so the rows are
# collected first and the frame is built in a single step.
# `user_info.data` can be None when the lookup matched nothing, hence `or []`.
user_records = [
    {
        'created_at': user.created_at,
        'name': user.name,
        'username': user.username,
        'followers_count': user.public_metrics.get('followers_count'),
        'following_count': user.public_metrics.get('following_count'),
        'tweet_count': user.public_metrics.get('tweet_count'),
        'listed_count': user.public_metrics.get('listed_count'),
        'description': user.description,
        'location': user.location,
        'verified': user.verified,
    }
    for user in (user_info.data or [])
]

if user_records:
    new_user_rows = pd.DataFrame(user_records, columns=user_info_df.columns)
    # Keep the original append semantics: rows already in the frame stay, new
    # ones go after them with a fresh 0..n-1 index. Skipping the concat when
    # the frame is still empty avoids mixing in its all-object dtypes.
    if user_info_df.empty:
        user_info_df = new_user_rows
    else:
        user_info_df = pd.concat([user_info_df, new_user_rows], ignore_index=True)

In [None]:
# Display the user table built from the API response.
user_info_df

In [None]:
# Persist the fetched user table (index dropped) as user_info1.csv.
# NOTE(review): the following cell loads user_info_reframe.csv, not this file;
# presumably that file was derived from this one outside the notebook -- confirm.
user_info_df.to_csv('../../../data/twitter/raw/user_info1.csv', index=False)

In [2]:
# Load the user table from user_info_reframe.csv, replacing any frame built
# earlier in the session. Per the column listing below, this file carries a
# pol_party column in addition to the fetched profile fields.
user_info_df = pd.read_csv('../../../data/twitter/raw/user_info_reframe.csv')

In [3]:
# Inspect which columns the loaded file provides.
user_info_df.columns

Index(['created_at', 'name', 'username', 'followers_count', 'following_count',
       'tweet_count', 'listed_count', 'description', 'location', 'verified',
       'pol_party'],
      dtype='object')

In [4]:
# Reference instant used by user_impact to compute the profile age, written in
# the '%Y-%m-%d %H:%M:%S' layout that user_impact parses.
data_collection_end_time = '2021-01-27 23:59:59'

def orderOfMagnitude(number):
    """Return the base-10 order of magnitude of ``number``.

    This is ``floor(log10(number))``, e.g. 999 -> 2 and 1000 -> 3.
    Zero has no logarithm and is mapped to 0 by convention.

    Args:
        number: A non-negative number.

    Returns:
        int: The exponent of the largest power of ten not exceeding ``number``,
        or 0 when ``number`` is 0.

    Raises:
        ValueError: If ``number`` is negative (log10 is undefined there).
    """
    if number == 0:
        return 0
    # math.log(x, 10) is computed as ln(x) / ln(10) and is inexact for exact
    # powers of ten (math.log(1000, 10) == 2.9999999999999996, which floors
    # to 2). math.log10 is the accurate base-10 routine.
    return math.floor(math.log10(number))

def user_impact(tweet_count, created_at, followers_count, listed_count, following_count, end_time=None):
    """Score an account's impact from its public profile metrics.

    The score is::

        followers * listed * log10(followers / following + 1)
        -----------------------------------------------------
                    profile_age_days * tweet_count

    rounded to 10 decimal places.

    Args:
        tweet_count: Number of tweets posted by the account.
        created_at: Account creation time, either a datetime or a string such
            as '2009-02-27 23:04:51+00:00'. A UTC offset is optional; when
            present it is dropped without shifting the wall-clock time.
        followers_count: Number of followers.
        listed_count: Number of lists the account appears on.
        following_count: Number of accounts this account follows.
        end_time: Reference instant as a '%Y-%m-%d %H:%M:%S' string. Defaults
            to the module-level ``data_collection_end_time``.

    Returns:
        The rounded impact score, or NaN when the score is undefined because
        ``following_count``, ``tweet_count`` or the profile age in whole days
        is zero.
    """
    if end_time is None:
        end_time = data_collection_end_time
    collected_at = datetime.datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S')

    # Parse the creation time without assuming a fixed-width "+00:00" suffix,
    # so timestamps with and without an offset are both accepted.
    if isinstance(created_at, datetime.datetime):
        created = created_at
    else:
        created = datetime.datetime.fromisoformat(str(created_at))
    # The reference instant is naive, so compare naive wall-clock times.
    created = created.replace(tzinfo=None)

    profile_age = (collected_at - created).days

    # Any zero denominator leaves the score undefined; report NaN instead of
    # raising ZeroDivisionError (or silently producing inf with numpy ints).
    if following_count == 0 or tweet_count == 0 or profile_age == 0:
        return float('nan')

    ftf_ratio = math.log10((followers_count / following_count) + 1)

    return np.round((followers_count * listed_count * ftf_ratio) / (profile_age * tweet_count), 10)

In [5]:
# Re-check the available columns before deriving user_impact (same listing as
# the earlier inspection cell).
user_info_df.columns

Index(['created_at', 'name', 'username', 'followers_count', 'following_count',
       'tweet_count', 'listed_count', 'description', 'location', 'verified',
       'pol_party'],
      dtype='object')

In [6]:
# Score every account row by row. The column order of the selection must
# match the positional parameters of user_impact, since each row is unpacked
# straight into the call.
user_info_df['user_impact'] = (
    user_info_df[
        ['tweet_count', 'created_at', 'followers_count', 'listed_count', 'following_count']
    ]
    .apply(lambda row: user_impact(*row), axis=1)
)

In [7]:
# Min-max rescale the raw impact scores into a new user_impact_scaled column.
# fit_transform returns an (n, 1) array, flattened here to fill one column.
minMaxScaler = MinMaxScaler()
user_info_df['user_impact_scaled'] = minMaxScaler.fit_transform(
    user_info_df[['user_impact']]
).ravel()

## Plots

In [8]:
# Display the full table, now including user_impact and user_impact_scaled.
user_info_df

Unnamed: 0,created_at,name,username,followers_count,following_count,tweet_count,listed_count,description,location,verified,pol_party,user_impact,user_impact_scaled
0,2009-02-27 23:04:51+00:00,Mike Pence,Mike Pence,9241258,26,10121,39691,"Husband, father, 48th Vice President of the Un...",,True,Republicans,46223.539076,0.259792
1,2008-03-18 17:15:41+00:00,House Republicans,HouseGOP,1953213,1203,42935,9499,Text FREEDOM to 80810 to receive exclusive upd...,,True,Republicans,295.331446,0.0
2,2008-04-13 20:23:13+00:00,The Democrats,TheDemocrats,2181227,73,36241,12410,"We’re fighting for a better, fairer, and brigh...","Washington, D.C.",True,Democrats,715.485395,0.002377
3,2009-04-11 00:42:07+00:00,Kamala Harris,KamalaHarris,19095700,733,16253,26480,"Fighting for the people. Wife, Momala, Auntie....","Washington, DC",True,Democrats,31882.88978,0.178674
4,2007-03-11 17:51:24+00:00,Joe Biden,JoeBiden,32044976,48,7849,37763,"Husband to @DrBiden, proud father and grandfat...","Washington, DC",True,Democrats,177083.702225,1.0
5,2009-03-18 00:00:00+00:00,Donald J. Trump,realDonaldTrump,52589688,31,30864,67028,"46th President of the United States, husband t...",,True,Republicans,164199.517233,0.927121


In [11]:
# Bar chart of the raw impact score per account, coloured by political party.
fig = px.bar(
    user_info_df,
    x='username',
    y='user_impact',
    color='pol_party',
    title='User impact',
    width=1600,
    height=500,
)
fig.show()
# Static export, currently disabled:
# fig.write_image('../../../results/user-impact/by-username.png', engine='kaleido')

In [13]:
# Write the frame (index dropped) back to user_info_reframe.csv, the same path
# it was loaded from earlier in the notebook.
# NOTE(review): this overwrites the input file in place, so the stored file
# gains the user_impact / user_impact_scaled columns -- confirm this is intended.
user_info_df.to_csv('../../../data/twitter/raw/user_info_reframe.csv', index=False)