# List of Influencers: Continuation

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [2]:
# Load the tweet export. low_memory=False makes pandas parse the file in a
# single pass instead of in chunks, which avoids mixed-type column inference.
kenyan_tweets = pd.read_csv(
    'data/Tweets_with_words.csv',
    low_memory=False,
)

## Get a list based on the popularity of each author's tweets

In [3]:
# Popularity score per author: the author's mean retweet count and mean
# favourite count, each scaled to unit L2 norm across all authors, then summed.

# Authors whose combined score is below this cut-off are dropped.
POPULARITY_THRESHOLD = 0.01

# Drop the rows whose 'retweet_count' cell holds the literal column name
# (presumably header lines repeated inside the CSV). The frame is changed in
# place because later cells read kenyan_tweets again.
header_rows = kenyan_tweets[kenyan_tweets['retweet_count'] == 'retweet_count'].index
kenyan_tweets.drop(header_rows, inplace=True)

kenyan_tweets['retweet_count'] = pd.to_numeric(kenyan_tweets['retweet_count'])
kenyan_tweets['favorite_count'] = pd.to_numeric(kenyan_tweets['favorite_count'])

# numeric_only=True keeps the aggregation working on pandas >= 2.0, where
# averaging non-numeric columns raises TypeError instead of silently dropping
# them as older versions did.
kenyan_popularity = kenyan_tweets.groupby('original_author', as_index=False).mean(numeric_only=True)

# .copy() so the column assignments below write to an independent frame.
kenyan_popularity_values = kenyan_popularity.loc[
    :, ['original_author', 'retweet_count', 'favorite_count']
].copy()

# L2-normalise each metric across authors (this is normalisation, not
# standardisation: every value is divided by the Euclidean norm of its column).
for metric in ('retweet_count', 'favorite_count'):
    unit_scaled = preprocessing.normalize([np.array(kenyan_popularity_values[metric])])[0]
    # Positional assignment is safe: as_index=False gives the frame a fresh 0..n-1 index.
    kenyan_popularity_values[metric + '_normalized'] = unit_scaled

# Popularity is the sum of the two normalised metrics.
kenyan_popularity_values['popularity'] = (
    kenyan_popularity_values['retweet_count_normalized']
    + kenyan_popularity_values['favorite_count_normalized']
)
kenyan_popularity_values = kenyan_popularity_values.sort_values(by=['popularity'], ascending=False)

# Keep only the authors at or above the cut-off.
kenyan_popularity_values = kenyan_popularity_values[
    kenyan_popularity_values['popularity'] >= POPULARITY_THRESHOLD
]

## Get a list based on how often each user is mentioned in tweets

In [4]:
# Flatten the ', '-separated 'user_mentions' cells into one flat list of
# usernames. Cells without a split() method (presumably NaN for tweets that
# mention nobody) are skipped.
mentions_in_kenya = kenyan_tweets['user_mentions']
username_mentions = [
    handle
    for cell in mentions_in_kenya
    if hasattr(cell, 'split')
    for handle in cell.split(', ')
]


In [5]:
# Count how often each username is mentioned and keep those mentioned at least
# 100 times. The threshold is applied to the Series itself because the column
# label pd.DataFrame() gives a value_counts() result depends on the pandas
# version (0 before 2.0, 'count' from 2.0 on), so indexing the frame with [0]
# raises KeyError on current pandas. to_frame(name=0) pins the label to 0.
mention_counts = pd.Series(username_mentions).value_counts()
username_mentions = mention_counts[mention_counts >= 100].to_frame(name=0)

## Combine both lists

In [6]:
# Pool the frequently mentioned usernames with the popular authors and
# de-duplicate them (a union, not an intersection). value_counts() sorts the
# pooled names by how many of the two lists they appear in, so names found in
# both lists come first.
pooled_names = list(username_mentions.index) + list(kenyan_popularity_values['original_author'])
influencers_in_kenya = list(pd.Series(pooled_names).value_counts().index)

## Shortlisting time!
Let us remove the accounts that do not have enough reach.

In [7]:
import sys
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import re
import string
from collections import Counter
from datetime import datetime, date, time, timedelta
import twitter

In [8]:
#Import the necessary methods from tweepy library  
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
from tweepy import Cursor

#sentiment analysis package
from textblob import TextBlob

# General text pre-processing (NLTK). The two download calls fetch the 'punkt'
# and 'stopwords' resources; NLTK reports them as up to date when they are
# already installed.
import nltk
from nltk.corpus import stopwords
nltk.download('punkt')
nltk.download('stopwords')

# Tweet pre-processor
import preprocessor as p

# Load environment variables from a .env file. The first call relies on
# python-dotenv's default lookup; the second explicitly names the .env file in
# the current working directory.
from dotenv import load_dotenv
load_dotenv()

from pathlib import Path  # Python 3.6+ only
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\smwik\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\smwik\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [9]:
# Twitter API credentials are read from environment variables. os.environ.get
# returns None for a variable that is not set, so check up front and fail with
# a message naming what is missing instead of handing None to tweepy.
_credential_vars = (
    'TWITTER_API_KEY',
    'TWITTER_API_SECRET',
    'TWITTER_ACCESS_TOKEN',
    'TWITTER_ACCESS_TOKEN_SECRET',
)
_missing_vars = [name for name in _credential_vars if not os.environ.get(name)]
if _missing_vars:
    raise EnvironmentError('Missing Twitter credentials: ' + ', '.join(_missing_vars))

consumer_key = os.environ.get('TWITTER_API_KEY')
consumer_secret = os.environ.get('TWITTER_API_SECRET')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET')

# Handle Twitter authentication and build the API client used by later cells.
# wait_on_rate_limit makes the client wait when the rate limit is hit.
# NOTE(review): wait_on_rate_limit_notify is a tweepy 3.x option, the same
# release line the StreamListener import in this notebook targets.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)


In [10]:
# Look every candidate up through the Twitter user search and, for the result
# whose screen name matches the candidate exactly, record
# [name, screen name, follower count, friend ("following") count].
influencers_shortlist = []
failed_lookups = []  # (username, exception) pairs for lookups that raised
for username in influencers_in_kenya:
    try:
        search = api.search_users(username)
        for user in search:
            if user.screen_name == username:
                influencers_shortlist.append(
                    [user.name, user.screen_name, user.followers_count, user.friends_count]
                )
    except Exception as error:
        # Best effort: one failed lookup must not abort the whole run. Catching
        # Exception rather than using a bare except lets KeyboardInterrupt and
        # SystemExit through, so this long loop can still be interrupted.
        failed_lookups.append((username, error))
        continue

if failed_lookups:
    print('%d of %d user lookups failed and were skipped'
          % (len(failed_lookups), len(influencers_in_kenya)))

In [11]:
# Reach score = followers minus following. Keep the accounts whose score is at
# least 100000 and reduce the result to a one-column frame of usernames.
profile_columns = ['Name', 'Username', 'Number of Followers', 'Number of Following']
profiles = pd.DataFrame(influencers_shortlist, columns=profile_columns)
profiles['reach_score'] = profiles['Number of Followers'] - profiles['Number of Following']
has_reach = profiles['reach_score'] >= 100000
influencers_shortlist = pd.DataFrame(list(profiles.loc[has_reach, 'Username']))

In [12]:
# Show the shortlist: a single column (labelled 0) holding the usernames.
influencers_shortlist

Unnamed: 0,0
0,kipmurkomen
1,TheStarBreaking
2,bonifacemwangi
3,DavidNdii
4,DCI_Kenya
...,...
103,GhettoRadio895
104,dailynation
105,CarolRadull
106,aajtak


In [13]:
# Save the shortlist. With to_csv's default settings the row index is written
# as an unnamed first column alongside the username column.
shortlist_path = 'data/influencers_shortlist.csv'
influencers_shortlist.to_csv(shortlist_path)