In [1]:
from pprint import pprint
from bs4 import BeautifulSoup
import re
from nltk.tokenize import WordPunctTokenizer
import os
import joblib
import pandas as pd

In [2]:
country_leaders = {'United States':['realDonaldTrump','POTUS','WhiteHouse','StateDept'], 'Vatican':['Pontifex'], 'India':['narendramodi','PMOIndia','SushmaSwaraj','rashtrapatibhvn','MEAIndia','IndianDiplomacy'], 'Turkey':['RT_Erdogan','tcbestepe','TC_Disisleri','MevlutCavusoglu'], 'Indonesia':['jokowi'], 'Jordan':['QueenRania'], 'United Arab Emirates': ['HHShkMohd','ABZayed','MohamedBinZayed'], 'Pakistan':['ImranKhanPTI','SMQureshiPTI','ArifAlvi'], 'Saudi Arabia':['KingSalman','AdelAljubeir','KSAMOFA'], 'United Kingdom':['GOVUK','10DowningStreet','RoyalFamily'], 'Russia': ['MedvedevRussia', 'KremlinRussia'], 'Mexico':['lopezobrador_','m_ebrard'], 'Argentina':['mauriciomacri'], 'Canada':['JustinTrudeau'], 'France':['EmmanuelMacron', 'Elysee'], 'Venezuela':['NicolasMaduro','jaarreaza'], 'Brazil': ['jairbolsonaro'], 'Egypt':['AlsisiOfficial'], 'Chile':['sebastianpinera'], 'South Korea': ['moonriver365'], 'Nigeria': ['MBuhari','NGRPresident'], 'Ecuador':['Presidencia_Ec'], 'Lebanon':['saadhariri'], 'Israel':['netanyahu'], 'Rwanda': ['PaulKagame']}
countryinfo = ['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antarctica', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia and Herzegowina', 'Botswana', 'Bouvet Island', 'Brazil', 'British Indian Ocean Territory', 'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Christmas Island', 'Cocos (Keeling) Islands', 'Colombia', 'Comoros', 'Congo', 'Congo, the Democratic Republic of the', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia (Hrvatska)', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'East Timor', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Falkland Islands (Malvinas)', 'Faroe Islands', 'Fiji', 'Finland', 'France', 'France Metropolitan', 'French Guiana', 'French Polynesia', 'French Southern Territories', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Gibraltar', 'Greece', 'Greenland', 'Grenada', 'Guadeloupe', 'Guam', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Heard and Mc Donald Islands', 'Holy See (Vatican City State)', 'Honduras', 'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran (Islamic Republic of)', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', "Korea, Democratic People's Republic of", 'Korea, Republic of', 'Kuwait', 'Kyrgyzstan', "Lao, People's Democratic Republic", 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libyan Arab Jamahiriya', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Macau', 'Macedonia, The Former Yugoslav Republic of', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Martinique', 'Mauritania', 'Mauritius', 'Mayotte', 'Mexico', 'Micronesia, Federated States of', 'Moldova, Republic of', 'Monaco', 'Mongolia', 'Montserrat', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'Netherlands Antilles', 'New Caledonia', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'Niue', 'Norfolk Island', 'Northern Mariana Islands', 'Norway', 'Oman', 'Pakistan', 'Palau', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Pitcairn', 'Poland', 'Portugal', 'Puerto Rico', 'Qatar', 'Reunion', 'Romania', 'Russian Federation', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines', 'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia (Slovak Republic)', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 'South Georgia and the South Sandwich Islands', 'Spain', 'Sri Lanka', 'St. Helena', 'St. Pierre and Miquelon', 'Sudan', 'Suriname', 'Svalbard and Jan Mayen Islands', 'Swaziland', 'Sweden', 'Switzerland', 'Syrian Arab Republic', 'Taiwan, Province of China', 'Tajikistan', 'Tanzania, United Republic of', 'Thailand', 'Togo', 'Tokelau', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Turks and Caicos Islands', 'Tuvalu', 'Uganda', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States', 'United States Minor Outlying Islands', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela', 'Vietnam', 'Virgin Islands (British)', 'Virgin Islands (U.S.)', 'Wallis and Futuna Islands', 'Western Sahara', 'Yemen', 'Yugoslavia', 'Zambia', 'Zimbabwe']
BASE_PATH = 'Tweet_Data/'
EXT = '_tweets.csv'
MODEL_DIR = "models"

In [3]:
if os.path.exists(os.path.join(MODEL_DIR,'model.pkl')):
    sentiment_pipeline = joblib.load(os.path.join(MODEL_DIR,'model.pkl'))
else:
    print("Model Not Found")

In [4]:
tok = WordPunctTokenizer()
pat1 = r'@[A-Za-z0-9]+'
pat2 = r'https?://[A-Za-z0-9./]+'
combined_pat = r'|'.join((pat1, pat2))
def tweet_cleaner(text):
    soup = BeautifulSoup(text, 'lxml')
    souped = soup.get_text()
    stripped = re.sub(combined_pat, '', souped)
    try:
        clean = stripped.decode("utf-8-sig").replace(u"\ufffd", "?")
    except:
        clean = stripped
    letters_only = re.sub("[^a-zA-Z]", " ", clean)
    lower_case = letters_only.lower()
    words = tok.tokenize(lower_case)
    return (" ".join(words)).strip()

In [5]:
for i in country_leaders.keys():
    final = {}
    print('Mapping Relations of',i)
    for j in country_leaders[i]:
        tweet_file = os.path.join(BASE_PATH,j+EXT)
        names = ('id', 'created_at', 'text')
        df = pd.read_csv(tweet_file, encoding='latin1', names=names)
        for t in df['text']:
            for x in range(len(countryinfo)):
                if type(countryinfo[x]) == type(t) and countryinfo[x] in t:
                    stmt = tweet_cleaner(t)
                    val = int(sentiment_pipeline.predict([stmt]))
                    if countryinfo[x] not in final:
                        final[countryinfo[x]] = val
                    else:
                        final[countryinfo[x]]+=val
    pprint(final)

Mapping Relations of Country United States
{'Afghanistan': 4,
 'Albania': 3,
 'Algeria': 2,
 'Angola': 8,
 'Argentina': 50,
 'Armenia': 1,
 'Australia': 20,
 'Austria': 7,
 'Azerbaijan': 1,
 'Bahamas': 8,
 'Bahrain': 12,
 'Bangladesh': -1,
 'Belgium': 25,
 'Benin': -1,
 'Bhutan': 1,
 'Brazil': 33,
 'Bulgaria': 5,
 'Burundi': 1,
 'Cambodia': 1,
 'Cameroon': 3,
 'Canada': 60,
 'Chile': 2,
 'China': 77,
 'Colombia': 34,
 'Congo': 4,
 'Cuba': 15,
 'Cyprus': 3,
 'Czech Republic': 1,
 'Denmark': 2,
 'Djibouti': 1,
 'Dominica': 8,
 'Dominican Republic': 8,
 'Ecuador': 4,
 'Egypt': 38,
 'El Salvador': 2,
 'Equatorial Guinea': 1,
 'Estonia': 3,
 'Ethiopia': -2,
 'Fiji': 1,
 'Finland': 13,
 'France': 61,
 'Georgia': 35,
 'Germany': 15,
 'Ghana': 19,
 'Greece': 8,
 'Guam': 2,
 'Guatemala': 4,
 'Guinea': 8,
 'Guyana': 1,
 'Haiti': 3,
 'Honduras': 8,
 'Hong Kong': 3,
 'Hungary': 8,
 'Iceland': 5,
 'India': 45,
 'Indonesia': 16,
 'Iraq': 48,
 'Ireland': 15,
 'Israel': 65,
 'Italy': 15,
 'Jamaica': 3

{'Afghanistan': 8,
 'Australia': 4,
 'Bahamas': -1,
 'Bahrain': -1,
 'Bangladesh': 3,
 'Belgium': 1,
 'Brazil': -1,
 'Canada': -1,
 'China': 13,
 'Cuba': 2,
 'Denmark': 1,
 'Egypt': -1,
 'Ethiopia': 1,
 'Finland': 1,
 'France': -1,
 'Germany': 1,
 'Iceland': 1,
 'India': 27,
 'Indonesia': 0,
 'Iraq': -5,
 'Ireland': 0,
 'Israel': 7,
 'Japan': 2,
 'Kenya': 1,
 'Lebanon': 0,
 'Malaysia': 1,
 'Maldives': 1,
 'Mali': 19,
 'Mexico': 1,
 'Myanmar': 2,
 'Nepal': 1,
 'New Zealand': 3,
 'Oman': 2,
 'Pakistan': 428,
 'Panama': 6,
 'Peru': 0,
 'Qatar': -1,
 'Romania': 2,
 'Saudi Arabia': 0,
 'Singapore': 1,
 'Somalia': -1,
 'South Africa': 0,
 'Sri Lanka': -1,
 'Sweden': -1,
 'Switzerland': 2,
 'Tajikistan': 1,
 'Turkey': 5,
 'United States': 2,
 'Vietnam': -2,
 'Yemen': 0,
 'Zimbabwe': 1}
Mapping Relations of Country Saudi Arabia
{'Algeria': 1,
 'Angola': 1,
 'Argentina': 10,
 'Austria': 1,
 'Bahrain': 4,
 'Belgium': 2,
 'Canada': 2,
 'China': 15,
 'Cyprus': 1,
 'Czech Republic': 1,
 'Egypt': 8,

{'Argentina': 2,
 'Australia': 1,
 'Canada': 1,
 'Chile': 10,
 'China': 43,
 'Colombia': 21,
 'Costa Rica': 3,
 'Cuba': 5,
 'Ecuador': 433,
 'Guam': 1,
 'Guatemala': 12,
 'Hong Kong': 1,
 'Mexico': 8,
 'Panama': 3,
 'Paraguay': 1,
 'Peru': 48,
 'Qatar': 1,
 'Reunion': 4,
 'Uruguay': 1,
 'Venezuela': 25}
Mapping Relations of Country Lebanon
{'China': 0,
 'France': 3,
 'Germany': 1,
 'Lebanon': 46,
 'Pakistan': 1,
 'Saudi Arabia': -1,
 'Spain': 1,
 'Sweden': 1,
 'United States': 2}
Mapping Relations of Country Israel
{'Albania': 1,
 'Antarctica': -1,
 'Argentina': 3,
 'Armenia': 1,
 'Australia': 11,
 'Austria': 5,
 'Azerbaijan': 1,
 'Belgium': 1,
 'Brazil': 13,
 'Bulgaria': 3,
 'Canada': 4,
 'Cape Verde': -1,
 'Chad': 3,
 'Chile': 1,
 'China': 3,
 'Colombia': 5,
 'Cyprus': 8,
 'Denmark': -1,
 'Ecuador': 1,
 'Egypt': 2,
 'Ethiopia': 2,
 'Fiji': 1,
 'France': 4,
 'Gabon': 1,
 'Georgia': 1,
 'Germany': 4,
 'Greece': 6,
 'Guatemala': 4,
 'Hungary': 3,
 'India': 36,
 'Israel': 282,
 'Italy': 