In [40]:
import pandas as pd
import os
import classify_tweets as classify
import pickle as pp
import csv

# Lift every pandas display limit so that long tweets and wide frames are
# rendered in full instead of being truncated with "...".
for _display_option in (
    "display.max_rows",
    "display.max_columns",
    "display.width",
    "display.max_colwidth",
):
    pd.set_option(_display_option, None)

In [24]:
def clean_data(data):
    """Return a cleaned copy of a raw tweet table.

    Three steps are applied, in order:

    1. Rows whose ``Text`` contains ``"RT @"`` (retweets) are dropped.
    2. ``Date`` is cut at the first ``"T"`` so that only the part in front of
       it is kept (the ``YYYY-MM-DD`` date of an ISO-style timestamp).
    3. Every ``http://`` / ``https://`` link is removed from ``Text``; the
       words around the link are kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with string columns ``"Date"`` and ``"Text"``. Missing values
        in either column are tolerated and passed through as missing.

    Returns
    -------
    pandas.DataFrame
        A new frame. Surviving rows keep their original index labels (the
        index is not reset). ``data`` itself is not modified.
    """
    # regex=False: "RT @" is a literal marker, not a pattern.
    # na=False: a row without text cannot be identified as a retweet, so it
    # is kept rather than producing a NaN in the boolean mask.
    is_retweet = data["Text"].str.contains("RT @", regex=False, na=False)

    # Work on a copy so the caller's frame stays exactly as it was passed in.
    cleaned = data.loc[~is_retweet].copy()

    # Clean up the date format (must be in YYYY-MM-DD format).
    cleaned["Date"] = cleaned["Date"].str.split("T").str[0]

    # Remove the links themselves (plus the whitespace in front of them),
    # not everything that follows the first link.
    cleaned["Text"] = cleaned["Text"].str.replace(r"\s*https?://\S+", "", regex=True)

    return cleaned

def filter_data(data, countries_path="data/countries.csv", exclude=("Kenya",)):
    """Keep the tweets that mention at least one country from a CSV list.

    The country file is read with ``csv.reader``; its first row is treated as
    a header and skipped, and the country name is taken from the second
    column of every following row. A tweet is kept when any of those names
    occurs in its text. Matching is a plain, case-sensitive substring test,
    so e.g. "Senegal" also matches "Senegalese".

    Parameters
    ----------
    data : pandas.DataFrame
        Table with ``"Date"`` and ``"Text"`` columns.
    countries_path : str, optional
        Location of the country CSV. Defaults to ``"data/countries.csv"``.
    exclude : str or iterable of str, optional
        Country name(s) that must not count as a match. Defaults to
        ``("Kenya",)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``["Date", "Text"]`` with one row per distinct
        ``(Date, Text)`` pair that matched, in order of first appearance.
        The number of rows is also printed.
    """
    if isinstance(exclude, str):
        exclude = (exclude,)
    excluded_names = set(exclude)

    # newline="" is what the csv module asks for when opening files.
    with open(countries_path, "r", newline="") as countriesfile:
        reader = csv.reader(countriesfile)
        next(reader, None)  # skip the header row (tolerates an empty file)
        country_names = [
            row[1]
            for row in reader
            # Blank / short rows have no name column; an empty name would
            # match every tweet because "" is a substring of any string.
            if len(row) > 1 and row[1] and row[1] not in excluded_names
        ]

    target_tweets = []
    seen_pairs = set()  # constant-time duplicate check instead of a list scan
    for date, tweet in zip(data["Date"], data["Text"]):
        # Missing text arrives as None/NaN and cannot be searched.
        if not isinstance(tweet, str):
            continue
        pair = (date, tweet)
        if pair in seen_pairs:
            continue
        if any(name in tweet for name in country_names):
            seen_pairs.add(pair)
            target_tweets.append([date, tweet])

    print(f"Number of tweets found is: {len(target_tweets)}")
    target_dataset = pd.DataFrame(target_tweets, columns=["Date", "Text"])

    return target_dataset

In [25]:
# Load the raw presidential tweet export, then clean and filter it.
raw_pres_tweets = pd.read_csv("data/raw_tweets/raw_president_tweets.csv")

# Hand clean_data a copy so that raw_pres_tweets keeps the data exactly as it
# was read from disk, whether or not clean_data modifies its argument.
clean_pres_tweets = clean_data(raw_pres_tweets.copy())
filtered_tweets = filter_data(clean_pres_tweets)

Number of tweets found is: 1503


In [43]:
# Write both intermediate tables to disk (the frame index is written too).
for _frame, _csv_path in (
    (clean_pres_tweets, "data/raw_tweets/clean_president_tweets.csv"),
    (filtered_tweets, "data/raw_tweets/filtered_president_tweets.csv"),
):
    _frame.to_csv(_csv_path)

In [26]:
# Load the fitted vectorizer and classifier from their pickle files.
# NOTE(security): unpickling can execute arbitrary code, so these files must
# come from a trusted source.
# The `with` blocks make sure both file handles are closed after loading.
with open("data/model_training/200_samples/TfidfVectorizer().pickle", "rb") as vec_file:
    loaded_vec = pp.load(vec_file)
with open("data/model_training/200_samples/CalibratedClassifierCV().pickle", "rb") as classifier_file:
    loaded_classifier = pp.load(classifier_file)

print("Loaded vectorizer and classifier")

Loaded vectorizer and classifier


In [29]:
# Classify every filtered tweet; each entry is a [text, predicted label] pair.
preds_arr = [
    [tweet_text, classify.predict(tweet_text, loaded_vec, loaded_classifier)]
    for tweet_text in filtered_tweets["Text"]
]

In [30]:
# Tabulate the [text, label] pairs, one tweet per row.
prediction_columns = ["Text", "Category"]
prediction_df = pd.DataFrame(data=preds_arr, columns=prediction_columns)

Unnamed: 0,Text,Category
0,"Prime Cabinet Secretary @MusaliaMudavadi, Nati...",UNRELATED
1,He explained that his Senegalese counterpart h...,UNRELATED
2,"On his part, President Sall called on African ...",UNRELATED
3,President @WilliamsRuto and his Senegalese cou...,UNRELATED
4,"KENYA, SENEGAL AGREE TO CHAMPION THE INTEGRATI...",UNRELATED


In [42]:
# Display only the tweets whose predicted category is "AWAY".
is_away = prediction_df["Category"].eq("AWAY")
prediction_df[is_away]

Unnamed: 0,Text,Category
35,President Ruto was on a one-day official visit to South Sudan. He was accompanied by Roads and infrastructure Cabinet Secretary @kipmurkomen among others.\n\nSouth Sudan’s First Vice President Riek Machar was present at the press briefing.,AWAY
55,"He said: ""The imbalance of trade favours Korea and Parliament can be instrumental in addressing this situation."" \n\nPresident Ruto made the remarks at the Parliament building in Seoul, South Korea, when he held talks with speaker Kim Jin Pyo.",AWAY
62,President Ruto's first meeting on Tuesday will be with the Kenyan community living in South Korea.\n\nThe President is in South Korea for a three-day official visit on the invitation of his counterpart Yoon Suk Yeol.,AWAY
63,"KENYA TO EXPAND ECONOMIC TIES AND DIPLOMATIC RELATIONS WITH SOUTH KOREA\n\nPresident @WilliamsRuto has arrived in Seoul, South Korea, for bilateral talks with President Yoon Suk Yeol.",AWAY
68,"The Head of State was speaking at State House, Nairobi, after holding bilateral talks with South African President @CyrilRamaphosa who is in the country for a two-day State Visit.",AWAY
78,"He was speaking on Monday in Sharm El-Sheikh, Egypt, when he held bilateral talks with President William Ruto.\n\nThe Prime Minister lauded Kenya for pioneering climate leadership and pledged to further support the country in advancing regional peace and security.",AWAY
82,He will advocate for the implementation of the Paris Agreement adopted by 196 Parties at COP21 in 2015 that undertakes to combat climate change and adapt to its effects.\n\nPresident Ruto later met his Rwanda counterpart @PaulKagame for bilateral talks.,AWAY
83,"PRESIDENT RUTO TO PUSH AFRICA’S AGENDA AT CLIMATE CHANGE CONFERENCE\n\nPresident @WilliamsRuto has arrived in Sharm El-Sheikh, Egypt for the 2022 United Nations Climate Change Conference (COP27).",AWAY
122,"PRESIDENT WILLIAM RUTO ARRIVES IN TANZANIA\n\nPresident @WilliamsRuto Sunday evening arrived in Dar es Salaam for a two-day state visit.\n\nHe is scheduled to hold talks with his Tanzanian counterpart, President @SuluhuSamia on Monday.",AWAY
164,"2/5\nOn arrival at the Arusha Airport, President Kenyatta was received by Tanzanian Foreign Affairs and East African Cooperation Minister Amb. Liberata Mulamula and Kenya’s acting EAC &amp; Regional Development CS Betty Maina who is also the chairperson of the EAC Council of Ministers",AWAY


In [44]:
# Save the predictions without the frame index.
predictions_path = "data/predictions/presidential_tweets.csv"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
prediction_df.to_csv(predictions_path, index=False)