In [62]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [100]:
# Turns the time-of-day part of an event timestamp into a 15-minute daily tick number (e.g. 00:15 -> 1, 01:30 -> 6, etc.)
def date_to_daytick(date):
    """Return the quarter-hour slot of the day encoded in a timestamp string.

    Characters 8-9 of ``date`` are parsed as the hour and characters 10-11 as
    the minutes, so the string is expected to look like ``YYYYMMDDHHMM...``.
    The result is ``4 * hour`` plus the number of whole 15-minute periods in
    the minutes (00:15 -> 1, 01:30 -> 6).

    Raises:
        ValueError: if either slice is not a valid integer literal (for
            example when ``date`` is too short).
    """
    hh, mm = int(date[8:10]), int(date[10:12])
    # The fractional quarter-hour is dropped by the outer int().
    return int(hh * 4 + mm / 15)

# Takes a csv file containing event rows from the EVENT gdelt table and produces a smaller dataframe with only the necessary cols
def refine_events(filename):
    """Load a GDELT EVENT export and keep only the columns the visualization needs.

    Args:
        filename: Path (or any source accepted by ``pandas.read_csv``) of a
            UTF-8 CSV with a header row that contains ``GLOBALEVENTID``,
            ``SQLDATE``, ``DATEADDED`` and the actor/location/count columns
            listed in ``relevant_event_cols`` below.

    Returns:
        A new DataFrame with, in order: ``GLOBALEVENTID``; ``Year``, ``Month``
        and ``Day`` as strings sliced from ``SQLDATE``; ``DayTick`` computed
        from ``DATEADDED`` by ``date_to_daytick``; then the relevant columns
        copied unchanged.
    """
    df_events = pd.read_csv(filename, encoding='utf-8')

    # Take an explicit copy: adding columns to a bare selection of df_events
    # triggers pandas' SettingWithCopyWarning.
    df_refined = df_events[['GLOBALEVENTID']].copy()

    # Date and time. SQLDATE is stringified once and sliced positionally
    # (YYYYMMDD layout assumed by the slice offsets).
    sql_date = df_events['SQLDATE'].map(str)
    df_refined['Year'] = sql_date.map(lambda text: text[0:4])
    df_refined['Month'] = sql_date.map(lambda text: text[4:6])
    df_refined['Day'] = sql_date.map(lambda text: text[6:8])
    df_refined['DayTick'] = df_events['DATEADDED'].map(lambda x: date_to_daytick(str(x)))

    # Other relevant cols, copied through as-is.
    relevant_event_cols = [
        'Actor1Geo_CountryCode', 'Actor1Geo_Lat', 'Actor1Geo_Long',
        'Actor2Geo_CountryCode', 'Actor2Geo_Lat', 'Actor2Geo_Long',
        'NumMentions', 'NumSources', 'NumArticles',
    ]
    df_refined[relevant_event_cols] = df_events[relevant_event_cols]

    return df_refined

# Takes a csv file containing mention rows from the MENTIONS gdelt table and produces a smaller dataframe with only the necessary cols
def refine_mentions(filename):
    """Load a GDELT MENTIONS export and keep only the columns the visualization needs.

    Args:
        filename: Path (or any source accepted by ``pandas.read_csv``) of a
            UTF-8 CSV with a header row that contains ``GLOBALEVENTID``,
            ``MentionTimeDate`` and ``MentionSourceName``.

    Returns:
        A new DataFrame with, in order: ``GLOBALEVENTID``; ``Year``, ``Month``
        and ``Day`` as strings sliced from ``MentionTimeDate``; ``DayTick``
        computed from ``MentionTimeDate`` by ``date_to_daytick``; and
        ``Source`` (a copy of ``MentionSourceName``).
    """
    df_mentions = pd.read_csv(filename, encoding='utf-8')

    # Take an explicit copy: adding columns to a bare selection of
    # df_mentions triggers pandas' SettingWithCopyWarning.
    df_refined = df_mentions[['GLOBALEVENTID']].copy()

    # Date and time. The timestamp is stringified once and sliced positionally
    # (YYYYMMDDHHMM... layout assumed by the slice offsets).
    mention_time = df_mentions['MentionTimeDate'].map(str)
    df_refined['Year'] = mention_time.map(lambda text: text[0:4])
    df_refined['Month'] = mention_time.map(lambda text: text[4:6])
    df_refined['Day'] = mention_time.map(lambda text: text[6:8])
    df_refined['DayTick'] = mention_time.map(date_to_daytick)

    # Other relevant cols
    df_refined['Source'] = df_mentions['MentionSourceName']

    return df_refined

# Writes the reduced events dataframe to a .js file as an object, which will be directly used by the visualization
def write_events(df, filename):
    """Write the reduced events dataframe to a .js file as ``var events = {...};``.

    Each row becomes one ``<GLOBALEVENTID>: { ... }`` property on its own line.
    Year, month, day, tick and the three counters are written unquoted;
    country codes and coordinates are written inside single quotes.

    Args:
        df: DataFrame with the columns ``GLOBALEVENTID``, ``Year``, ``Month``,
            ``Day``, ``DayTick``, ``Actor1Geo_CountryCode``, ``Actor1Geo_Lat``,
            ``Actor1Geo_Long``, ``Actor2Geo_CountryCode``, ``Actor2Geo_Lat``,
            ``Actor2Geo_Long``, ``NumMentions``, ``NumSources``, ``NumArticles``.
        filename: Path of the file to create or overwrite.

    NOTE(review): month/day are emitted unquoted exactly as stored, so a
    zero-padded string such as '08' is written as the numeric literal 08 -
    confirm the consumer tolerates that form.
    """
    # Format every row first so the target file is only touched once all
    # rows were rendered successfully.
    entries = []
    for _, row in df.iterrows():
        entries.append(
            "\t " + str(row['GLOBALEVENTID']) +
            ": { 'year':" + str(row['Year']) +
            ", 'month':" + str(row['Month']) +
            ", 'day':" + str(row['Day']) +
            ", 'tick':" + str(row['DayTick']) +
            ", 'country_1':'" + str(row['Actor1Geo_CountryCode']) +
            "', 'lat_1':'" + str(row['Actor1Geo_Lat']) +
            "', 'long_1':'" + str(row['Actor1Geo_Long']) +
            "', 'country_2':'" + str(row['Actor2Geo_CountryCode']) +
            "', 'lat_2':'" + str(row['Actor2Geo_Lat']) +
            "', 'long_2':'" + str(row['Actor2Geo_Long']) +
            "', 'mentions':" + str(row['NumMentions']) +
            ", 'sources':" + str(row['NumSources']) +
            ", 'articles':" + str(row['NumArticles']) + "}"
        )

    # Explicit UTF-8 so the output does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write("var events = {\n")
        if entries:
            # Separators are placed by position, not by index label, so the
            # last entry never gets a trailing comma whatever df's index is.
            file.write(",\n".join(entries) + "\n")
        file.write("};")

# Writes the reduced mentions dataframe to a .js file as an object, which will be directly used by the visualization
def write_mentions(df, filename):
    """Write the reduced mentions dataframe to a .js file as ``var mentions = {...};``.

    Each row becomes one ``<GLOBALEVENTID>: { ... }`` property on its own line.
    Year, month, day and tick are written unquoted; the source name is written
    inside single quotes, with backslashes and single quotes backslash-escaped
    so the name cannot terminate the string literal early.

    Args:
        df: DataFrame with the columns ``GLOBALEVENTID``, ``Year``, ``Month``,
            ``Day``, ``DayTick`` and ``Source``.
        filename: Path of the file to create or overwrite.

    NOTE(review): entries are keyed by GLOBALEVENTID, so several rows sharing
    one event id produce repeated keys in the emitted object literal - confirm
    this is what the visualization expects.
    """
    # Format every row first so the target file is only touched once all
    # rows were rendered successfully.
    entries = []
    for _, row in df.iterrows():
        source = str(row['Source']).replace("\\", "\\\\").replace("'", "\\'")
        entries.append(
            "\t " + str(row['GLOBALEVENTID']) +
            ": { 'year':" + str(row['Year']) +
            ", 'month':" + str(row['Month']) +
            ", 'day':" + str(row['Day']) +
            ", 'tick':" + str(row['DayTick']) +
            ", 'source':'" + source + "'}"
        )

    # Explicit UTF-8 so the output does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write("var mentions = {\n")
        if entries:
            # Separators are placed by position, not by index label, so the
            # last entry never gets a trailing comma whatever df's index is.
            file.write(",\n".join(entries) + "\n")
        file.write("};")

In [101]:
# Build the reduced events and mentions dataframes from the two CSV exports
# (file names are relative to the notebook's working directory).
df_new_E = refine_events('20181025_15000_E.csv')
df_new_M = refine_mentions('20181025_15000_M.csv')

In [102]:
# Emit both reduced dataframes as .js files one directory above the working directory.
write_mentions(df_new_M, './../mentions.js')
write_events(df_new_E, './../events.js')