In [1]:
import folium
import pandas as pd
import geopandas as gpd
import os
import json

In [2]:
from cassandra.cluster import Cluster

# Connect to the Cassandra cluster and load, for one (location, event code)
# pair, the `day`, `country` and `frequency` columns of the
# `mentions_by_location_eventcode` table in the `gdelt` keyspace.
cluster = Cluster(['ec2-35-170-17-129.compute-1.amazonaws.com'])
try:
    session = cluster.connect('gdelt')

    query = "SELECT day, country, frequency FROM mentions_by_location_eventcode WHERE location = 'Las Vegas' AND eventcode = 180"
    # list() drains the whole result set up front, so nothing is fetched
    # lazily after the cluster has been shut down below.
    mentions = pd.DataFrame(list(session.execute(query)))
finally:
    # Close the driver's connections even when connecting or querying fails;
    # without this they stay open for as long as the kernel lives.
    cluster.shutdown()

  This is separate from the ipykernel package so we can avoid doing imports until


In [3]:
# Drop the final character of every raw `day` value.
# NOTE: not idempotent -- each re-run of this cell strips one more character.
trimmed_days = [raw_day[:-1] for raw_day in mentions['day']]
mentions['day'] = trimmed_days
mentions.head()

Unnamed: 0,day,country,frequency
0,20171002,EI,1
1,20171002,US,3
2,20171002,EI,1
3,20171002,UK,1
4,20171002,US,14


In [4]:
# Merge rows sharing the same (day, country) pair by summing their remaining
# columns; `as_index=False` keeps the two keys as ordinary columns.
group_keys = ['day', 'country']
mentions = mentions.groupby(group_keys, as_index=False).agg('sum')
mentions.head()

Unnamed: 0,day,country,frequency
0,20171002,EI,2
1,20171002,IS,1
2,20171002,UK,1
3,20171002,US,26
4,20171003,CA,2


In [5]:
# Convert each `day` into a string of epoch seconds. These strings become the
# timestamp keys that the slider's JavaScript parses with
# `new Date(parseInt(ts) * 1000)`.
mentions['day'] = pd.to_datetime(mentions['day'])
# `strftime('%s')` is a libc extension that Python does not document and that
# is unavailable on some platforms (e.g. Windows). `datetime.timestamp()` is
# the portable equivalent: a naive datetime is interpreted as local time, so
# every day still maps to local midnight exactly as `%s` did.
mentions['day'] = mentions['day'].apply(
    lambda ts: str(int(ts.to_pydatetime().timestamp()))
)
mentions.head()

Unnamed: 0,day,country,frequency
0,1506895200,EI,2
1,1506895200,IS,1
2,1506895200,UK,1
3,1506895200,US,26
4,1506981600,CA,2


In [6]:
# Reshape the long (day, country, frequency) table into a wide one: one row
# per day, one column per country, each cell holding that pair's frequency.
# Pairs that never occur come out as NaN.
frequency_by_key = mentions.set_index(['day', 'country'])['frequency']
mentions_by_day = frequency_by_key.unstack('country')
mentions_by_day.head()

country,AE,CA,CS,EI,GH,IS,KS,MY,NZ,RP,RS,UK,US,VM
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1506895200,,,,2.0,,1.0,,,,,,1.0,26.0,
1506981600,,2.0,8.0,,,3.0,,1.0,1.0,2.0,1.0,7.0,38.0,
1507068000,2.0,2.0,,1.0,,2.0,,,,6.0,,,83.0,1.0
1507154400,,,,,,,,,,,,,24.0,
1507240800,,,,,,,,,,,,,9.0,


In [7]:
# A NaN cell means the (day, country) pair had no row in `mentions`;
# count it as zero.
mentions_by_day = mentions_by_day.fillna(value=0.0)
mentions_by_day.head()

country,AE,CA,CS,EI,GH,IS,KS,MY,NZ,RP,RS,UK,US,VM
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1506895200,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,26.0,0.0
1506981600,0.0,2.0,8.0,0.0,0.0,3.0,0.0,1.0,1.0,2.0,1.0,7.0,38.0,0.0
1507068000,2.0,2.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,83.0,1.0
1507154400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0
1507240800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0


In [8]:
# Number of rows in domainCountry.csv for each FIPS country code.
# presumably one row per media domain -- verify against the CSV
domain_table = pd.read_csv('domainCountry.csv')
medias_by_country = domain_table.groupby(by='FIPSCountryCode').size()
medias_by_country.head()

FIPSCountryCode
AA     1
AC     1
AE    61
AF    31
AG    50
dtype: int64

In [9]:
# Keep only the per-country counts for the countries that are columns of
# `mentions_by_day`, in that same column order.
countries_with_mentions = mentions_by_day.columns
ratios = medias_by_country.loc[countries_with_mentions]
ratios

country
AE       61
CA      829
CS       30
EI      224
GH       45
IS      310
KS      126
MY       66
NZ      108
RP      149
RS      701
UK     1469
US    12331
VM      219
dtype: int64

In [10]:
# Divide every country's daily mention count by that country's entry in
# `ratios`. `div(..., axis='columns')` matches the labels of `ratios` against
# the column labels and divides the whole frame in one vectorised step,
# producing the same values as a row-by-row `apply` without the per-row
# Python overhead.
mentions_by_day = mentions_by_day.div(ratios, axis='columns')
mentions_by_day.head()

country,AE,CA,CS,EI,GH,IS,KS,MY,NZ,RP,RS,UK,US,VM
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1506895200,0.0,0.0,0.0,0.008929,0.0,0.003226,0.0,0.0,0.0,0.0,0.0,0.000681,0.002109,0.0
1506981600,0.0,0.002413,0.266667,0.0,0.0,0.009677,0.0,0.015152,0.009259,0.013423,0.001427,0.004765,0.003082,0.0
1507068000,0.032787,0.002413,0.0,0.004464,0.0,0.006452,0.0,0.0,0.0,0.040268,0.0,0.0,0.006731,0.004566
1507154400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001946,0.0
1507240800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00073,0.0


In [11]:
# Rescale each country column by its own maximum, so every column peaks at 1.
# Alternative kept for reference: scale by the single global maximum instead,
# i.e. max_mentions = mentions_by_day.max().max()
max_mentions = mentions_by_day.max(axis=0)
mentions_by_day = mentions_by_day.div(max_mentions, axis='columns')
mentions_by_day.head()

country,AE,CA,CS,EI,GH,IS,KS,MY,NZ,RP,RS,UK,US,VM
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1506895200,0.0,0.0,0.0,1.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.142857,0.313253,0.0
1506981600,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.333333,0.5,1.0,0.457831,0.0
1507068000,1.0,1.0,0.0,0.5,0.0,0.666667,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
1507154400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.289157,0.0
1507240800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108434,0.0


In [12]:
# Lookup table read from the ';'-separated iso3_fips.csv, indexed by its
# `fips` column.
fips_to_iso3 = pd.read_csv('iso3_fips.csv', sep=';').set_index('fips')

def convert_countrycode(fips):
    """Return the `iso3` value stored for the FIPS country code `fips`.

    The code is looked up in the index of the module-level `fips_to_iso3`
    table; a code that is not in that index raises ``KeyError``.
    """
    return fips_to_iso3.loc[fips, 'iso3']

# Relabel the country columns by passing each label through
# convert_countrycode (FIPS -> ISO3).
# NOTE: re-running this cell would look the already-converted labels up as if
# they were FIPS codes.
mentions_by_day = mentions_by_day.rename(columns=convert_countrycode)
mentions_by_day.head()

country,ARE,CAN,CRI,IRL,GHA,ISR,KOR,MYS,NZL,PHL,RUS,GBR,USA,VNM
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1506895200,0.0,0.0,0.0,1.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.142857,0.313253,0.0
1506981600,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.333333,0.5,1.0,0.457831,0.0
1507068000,1.0,1.0,0.0,0.5,0.0,0.666667,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
1507154400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.289157,0.0
1507240800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108434,0.0


In [13]:
# Load the country polygons bundled with GeoPandas and index them by the
# `iso_a3` column so they can be matched against the ISO3 column labels.
# NOTE(review): newer GeoPandas releases appear to have dropped
# `gpd.datasets` -- confirm against the installed version.
dataset_name = 'naturalearth_lowres'
assert dataset_name in gpd.datasets.available
datapath = gpd.datasets.get_path(dataset_name)
gdf = gpd.read_file(datapath).set_index('iso_a3')
gdf.head()

Unnamed: 0_level_0,pop_est,continent,name,gdp_md_est,geometry
iso_a3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AFG,28400000.0,Asia,Afghanistan,22270.0,"POLYGON ((61.21081709172574 35.65007233330923,..."
AGO,12799293.0,Africa,Angola,110300.0,(POLYGON ((16.32652835456705 -5.87747039146621...
ALB,3639453.0,Europe,Albania,21810.0,"POLYGON ((20.59024743010491 41.85540416113361,..."
ARE,4798491.0,Asia,United Arab Emirates,184300.0,"POLYGON ((51.57951867046327 24.24549713795111,..."
ARG,40913584.0,South America,Argentina,573900.0,(POLYGON ((-65.50000000000003 -55.199999999999...


In [14]:
def _red_with_opacity(opacity):
    """Build one cell's style dict: fixed red colour, cell value as opacity."""
    return {'color': '#ff0000', 'opacity': opacity}


# Replace every scaled value with the style dict the choropleth expects.
gdelt = mentions_by_day.applymap(_red_with_opacity)
gdelt.head()

country,ARE,CAN,CRI,IRL,GHA,ISR,KOR,MYS,NZL,PHL,RUS,GBR,USA,VNM
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1506895200,"{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.333333333333...","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.142857142857...","{'color': '#ff0000', 'opacity': 0.313253012048...","{'color': '#ff0000', 'opacity': 0.0}"
1506981600,"{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 0.333333333333...","{'color': '#ff0000', 'opacity': 0.5}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 0.457831325301...","{'color': '#ff0000', 'opacity': 0.0}"
1507068000,"{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.5}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.666666666666...","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 1.0}","{'color': '#ff0000', 'opacity': 1.0}"
1507154400,"{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.289156626506...","{'color': '#ff0000', 'opacity': 0.0}"
1507240800,"{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.0}","{'color': '#ff0000', 'opacity': 0.108433734939...","{'color': '#ff0000', 'opacity': 0.0}"


In [15]:
# Nested mapping {column label: {row label: style dict}}, i.e.
# {country: {timestamp: style}} -- the layout TimeSliderChoropleth takes.
styledict = gdelt.to_dict(orient='dict')
styledict

{'ARE': {'1506895200': {'color': '#ff0000', 'opacity': 0.0},
  '1506981600': {'color': '#ff0000', 'opacity': 0.0},
  '1507068000': {'color': '#ff0000', 'opacity': 1.0},
  '1507154400': {'color': '#ff0000', 'opacity': 0.0},
  '1507240800': {'color': '#ff0000', 'opacity': 0.0},
  '1507327200': {'color': '#ff0000', 'opacity': 0.0},
  '1507413600': {'color': '#ff0000', 'opacity': 0.0},
  '1507500000': {'color': '#ff0000', 'opacity': 0.0},
  '1507586400': {'color': '#ff0000', 'opacity': 0.0},
  '1507672800': {'color': '#ff0000', 'opacity': 0.0},
  '1507759200': {'color': '#ff0000', 'opacity': 0.0},
  '1507845600': {'color': '#ff0000', 'opacity': 0.0},
  '1507932000': {'color': '#ff0000', 'opacity': 0.0},
  '1508018400': {'color': '#ff0000', 'opacity': 0.0},
  '1508104800': {'color': '#ff0000', 'opacity': 0.0},
  '1508191200': {'color': '#ff0000', 'opacity': 0.0},
  '1508277600': {'color': '#ff0000', 'opacity': 0.0},
  '1508450400': {'color': '#ff0000', 'opacity': 0.0},
  '1508536800': {'col

In [16]:
from branca.element import Figure, JavascriptLink
from folium.features import GeoJson
from jinja2 import Template

class TimeSliderChoropleth(GeoJson):
    """
    Creates a TimeSliderChoropleth plugin to append into a map with Map.add_child.

    The rendered script adds a range slider with one step per timestamp and,
    whenever the slider moves, re-applies the fill colour and fill opacity
    stored in `styledict` for the selected timestamp to each feature's path.

    Parameters
    ----------
    data: str
        geojson string
    styledict: dict
        A dictionary where the keys are the geojson feature ids and the values are
        dicts of `{time: style_options_dict}`. The script reads the `color`
        and `opacity` entries of each style dict and parses each time key
        with `parseInt(...) * 1000`, i.e. as epoch seconds.
    name: str, optional
        Forwarded to `GeoJson.__init__`.
    overlay: bool, default True
        Forwarded to `GeoJson.__init__`.
    control: bool, default True
        Forwarded to `GeoJson.__init__`.
    **kwargs
        Accepted for call compatibility; not forwarded to `GeoJson.__init__`.

    Raises
    ------
    ValueError
        If `styledict` is not a dict, or any of its values is not a dict.
    """
    def __init__(self, data, styledict, name=None, overlay=True, control=True, **kwargs):
        super(TimeSliderChoropleth, self).__init__(data, name=name, overlay=overlay, control=control)
        if not isinstance(styledict, dict):
            raise ValueError('styledict must be a dictionary, got {!r}'.format(styledict))
        for val in styledict.values():
            if not isinstance(val, dict):
                raise ValueError('Each item in styledict must be a dictionary, got {!r}'.format(val))

        # Collect every timestamp used by any feature.
        timestamps = set()
        for feature_styles in styledict.values():
            timestamps.update(feature_styles)

        # Order the slider steps chronologically. Numeric strings are compared
        # by value: a plain string sort would put '1000000000' before
        # '999999999'. Keys that cannot be read as numbers keep the plain
        # sorted() ordering.
        try:
            timestamps = sorted(timestamps, key=self._timestamp_sort_key)
        except (TypeError, ValueError):
            timestamps = sorted(timestamps)

        # Both values are injected verbatim into the script below as
        # JavaScript literals.
        self.timestamps = json.dumps(timestamps)
        self.styledict = json.dumps(styledict, sort_keys=True, indent=2)

        self._template = Template(u"""
            {% macro script(this, kwargs) %}
                var timestamps = {{ this.timestamps }};
                var styledict = {{ this.styledict }};
                var current_timestamp = timestamps[0];
                // insert time slider
                d3.select("body").insert("p", ":first-child").append("input")
                    .attr("type", "range")
                    .attr("width", "100px")
                    .attr("min", 0)
                    .attr("max", timestamps.length - 1)
                    .attr("value", 0)
                    .attr("id", "slider")
                    .attr("step", "1")
                    .style('align', 'center');
                // insert time slider output BEFORE time slider (text on top of slider)
                d3.select("body").insert("p", ":first-child").append("output")
                    .attr("width", "100")
                    .attr("id", "slider-value")
                    .style('font-size', '18px')
                    .style('text-align', 'center')
                    .style('font-weight', '500%');
                var datestring = new Date(parseInt(current_timestamp)*1000).toDateString();
                d3.select("output#slider-value").text(datestring);
                fill_map = function(){
                    for (var feature_id in styledict){
                        let style = styledict[feature_id]//[current_timestamp];
                        var fillColor = 'white';
                        var opacity = 0;
                        if (current_timestamp in style){
                            fillColor = style[current_timestamp]['color'];
                            opacity = style[current_timestamp]['opacity'];
                            d3.selectAll('#feature-'+feature_id
                            ).attr('fill', fillColor)
                            .style('fill-opacity', opacity);
                        }
                    }
                }
                d3.select("#slider").on("input", function() {
                    current_timestamp = timestamps[this.value];
                var datestring = new Date(parseInt(current_timestamp)*1000).toDateString();
                d3.select("output#slider-value").text(datestring);
                fill_map();
                });
                {% if this.highlight %}
                    {{this.get_name()}}_onEachFeature = function onEachFeature(feature, layer) {
                        layer.on({
                            mouseout: function(e) {
                            if (current_timestamp in styledict[e.target.feature.id]){
                                var opacity = styledict[e.target.feature.id][current_timestamp]['opacity'];
                                d3.selectAll('#feature-'+e.target.feature.id).style('fill-opacity', opacity);
                            }
                        },
                            mouseover: function(e) {
                            if (current_timestamp in styledict[e.target.feature.id]){
                                d3.selectAll('#feature-'+e.target.feature.id).style('fill-opacity', 1);
                            }
                        },
                            click: function(e) {
                                {{this._parent.get_name()}}.fitBounds(e.target.getBounds());
                        }
                        });
                    };
                {% endif %}
                var {{this.get_name()}} = L.geoJson(
                    {% if this.embed %}{{this.style_data()}}{% else %}"{{this.data}}"{% endif %}
                    {% if this.smooth_factor is not none or this.highlight %}
                        , {
                        {% if this.smooth_factor is not none  %}
                            smoothFactor:{{this.smooth_factor}}
                        {% endif %}
                        {% if this.highlight %}
                            {% if this.smooth_factor is not none  %}
                            ,
                            {% endif %}
                            onEachFeature: {{this.get_name()}}_onEachFeature
                        {% endif %}
                        }
                    {% endif %}
                    ).addTo({{this._parent.get_name()}}
                );
            {{this.get_name()}}.setStyle(function(feature) {feature.properties.style;});
                {{ this.get_name() }}.eachLayer(function (layer) {
                    layer._path.id = 'feature-' + layer.feature.id;
                    });
                d3.selectAll('path')
                .attr('stroke', 'white')
                .attr('stroke-width', 0.8)
                .attr('stroke-dasharray', '5,5')
                .attr('fill-opacity', 0);
                fill_map();
            {% endmacro %}
            """)

    @staticmethod
    def _timestamp_sort_key(timestamp):
        """Return the value used to order one timestamp key.

        String keys are converted to a number (int when possible, otherwise
        float) so they sort by value; a string that is not numeric raises
        ValueError. Non-string keys are returned unchanged.
        """
        if isinstance(timestamp, str):
            try:
                return int(timestamp)
            except ValueError:
                return float(timestamp)
        return timestamp

    def render(self, **kwargs):
        """Render the layer and add the d3 v4 script link to the figure header.

        Raises AssertionError when the element's root is not a branca `Figure`.
        """
        super(TimeSliderChoropleth, self).render(**kwargs)
        figure = self.get_root()
        assert isinstance(figure, Figure), ('You cannot render this Element '
                                            'if it is not in a Figure.')
        # The script emitted by the template relies on the global `d3` object.
        figure.header.add_child(JavascriptLink('https://d3js.org/d3.v4.min.js'), name='d3v4')

In [17]:
# Base map centred on [0, 0] at zoom level 2.
m = folium.Map(location=[0, 0], zoom_start=2)

# Attach the time-slider layer: every polygon in `gdf`, styled per timestamp
# from `styledict`.
choropleth = TimeSliderChoropleth(gdf.to_json(), styledict=styledict)
g = choropleth.add_to(m)

m

In [18]:
import webbrowser
from pathlib import Path

# Write the map to an HTML file in the current working directory.
output_file = 'gdelt_viz.html'
m.save(output_file)

# Open the saved file in the default browser. `as_uri()` produces a
# well-formed, percent-encoded file:// URL on every platform, whereas gluing
# 'file://' onto the path yields an invalid URL for Windows drive paths and
# leaves characters such as spaces unescaped.
webbrowser.open(Path(output_file).resolve().as_uri())

True