In [13]:
from SPARQLWrapper import SPARQLWrapper, JSON
import json
import re
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [14]:
# Fetch, for every person linked from the French Wikipedia article on the 2016
# US presidential election, the per-month revision nodes whose date contains
# "2016". Each result row binds ?res (person URI), ?rev (revision node),
# ?date (date string of that node) and ?c (the rdf:value of the node, parsed
# downstream as an integer revision count).
#
# The `rdf:` prefix is declared explicitly: the SPARQL grammar requires every
# prefix that is used to be declared, and relying on an endpoint's predefined
# prefixes makes the query non-portable.
#
# NOTE(review): `dc:` points at ".../dc/element/1.1/", which is not the
# standard Dublin Core namespace (".../dc/elements/1.1/"). The recorded outputs
# suggest it matches this endpoint's data, so it is left as is -- confirm
# before changing.
sparql = SPARQLWrapper("http://dbpedia-historique.inria.fr/sparql")
sparql.setQuery("""prefix db:   <http://fr.dbpedia.org/resource/>
prefix dbo:  <http://dbpedia.org/ontology/>
prefix dbfr: <http://ns.inria.fr/dbpediafr/voc#>
prefix foaf: <http://xmlns.com/foaf/0.1/>
prefix dc:   <http://purl.org/dc/element/1.1/>
prefix rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

select  * where {

        {select distinct * where {
            db:Élection_présidentielle_américaine_de_2016 dbo:wikiPageWikiLink/(dbo:wikiPageRedirects)? ?res .
            ?res a foaf:Person
        }}
   
   
            ?rev dc:date ?date .
            filter contains(str(?date), "2016")
            ?x dbfr:revPerMonth ?rev .
            ?x foaf:primaryTopic ?res .
            ?rev rdf:value ?c .
       
   
}""")

# Ask for SPARQL JSON results; convert() parses the response so the rows can
# be read from results["results"]["bindings"].
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [23]:
def convert_date(datestring, month=True):
    """Parse a date string with ``pd.to_datetime`` using a fixed format.

    Parameters
    ----------
    datestring : value accepted by ``pd.to_datetime``
        The text to parse, e.g. ``"03/2016"`` or ``"2016"``.
    month : bool, default True
        When truthy the input is parsed as ``"%m/%Y"`` (month/year);
        otherwise it is parsed as ``"%Y"`` (year only).

    Returns
    -------
    Whatever ``pd.to_datetime`` returns for the input and chosen format.
    """
    fmt = "%m/%Y" if month else "%Y"
    return pd.to_datetime(datestring, format=fmt)
        

In [78]:
from collections import defaultdict

# Group the query rows by person URI. A set of (date, revision_count) tuples
# is used so that identical rows returned more than once are kept only once.
tmp = defaultdict(set)
for record in results["results"]["bindings"]:
    date = convert_date(record['date']['value'])
    uri = record['res']['value']
    num_rev = int(record['c']['value'])
    tmp[uri].add((date, num_rev))  # avoid duplicates

# Convert each set to a list for downstream sorting / DataFrame construction.
# This must iterate over `tmp` (built just above): the previous version read
# `dic.items()` while defining `dic`, which raises NameError on a fresh kernel
# and silently reuses stale data in a live one.
dic = defaultdict(list, ((k, list(v)) for k, v in tmp.items()))

In [79]:
# Build one single-column frame per person and join them side by side.
# Keys of `dic` are resource URIs such as
#   'http://fr.dbpedia.org/resource/Ted_Cruz'
#   'http://fr.dbpedia.org/resource/Donald_Trump'
# and the last path segment is used as the column label.
dfs = []
for resource_uri, pairs in dic.items():
    label = resource_uri.rsplit("/", 1)[-1]
    # Order the (date, count) tuples chronologically.
    ordered = sorted(pairs, key=lambda pair: pair[0])
    frame = pd.DataFrame.from_records(ordered, columns=['dateval', label])
    # Move the date column into the index, leaving only the count column.
    frame.index = pd.DatetimeIndex(frame.pop('dateval'))
    dfs.append(frame)

# Align all persons on the date index; dates missing for a person become 0.
df = pd.concat(dfs, axis=1).fillna(0)
df.head()


Unnamed: 0,Ted_Cruz,Donald_Trump,Mike_Huckabee,Bill_Clinton,Gary_Earl_Johnson,Lincoln_Chafee,Rick_Perry,Al_Gore,Hillary_Clinton,George_H._W._Bush,...,Brian_Schweitzer,Bernie_Sanders,Jeb_Bush,Bobby_Jindal,Martin_O'Malley,John_Kasich,Chris_Christie,Rand_Paul,Paul_Ryan,"Jon_Huntsman,_Jr."
2016-01-01,4,81.0,0.0,2,0.0,0.0,0.0,1.0,7,0.0,...,0.0,39,2.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
2016-02-01,31,91.0,2.0,1,1.0,2.0,2.0,0.0,24,5.0,...,0.0,94,11.0,0.0,7.0,4.0,3.0,3.0,0.0,1.0
2016-03-01,24,179.0,0.0,5,1.0,0.0,0.0,4.0,32,2.0,...,1.0,62,5.0,2.0,0.0,19.0,1.0,1.0,2.0,0.0
2016-04-01,1,0.0,0.0,1,0.0,0.0,0.0,1.0,4,0.0,...,0.0,17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [77]:
# Column-wise totals of `df` (pandas sums down the rows by default), giving
# one number per column.
df.sum()

Ted_Cruz                           60.0
Donald_Trump                      351.0
Mike_Huckabee                       2.0
Bill_Clinton                        9.0
Gary_Earl_Johnson                   2.0
Lincoln_Chafee                      2.0
Rick_Perry                          2.0
Lawrence_Lessig                     3.0
Hillary_Clinton                    67.0
George_H._W._Bush                   7.0
Marco_Rubio                        25.0
Jim_Webb                            3.0
Michael_Bloomberg                   6.0
George_W._Bush                     17.0
Lindsey_Graham                      1.0
Franklin_Delano_Roosevelt          17.0
Joe_Biden                          16.0
Al_Gore                             6.0
Scott_Walker_(homme_politique)      2.0
Barack_Obama                       35.0
Brian_Schweitzer                    1.0
Bernie_Sanders                    212.0
Jeb_Bush                           18.0
Bobby_Jindal                        3.0
Martin_O'Malley                     7.0
