# Getting data using an API

Transports publics de Genève (public transportation in Geneva)

In [1]:
import requests
import pandas as pd
import configparser
import time

In [2]:
# Parser object that will hold the settings loaded from the .ini file.
config = configparser.ConfigParser()

### Example config:
```python
config['tpg.ch'] = {}
config['tpg.ch']['key'] = 'Zapata_yaboom!'
with open('tpg.ini', 'w') as configfile:
    config.write(configfile)
```

In [4]:
# Load tpg.ini and pull the API key out of its [tpg.ch] section.
# Note: ConfigParser.read() silently skips a file it cannot open, in which
# case the lookup below fails with a KeyError.
config.read('tpg.ini')
key = config['tpg.ch']['key']

# We get the current disruptions

In [5]:
# URL template of the GetDisruptions endpoint; the API key fills the placeholder.
getDisruptions = 'http://prod.ivtr-od.tpg.ch/v1/GetDisruptions?key={}'
# A timeout keeps the cell from hanging forever if the server never answers.
responseDis = requests.get(getDisruptions.format(key), timeout=30)
# Surface HTTP errors here instead of trying to decode an error page as JSON.
responseDis.raise_for_status()
dataDis = responseDis.json()

# Let's store it in a dataframe

In [11]:
# Tabulate the entries found under the 'disruptions' key of the API response.
df = pd.DataFrame(dataDis['disruptions'])

In [12]:
def add_warning(nature_label):
    """Return 0 for planned works, 1 for any other kind of disruption.

    A label counts as planned works when it begins with 'Travaux' or
    'Rénovation' (case-sensitive); every other label, including the empty
    string, yields 1.
    """
    if nature_label[:7] == 'Travaux' or nature_label[:10] == 'Rénovation':
        return 0
    return 1

In [13]:
# Add a 'warning' column: add_warning applied to each row's 'nature' label.
df['warning'] = df['nature'].apply(add_warning)

In [14]:
# Display the disruptions table, including the 'warning' column.
df

Unnamed: 0,consequence,lineCode,nature,place,timestamp,warning
0,Dès le 30 septembre et pour une durée de 6 moi...,1,Travaux route de Frontenex,Uniquement direction Gradelle,2016-11-28T04:13:00+0100,0
1,Ligne détournée entre les arrêts Motta et Corn...,8,Travaux rue de Vermont,Direction Veyrier Douane / Tournettes,2016-11-28T04:10:00+0100,0
2,Dès le 30 septembre et pour une durée de 6 moi...,9,Travaux route de Frontenex,Uniquement direction Gradelle,2016-11-28T04:13:00+0100,0
3,Ligne détournée entre les arrêts Petit Bel-Air...,31,Travaux,Dans les deux sens,2016-11-28T07:51:00+0100,0
4,Dès le 30 septembre et pour une durée de 6 moi...,33,Travaux route de Frontenex,Uniquement direction Gradelle,2016-11-28T04:13:00+0100,0
5,Ligne détournée entre les arrêts Annemasse-Pos...,61,Travaux centre ville d'Annemasse,Direction Gare Cornavin,2016-11-28T04:12:00+0100,0
6,Dès le 30 septembre et pour une durée de 6 moi...,A,Travaux route de Frontenex,Uniquement direction Gradelle,2016-11-28T04:13:00+0100,0
7,Dès le 30 septembre et pour une durée de 6 moi...,NP,Travaux route de Frontenex,Uniquement direction Gradelle,2016-11-28T04:13:00+0100,0


In [15]:
# Stamp used in the export file name, e.g. "2016-11-28_07h51".
# %M is the minute; lowercase %m is the month and would repeat it after the hour.
datestring = time.strftime("%Y-%m-%d_%Hh%M")

In [16]:
# Save the table as disruptions_<datestring>.csv, leaving out the index column.
df.to_csv("disruptions_{}.csv".format(datestring), index=False)

# Genève vu par Wikipédia

In [6]:
import requests, json, random, re, pandas as pd, time
from IPython.core.display import display, HTML

In [7]:
def query(request, limit):
    """Yield successive result pages from the English Wikipedia query API.

    Parameters
    ----------
    request : dict
        Query parameters, e.g. ``{'list': 'search', 'srsearch': 'Genève'}``.
        ``action`` and ``format`` are forced to ``'query'`` and ``'json'`` on
        a copy, so the caller's dict is left untouched.
    limit : int
        Maximum number of HTTP requests to send.

    Yields
    ------
    dict
        The ``'query'`` section of each response that contains one.

    Raises
    ------
    RuntimeError
        If a response contains an ``'error'`` entry; the error payload is
        passed as the exception argument.
    """
    base_request = dict(request)
    base_request['action'] = 'query'
    base_request['format'] = 'json'
    last_continue = {'continue': ''}
    requests_sent = 0
    while requests_sent < limit:
        requests_sent += 1
        # Overlay the continuation values returned by the previous response
        # on a fresh copy of the base request.
        req = dict(base_request)
        req.update(last_continue)
        result = requests.get('http://en.wikipedia.org/w/api.php', params=req).json()
        if 'error' in result:
            raise RuntimeError(result['error'])
        if 'warnings' in result:
            print(result['warnings'])
        if 'query' in result:
            yield result['query']
        # No 'continue' section means the server has nothing more to return.
        if 'continue' not in result:
            break
        last_continue = result['continue']

In [8]:
# Gather the search hits from at most three pages of results.
search_request = {'list': 'search', 'srsearch': 'Genève', 'srwhat': 'text'}
result_list = []

for result in query(search_request, 3):
    new_hits = result['search']
    print('Got {} new results'.format(len(new_hits)))
    result_list += new_hits

Got 10 new results
Got 10 new results
Got 10 new results


In [9]:
# Tabulate the collected search hits.
# NOTE: this rebinds `df`, replacing the disruptions table built earlier.
df = pd.DataFrame(result_list)
# Preview the first rows.
df.head()

Unnamed: 0,ns,size,snippet,timestamp,title,wordcount
0,0,100443,"industriels, Archives d'État de <span class=""s...",2016-11-14T05:47:15Z,Geneva,10419
1,0,5033,"&quot;Le Stade de <span class=""searchmatch"">Ge...",2016-11-26T15:11:48Z,Stade de Genève,366
2,0,4930,Sports. Originally founded as the <span clas...,2016-05-29T23:45:52Z,Lions de Genève,220
3,0,8475,"Vernets. 1963 : Creation of <span class=""searc...",2016-10-26T12:07:18Z,Genève-Servette HC,868
4,0,2465,"Banque cantonale de <span class=""searchmatch"">...",2016-10-25T11:12:48Z,Banque cantonale de Genève,157


In [10]:
def get_day(timestamp):
    """Format an ISO-style timestamp as '<month name> <year>'.

    Only the part before the first 'T' is parsed (as YYYY-MM-DD). Despite the
    function's name, the day is dropped from the result, e.g.
    '2016-11-14T05:47:15Z' -> 'November 2016'.
    """
    date_part, _, _ = timestamp.partition('T')
    parsed = time.strptime(date_part, '%Y-%m-%d')
    return time.strftime('%B %Y', parsed)
# Add a 'date' column holding the get_day label of each row's timestamp.
df['date'] = df['timestamp'].apply(get_day)

In [11]:
# One entry per 'date' label: the label itself plus the titles that share it.
json_list = [
    {'name': date, 'words': group['title'].tolist()}
    for date, group in df.groupby('date')
]
json_list

[{'name': 'April 2016', 'words': ['École Japonaise Complémentaire de Genève']},
 {'name': 'August 2013', 'words': ['Geneva University of Music']},
 {'name': 'February 2016', 'words': ["Musée d'ethnographie de Genève"]},
 {'name': 'January 2016', 'words': ['Orchestre de chambre de Genève']},
 {'name': 'July 2016',
  'words': ['Genève-Cornavin railway station', 'Trolleybuses in Geneva']},
 {'name': 'June 2015', 'words': ['Gli Angeli Genève']},
 {'name': 'March 2016',
  'words': ['Urania Genève Sport', 'Racing Club Genève']},
 {'name': 'May 2016', 'words': ['Lions de Genève']},
 {'name': 'November 2014', 'words': ['Conservatoire de Musique de Genève']},
 {'name': 'November 2016',
  'words': ['Geneva',
   'Stade de Genève',
   'Universal Genève',
   'Tribune de Genève',
   'Fêtes de Genève',
   'Centre d’Art Contemporain Genève',
   'Servette FC',
   'Canton of Geneva',
   'Collège Calvin']},
 {'name': 'October 2016',
  'words': ['Genève-Servette HC',
   'Banque cantonale de Genève',
   'P

In [12]:
# Accumulates the node dictionaries that are written to wiki_titles.json below.
data = []    

## return an array with the other nodes to connect to
def import_words(word_list):
    """Return the node names for the given titles, each prefixed with 'Title.'."""
    return ['Title.' + word for word in word_list]

def import_authors(author_list):
    """Return the node names for the given authors, each prefixed with 'Author.'."""
    return ['Author.' + author for author in author_list]

## create the node data
def generate_word(word, deputee):
    """Build the node dictionary for one title.

    Parameters
    ----------
    word : str
        The title; the node is named ``"Title." + word``.
    deputee : list of str
        Names of the author nodes this title links to (a list, despite the
        singular parameter name).

    Returns
    -------
    dict
        ``{"name": ..., "size": 0, "imports": [...]}`` where ``imports`` is
        the result of ``import_authors(deputee)``.
    """
    return {
        "name": "Title." + word,
        "size": 0,
        "imports": import_authors(deputee),
    }

def generate_author(deputee, words):
    """Build the node dictionary for one author.

    The node is named ``"Author." + deputee``, has ``size`` 0, and its
    ``imports`` entry is ``import_words(words)``.
    """
    node = {"name": "Author." + deputee, "size": 0}
    node["imports"] = import_words(words)
    return node

# For each group: one node per title (each linked back to the group),
# followed by the node of the group itself.
for item in json_list:
    deputee_name, item_words = item['name'], item['words']
    data.extend(generate_word(single_word, [deputee_name]) for single_word in item_words)
    deputee = generate_author(deputee_name, item_words)
    data.append(deputee)

## export
# Write the collected nodes to wiki_titles.json as indented JSON.
with open('wiki_titles.json', 'w') as outfile:
    outfile.write(json.dumps(data, indent=4, separators=(',', ': ')))

In [14]:
# surprise
# display(HTML('<iframe src="http://rospo.local/~paul/Wiki_dataviz/dataviz.html" width="900" height="900"></iframe>'))