In [1]:
import pandas as pd
import numpy as np
import json
import requests

In [2]:
def api_call(endpoint, timeout=30, headers=None):
    """Send an HTTP GET to ``endpoint`` and return the decoded JSON body.

    Parameters
    ----------
    endpoint : str
        Full URL to request.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 30 so a stalled connection cannot hang the kernel.
    headers : dict, optional
        Extra request headers (for example a descriptive ``User-Agent``).
        ``None`` sends the library defaults, as before.

    Returns
    -------
    The parsed JSON payload, as produced by ``Response.json()``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    call = requests.get(endpoint, headers=headers, timeout=timeout)
    # Fail here on an HTTP error instead of returning the error payload, which
    # callers would only trip over later when indexing ``['items']``.
    call.raise_for_status()
    response = call.json()

    return response

## Legacy desktop

In [3]:
# Legacy pagecounts, desktop-site access, all projects, monthly granularity,
# requested for the range 2007120100 to 2016080100.
endpoint = (
    'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/'
    'all-projects/desktop-site/monthly/2007120100/2016080100'
)

In [4]:
# Fetch the JSON payload for the currently configured endpoint.
calls = api_call(endpoint=endpoint)

In [5]:
# Tabulate the records under the payload's 'items' key and preview five rows.
df = pd.DataFrame(data=calls['items'])
df.head(n=5)

Unnamed: 0,access-site,count,granularity,project,timestamp
0,desktop-site,5378180538,monthly,all-projects,2007120100
1,desktop-site,9154428521,monthly,all-projects,2008010100
2,desktop-site,8843288153,monthly,all-projects,2008020100
3,desktop-site,9191926623,monthly,all-projects,2008030100
4,desktop-site,9645856901,monthly,all-projects,2008040100


In [6]:
# Save the raw API response to disk as JSON.
with open('pagecounts_desktop-site_200712-201608.json', mode='w') as f:
    f.write(json.dumps(calls))

## Legacy mobile

In [7]:
# Legacy pagecounts, mobile-site access, all projects, monthly granularity,
# requested for the range 2007120100 to 2016080100.
endpoint = (
    'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/'
    'all-projects/mobile-site/monthly/2007120100/2016080100'
)

In [8]:
# Fetch the JSON payload for the currently configured endpoint.
calls = api_call(endpoint=endpoint)

In [9]:
# Tabulate the records under the payload's 'items' key and preview five rows.
df = pd.DataFrame(data=calls['items'])
df.head(n=5)

Unnamed: 0,access-site,count,granularity,project,timestamp
0,mobile-site,6485,monthly,all-projects,2008050100
1,mobile-site,1218,monthly,all-projects,2012060100
2,mobile-site,15584,monthly,all-projects,2012070100
3,mobile-site,59161,monthly,all-projects,2012080100
4,mobile-site,52675,monthly,all-projects,2012090100


In [10]:
# Save the raw API response to disk as JSON.
with open('pagecounts_mobile-site_200712-201608.json', mode='w') as f:
    f.write(json.dumps(calls))

## Pageviews desktop

In [15]:
# Pageviews, desktop access, 'user' agent, all projects, monthly granularity,
# requested for the range 2015070100 to 2019090100.
endpoint = (
    'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/'
    'all-projects/desktop/user/monthly/2015070100/2019090100'
)

In [16]:
# Fetch the JSON payload for the currently configured endpoint.
calls = api_call(endpoint=endpoint)

In [18]:
# Tabulate the records under the payload's 'items' key and preview five rows.
df = pd.DataFrame(data=calls['items'])
df.head(n=5)

Unnamed: 0,access,agent,granularity,project,timestamp,views
0,desktop,user,monthly,all-projects,2015070100,8934372444
1,desktop,user,monthly,all-projects,2015080100,8965109595
2,desktop,user,monthly,all-projects,2015090100,9277717543
3,desktop,user,monthly,all-projects,2015100100,9337128937
4,desktop,user,monthly,all-projects,2015110100,9167636024


In [20]:
# Save the raw API response to disk as JSON.
with open('pageviews_desktop_201507-201908.json', mode='w') as f:
    f.write(json.dumps(calls))

## Pageviews mobile-app

In [25]:
# Pageviews, mobile-app access, 'user' agent, all projects, monthly
# granularity, requested for the range 2015070100 to 2019090100.
endpoint = (
    'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/'
    'all-projects/mobile-app/user/monthly/2015070100/2019090100'
)

In [26]:
# Fetch the JSON payload for the currently configured endpoint.
calls = api_call(endpoint=endpoint)

In [30]:
# Tabulate the records under the payload's 'items' key, report the frame's
# (rows, columns) shape, then preview five rows.
df = pd.DataFrame(data=calls['items'])
print(df.shape)
df.head(n=5)

(50, 6)
       access agent granularity       project   timestamp      views
0  mobile-app  user     monthly  all-projects  2015070100  213708549
1  mobile-app  user     monthly  all-projects  2015080100  217111626
2  mobile-app  user     monthly  all-projects  2015090100  194275974
3  mobile-app  user     monthly  all-projects  2015100100  195293620
4  mobile-app  user     monthly  all-projects  2015110100  196280768


In [31]:
# Save the raw API response to disk as JSON.
with open('pageviews_mobile-app_201507-201908.json', mode='w') as f:
    f.write(json.dumps(calls))

## Pageviews mobile-web

In [32]:
# Pageviews, mobile-web access, 'user' agent, all projects, monthly
# granularity, requested for the range 2015070100 to 2019090100.
endpoint = (
    'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/'
    'all-projects/mobile-web/user/monthly/2015070100/2019090100'
)

In [33]:
# Fetch the JSON payload for the currently configured endpoint.
calls = api_call(endpoint=endpoint)

In [34]:
# Tabulate the records under the payload's 'items' key, report the frame's
# (rows, columns) shape, then preview five rows.
df = pd.DataFrame(data=calls['items'])
print(df.shape)
df.head(n=5)

(50, 6)


Unnamed: 0,access,agent,granularity,project,timestamp,views
0,mobile-web,user,monthly,all-projects,2015070100,6391937355
1,mobile-web,user,monthly,all-projects,2015080100,6583631660
2,mobile-web,user,monthly,all-projects,2015090100,6427107566
3,mobile-web,user,monthly,all-projects,2015100100,6704099204
4,mobile-web,user,monthly,all-projects,2015110100,6730545748


In [35]:
# Save the raw API response to disk as JSON.
with open('pageviews_mobile-web_201507-201908.json', mode='w') as f:
    f.write(json.dumps(calls))

## Read in data

In [89]:
# Reload the saved legacy desktop-site response from disk.
with open('pagecounts_desktop-site_200712-201608.json', mode='r') as f:
    counts_desktop = json.loads(f.read())

In [90]:
# Turn the loaded payload's 'items' records into a DataFrame.
# NOTE: this rebinds ``counts_desktop`` from the loaded JSON object to a frame,
# so the cell cannot be re-run without first re-running the load cell.
counts_desktop = pd.DataFrame(data=counts_desktop['items'])

In [95]:
# Preview the first five rows.
# NOTE(review): the recorded output already shows the renamed
# ``counts_desktop``/``timestamp`` columns, so this cell was last executed
# after the drop/rename cells that follow it; a fresh top-to-bottom run will
# display the frame as it stands before those cells run.
counts_desktop.head(n=5)

Unnamed: 0,counts_desktop,timestamp
0,5378180538,2007120100
1,9154428521,2008010100
2,8843288153,2008020100
3,9191926623,2008030100
4,9645856901,2008040100


In [92]:
# Discard the constant descriptor columns, leaving the count and timestamp.
# The result is reassigned rather than mutated with ``inplace=True`` so the
# step reads as an explicit transformation of the frame.
counts_desktop = counts_desktop.drop(columns=['access-site', 'granularity', 'project'])

In [94]:
# Rename by column name, not by position. Assigning a list to ``.columns``
# silently swaps the labels whenever the frame's column order differs from the
# alphabetical order seen in the recorded output (pandas >= 0.25 keeps the key
# order of the source records instead of sorting them).
counts_desktop = counts_desktop.rename(columns={'count': 'counts_desktop'})

In [96]:
# Reload the saved legacy mobile-site response from disk.
with open('pagecounts_mobile-site_200712-201608.json', mode='r') as f:
    counts_mobile = json.loads(f.read())

In [97]:
# Turn the loaded payload's 'items' records into a DataFrame.
# NOTE: this rebinds ``counts_mobile`` from the loaded JSON object to a frame,
# so the cell cannot be re-run without first re-running the load cell.
counts_mobile = pd.DataFrame(data=counts_mobile['items'])

In [105]:
# Preview the first five rows.
# NOTE(review): the recorded output already shows the renamed
# ``counts_mobile``/``timestamp`` columns, so this cell was last executed
# after the drop/rename cells that follow it; a fresh top-to-bottom run will
# display the frame as it stands before those cells run.
counts_mobile.head(n=5)

Unnamed: 0,counts_mobile,timestamp
0,6485,2008050100
1,1218,2012060100
2,15584,2012070100
3,59161,2012080100
4,52675,2012090100


In [99]:
# Discard the constant descriptor columns, leaving the count and timestamp.
# The result is reassigned rather than mutated with ``inplace=True`` so the
# step reads as an explicit transformation of the frame.
counts_mobile = counts_mobile.drop(columns=['access-site', 'granularity', 'project'])

In [100]:
# Rename by column name, not by position. Assigning a list to ``.columns``
# silently swaps the labels whenever the frame's column order differs from the
# alphabetical order seen in the recorded output (pandas >= 0.25 keeps the key
# order of the source records instead of sorting them).
counts_mobile = counts_mobile.rename(columns={'count': 'counts_mobile'})

In [102]:
# Outer-join the desktop and mobile pagecount frames on their shared timestamp
# so that months present in only one of them are still kept.
df = counts_desktop.merge(counts_mobile, how='outer', on='timestamp')

In [103]:
# Display the merged desktop/mobile pagecount frame.
df

Unnamed: 0,counts_desktop,timestamp,counts_mobile
0,5378180538,2007120100,
1,9154428521,2008010100,
2,8843288153,2008020100,
3,9191926623,2008030100,
4,9645856901,2008040100,
5,10696423954,2008050100,6.485000e+03
6,10862643783,2008060100,
7,9872117095,2008070100,
8,9821076284,2008080100,
9,10634498608,2008090100,
