In [45]:
# This notebook handles data ingestion: it creates JSON files containing the view count data from
# the Pagecounts (legacy) and Pageviews APIs.

import json
import requests

In [46]:
# URL templates for the two Wikimedia REST endpoints used in this notebook.
# The {placeholders} are filled in from a parameter dict with str.format.

# Pagecounts (legacy) API
endpoint_legacy = (
    'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate'
    '/{project}/{access-site}/{granularity}/{start}/{end}'
)

# Pageviews API
endpoint_pageviews = (
    'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate'
    '/{project}/{access}/{agent}/{granularity}/{start}/{end}'
)

In [47]:
# Parameter sets for the 5 API calls: 2 for the Pagecounts (legacy) endpoint (desktop-site and
# mobile-site) and 3 for the Pageviews endpoint (desktop, mobile-app and mobile-web).


def _legacy_params(access_site):
    """Return a fresh Pagecounts (legacy) parameter dict for the given access-site value."""
    return {
        "project": "en.wikipedia.org",
        "access-site": access_site,
        "granularity": "monthly",
        "start": "2007120100",
        # for end use 1st day of month following final month of data
        "end": "2016070100",
    }


def _pageviews_params(access):
    """Return a fresh Pageviews parameter dict for the given access value."""
    return {
        "project": "en.wikipedia.org",
        "access": access,
        "agent": "user",
        "granularity": "monthly",
        "start": "2015070100",
        # for end use 1st day of month following final month of data
        # NOTE(review): if the format is YYYYMMDDHH this reads as the 10th, not the 1st,
        # of the month -- confirm the API returns the intended final month.
        "end": '2021091000',
    }


desktop_params_legacy = _legacy_params("desktop-site")
mobile_params_legacy = _legacy_params("mobile-site")

desktop_params_pageviews = _pageviews_params("desktop")
# "mobile" is the mobile app; "website" is the mobile web site.
mobile_params_pageviews = _pageviews_params("mobile-app")
website_params_pageviews = _pageviews_params("mobile-web")


# Request headers identifying the caller, using my github account.
# NOTE(review): the 'From' address looks like a placeholder -- confirm before running.
headers = {
    'User-Agent': 'https://github.com/mcb2016',
    'From': 'youremail@uw.edu',
}

In [48]:
# Here we'll define a function that makes the call to the APIs mentioned prior

def api_call(endpoint, parameters, timeout=60):
    """Request one API endpoint and return its decoded JSON body.

    Parameters
    ----------
    endpoint : str
        URL template whose ``{placeholders}`` are filled in from ``parameters``.
    parameters : dict
        Placeholder values, expanded into the template with ``str.format``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60). Without
        a timeout ``requests`` can block indefinitely on an unresponsive host.

    Returns
    -------
    The parsed JSON response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    # `headers` is the notebook-level dict identifying the caller.
    call = requests.get(endpoint.format(**parameters), headers=headers, timeout=timeout)

    # Fail loudly on an error status instead of returning the error payload,
    # which would otherwise be handed back to the caller as if it were data.
    call.raise_for_status()

    return call.json()

In [49]:
# Fetch the view count data for each of the 5 parameter sets with api_call.
# Each result is the decoded JSON response for that endpoint/parameter pair.

# Pagecounts (legacy) endpoint
desktop_monthly_legacy = api_call(endpoint=endpoint_legacy, parameters=desktop_params_legacy)
mobile_monthly_legacy = api_call(endpoint=endpoint_legacy, parameters=mobile_params_legacy)

# Pageviews endpoint
website_monthly_pageviews = api_call(endpoint=endpoint_pageviews, parameters=website_params_pageviews)
desktop_monthly_pageviews = api_call(endpoint=endpoint_pageviews, parameters=desktop_params_pageviews)
mobile_monthly_pageviews = api_call(endpoint=endpoint_pageviews, parameters=mobile_params_pageviews)

In [50]:
# Write each of the 5 loaded responses to its own JSON file in the raw data folder.
# NOTE(review): the file names encode a date range -- confirm it matches the months
# actually returned for the start/end parameters used in the requests.

raw_outputs = (
    ('../raw_data/legacy_desktop_200712-201607.json', desktop_monthly_legacy),
    ('../raw_data/legacy_mobile_200712-201607.json', mobile_monthly_legacy),
    ('../raw_data/pageviews_website_201507-202109.json', website_monthly_pageviews),
    ('../raw_data/pageviews_desktop_201507-202109.json', desktop_monthly_pageviews),
    ('../raw_data/pageviews_mobile_201507-202109.json', mobile_monthly_pageviews),
)

for path, payload in raw_outputs:
    with open(path, 'w', encoding='utf-8') as outfile:
        json.dump(payload, outfile)