# Apple Mobility Trends Report

Data from https://covid19.apple.com/mobility

In [None]:
import pandas as pd
import numpy as np
import pycountry
import json
from datetime import datetime
from functools import reduce
import requests
from io import StringIO
import re

In [None]:
# papermill parameters
# Destination folder for the exported APPLE_MOBILITY.csv; the default value
# ends with a path separator.
output_folder = "../output/"

In [None]:
# get today's path
# The index document names the versioned base path and the per-locale CSV
# location of the current data release.
prefix = "https://covid19-static.cdn-apple.com"
response = requests.get(prefix + "/covid19-mobility-data/current/v3/index.json", timeout=60)
# Fail loudly on an HTTP error; a plain `assert` would be stripped under -O.
response.raise_for_status()
body = response.json()
# get data version
version = body["basePath"]
csv_path = body["regions"]["en-us"]["csvPath"]

path = prefix + version + csv_path
file_response = requests.get(path, timeout=60)
# Without this check an HTTP error page would be handed to the CSV parser.
file_response.raise_for_status()

In [None]:
# Decode the downloaded bytes as UTF-8 and parse them as CSV into the working frame.
df = pd.read_csv(StringIO(str(file_response.content, "utf-8")))

In [None]:
# Cache of ISO 3166-2 subdivisions per country, filled lazily by
# resolve_region: {alpha-2 country code: {subdivision name: subdivision code}}.
subdivisions = {}

# Captures the bare place name: drops an optional "Canton of " prefix and
# everything from a trailing " Region" / " County" / " Prefecture" /
# " Province" / " (xx)" marker onwards.
_PLACE_NAME_RE = re.compile(
    r"^(?:Canton of )?(.*?)(?:\s(?:Region|County|Prefecture|Province|\(\w+\)).*)?$"
)


def _find_country(name):
    """Return the pycountry record for *name*, or None if it cannot be resolved."""
    if not name:
        return None
    try:
        # Exact name first; fall back to the fuzzy search, which returns a
        # list of candidates and raises LookupError when it finds nothing.
        found = pycountry.countries.get(name=name) or pycountry.countries.search_fuzzy(name)
    except LookupError:
        return None
    if isinstance(found, list):
        return found[0] if found else None
    return found


def _subdivision_codes(country_code):
    """Return the cached {subdivision name: code} mapping for *country_code*."""
    if country_code not in subdivisions:
        # The lookup may yield nothing for a country without subdivisions.
        records = pycountry.subdivisions.get(country_code=country_code) or []
        subdivisions[country_code] = {record.name: record.code for record in records}
    return subdivisions[country_code]


def resolve_region(geo_type, region, sub_region, country):
    """Fill the ISO 3166 columns of the global ``df`` for one place.

    For ``geo_type == "country/region"`` the country is looked up by
    ``region``; matching rows get their ``ISO3166-1`` code and their
    ``region`` replaced by the pycountry name. For every other ``geo_type``
    the country is looked up by ``country`` and only ``ISO3166-1`` is set.
    For ``"county"``/``"city"`` rows (keyed on ``sub_region``) and
    ``"sub-region"`` rows (keyed on ``region``) the cleaned place name is
    then matched against the country's subdivisions to set ``ISO3166-2``.

    Args:
        geo_type: Value of the ``geo_type`` column for this place.
        region: Value of the ``region`` column.
        sub_region: Value of the ``sub-region`` column ("" when absent).
        country: Value of the ``country`` column ("" when absent).

    Returns:
        None. ``df`` and the ``subdivisions`` cache are updated in place.
        A place whose country cannot be resolved is reported on stdout and
        left untouched; an unmatched subdivision is silently left blank.
    """
    is_country_row = geo_type == "country/region"
    country_name = region if is_country_row else country
    country_record = _find_country(country_name)
    if country_record is None:
        kind = "country" if is_country_row else "non-country"
        print(f"unable to parse {kind}: {country_name}")
        return

    country_code = country_record.alpha_2
    of_geo_type = df["geo_type"] == geo_type
    if is_country_row:
        rows = of_geo_type & (df["region"] == region)
        df.loc[rows, "ISO3166-1"] = country_code
        df.loc[rows, "region"] = country_record.name
    else:
        df.loc[of_geo_type & (df["country"] == country), "ISO3166-1"] = country_code

    # Populate the cache for every resolved country, including country rows.
    codes_by_name = _subdivision_codes(country_code)

    # Pick which column carries the subdivision name for this geo type.
    if geo_type in ("county", "city"):
        place, place_column = sub_region, "sub-region"
    elif geo_type == "sub-region":
        place, place_column = region, "region"
    else:
        return

    cleaned = _PLACE_NAME_RE.match(place) if isinstance(place, str) else None
    full_code = codes_by_name.get(cleaned.group(1)) if cleaned else None
    if full_code is None:
        # Best effort: no subdivision with that name, leave ISO3166-2 blank.
        return

    # Keep only the part after the country prefix, e.g. "US-CA" -> "CA".
    code = re.sub(r"^\w*?-?(\w+)$", r"\1", full_code)
    df.loc[
        (df["geo_type"] == geo_type)
        & (df["ISO3166-1"] == country_code)
        & (df[place_column] == place),
        "ISO3166-2",
    ] = code
    

In [None]:
# Start both ISO code columns as empty strings; resolve_region fills them in.
for iso_column in ("ISO3166-1", "ISO3166-2"):
    df[iso_column] = ""

In [None]:
# Resolve each distinct (geo_type, region, sub-region, country) combination
# once; missing values are replaced by "" before grouping.
place_columns = ["geo_type", "region", "sub-region", "country"]
places = df[place_columns].fillna("").groupby(place_columns)
for place_key, _group in places:
    resolve_region(*place_key)
    

In [None]:
# Split the columns into date-named ones (containing YYYY-MM-DD) and the rest.
date_like = re.compile(r"\d{4}-\d{2}-\d{2}")
cols = [col for col in df.columns if not date_like.search(col)]
vals = [col for col in df.columns if date_like.search(col)]

In [None]:
# Reshape wide to long: each date column becomes a row, with the column name
# in "variable" and the cell in "value".
df = df.melt(id_vars=cols, value_vars=vals)

In [None]:
# Subdivision codes still left as the empty-string placeholder become NaN.
unresolved_subdivision = df["ISO3166-2"].eq("")
df.loc[unresolved_subdivision, "ISO3166-2"] = np.nan

In [None]:
# Show 25 randomly chosen rows as a quick visual check of the reshaped data.
df.sample(n=25)

In [None]:
# Source column name -> name used in the exported file.
column_map = {
    "region": "COUNTRY/REGION",
    "sub-region": "PROVINCE/STATE",
    # "variable" and "value" are the columns produced by the melt step.
    "variable": "DATE",
    "value": "DIFFERENCE",
    "transportation_type": "TRANSPORTATION_TYPE",
}
df = df.rename(column_map, axis="columns")

In [None]:
from datetime import timezone

# Stamp every row with the time of this run. This yields the same naive UTC
# value that datetime.utcnow() produced, without relying on that call, which
# is deprecated since Python 3.12.
df["Last_Updated_Date"] = datetime.now(timezone.utc).replace(tzinfo=None)
# Flag the rows that belong to the most recent DATE present in the data.
df['Last_Reported_Flag'] = df["DATE"].max() == df["DATE"]

In [None]:
import os

# Columns written to the export, in output order.
export_columns = [
    "COUNTRY/REGION",
    "PROVINCE/STATE",
    "DATE",
    "TRANSPORTATION_TYPE",
    "DIFFERENCE",
    "ISO3166-1",
    "ISO3166-2",
    "Last_Updated_Date",
    "Last_Reported_Flag",
]
# os.path.join inserts a separator only when output_folder lacks one, so an
# overridden folder without a trailing slash still resolves to a file inside it.
df.to_csv(os.path.join(output_folder, "APPLE_MOBILITY.csv"), index=False, columns=export_columns)