# Get daily Wikipedia pageviews for topic(s)

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
from urllib.parse import quote

import altair as alt
import altair_stiles as altstiles
import geopandas as gpd
import numpy as np
import pandas as pd

In [3]:
# Register the theme shipped by `altair_stiles` under its own name, then make
# it the active Altair theme for every chart rendered below.
STILES_THEME = "stiles"
alt.themes.register(STILES_THEME, altstiles.theme)
alt.themes.enable(STILES_THEME)

ThemeRegistry.enable('grid')

In [4]:
# Let pandas display up to 1000 rows/columns before truncating, and lift
# Altair's row limit so the full pageview history can be charted.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
# End of the query window: today's date as YYYYMMDD followed by a literal "00",
# matching the 10-digit start timestamp used in the request URL.
today = pd.Timestamp.today().strftime("%Y%m%d") + "00"

---

## Enter one or more topics to compare

In [6]:
# English Wikipedia article titles to fetch; one dataframe is downloaded per entry.
topics = ["Dog", "Cat"]
# Alternative single-topic example:
# topics = ["Recession"]

In [7]:
# Download the daily pageview series for every topic and stack them into one
# long dataframe (`src`), one row per article per day.
dataframes = []

for t in topics:
    # The title is part of the URL path: the API expects spaces as underscores
    # and everything else percent-encoded (e.g. "?" or "/" in a title).
    article = quote(t.replace(" ", "_"), safe="")
    df_src = pd.json_normalize(
        pd.read_json(
            f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/{today}"
        )["items"]
    ).sort_values("views", ascending=False)  # busiest days first within each topic
    dataframes.append(df_src)

# Concatenate once after the loop instead of rebuilding `src` on every pass.
# With an empty `topics` list this raises pandas' "No objects to concatenate"
# error here, rather than a NameError in a later cell.
src = pd.concat(dataframes)

In [8]:
# Keep only the first 8 characters of the timestamp string (the YYYYMMDD part)
# and parse them into a proper datetime column.
day_stamps = src["timestamp"].str.slice(stop=8)
src["date"] = pd.to_datetime(day_stamps)

In [9]:
# Readable label for charts and tables: swap underscores in the article title
# for spaces (literal replacement, not a regex).
article_titles = src["article"]
src["subject"] = article_titles.str.replace("_", " ", regex=False)

In [10]:
# Slim analysis frame: drop the request-metadata columns and the raw
# article/timestamp fields that `subject` and `date` now stand in for.
df = src.drop(columns=["article", "project", "timestamp", "access", "agent"])

In [11]:
# Peek at the first five rows; each topic was sorted by views (descending)
# before stacking, so these are the first topic's busiest days.
df.head(n=5)

Unnamed: 0,granularity,views,date,subject
1995,daily,28038,2020-12-16,Dog
695,daily,27789,2017-05-26,Dog
1733,daily,24766,2020-03-29,Dog
1734,daily,23339,2020-03-30,Dog
856,daily,21382,2017-11-03,Dog


In [12]:
# Peek at the last five rows; these are the last topic's lowest-traffic days.
df.tail(n=5)

Unnamed: 0,granularity,views,date,subject
1620,daily,5277,2019-12-07,Cat
1431,daily,5239,2019-06-01,Cat
907,daily,5215,2017-12-24,Cat
1627,daily,5133,2019-12-14,Cat
906,daily,5102,2017-12-23,Cat


---

In [13]:
# Daily pageviews as thin bars, one facet panel per subject (wrapping after
# two columns). Encoding types are spelled out (:T temporal, :Q quantitative,
# :N nominal) so the chart does not depend on dtype inference.
alt.Chart(df).mark_bar(size=2).encode(
    x=alt.X("date:T", axis=alt.Axis(format="%b. %Y"), title=""),
    y=alt.Y("views:Q", title="", axis=alt.Axis(tickCount=4)),
    facet=alt.Facet("subject:N", columns=2, title=" "),
).configure_legend(orient="top").properties(
    width=400,
    height=300,
    title="Pageviews on Wikipedia",  # plain string: there is nothing to interpolate
)

In [14]:
# Total pageviews per subject over the whole period. The aggregation is named
# by string ("sum") because passing the Python builtin `sum` to `.agg` is
# deprecated in recent pandas and emits a FutureWarning.
df.groupby("subject").agg({"views": "sum"}).reset_index()

Unnamed: 0,subject,views
0,Cat,29961117
1,Dog,20972563
