# Daily temperature averages in Rio, Paris, Mexico City, Rome and Qatar

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import glob
from pathlib import Path
import numpy as np

In [3]:
# Register the altair_stiles theme under a single name and make it the active
# Altair theme for every chart drawn below.
THEME_NAME = "stiles"
alt.themes.register(THEME_NAME, altstiles.theme)
alt.themes.enable(THEME_NAME)

ThemeRegistry.enable('grid')

In [4]:
# Let pandas display up to 1,000 columns and 1,000 rows before truncating.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
# Remove Altair's maximum-row limit on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

## Process weatherspark data

#### Make a list of all our decade-level temperature data files

In [57]:
# Recursively collect every "*Summary*.csv" file under the World Cup climate
# folder as a plain string path.
# - str(path) replaces the hand-built `parent + "/" + name` concatenation.
# - sorted() makes the file order reproducible, because rglob yields matches
#   in arbitrary order and that order decides the row order after concat.
all_files = sorted(
    str(path)
    for path in Path("/Users/stiles/data/climate/worldcup/").rglob("*Summary*.csv")
)

#### Read each file into a list of dataframes

In [58]:
# Load every summary CSV into its own dataframe.
# NOTE: on_bad_lines="skip" silently drops malformed rows instead of raising.
dfs_list = [pd.read_csv(csv_path, on_bad_lines="skip") for csv_path in all_files]

#### One big dataframe with all cities and decades

In [7]:
# Stack the per-file frames row-wise. Each frame keeps its own index labels,
# so labels repeat across files in the combined frame.
src = pd.concat(dfs_list, axis=0, ignore_index=False)

#### Clean up the messy column headers

In [8]:
# Ordered (old, new) literal substitutions applied to the lower-cased headers.
# Order matters: unit suffixes are rewritten before the remaining spaces
# become underscores, and the "temperature_sampled_" prefix is dropped last.
header_rewrites = [
    ("/", "_"),
    (" (°f)", ""),
    (" (%)", "_pct"),
    (" (in)", "_inch"),
    (" (hr)", "_hour"),
    (" (mph)", "_mph"),
    (" (string)", ""),
    (" ", "_"),
    ("temperature_sampled_", ""),
]

clean_headers = src.columns.str.lower().str.strip()
for old_text, new_text in header_rewrites:
    clean_headers = clean_headers.str.replace(old_text, new_text, regex=False)
src.columns = clean_headers.str.strip()

#### Place names

In [9]:
# The time zone id column is reused as the place label, so rename it in place.
src.rename({"time_zone_id": "place"}, axis="columns", inplace=True)

In [10]:
# Split the zone id (presumably "Continent/City" — confirm against the raw
# files) into separate continent and place columns.
# n=1 splits on the first "/" only, so an id with more than two parts still
# yields exactly two columns instead of failing the two-column assignment.
src[["continent", "place"]] = src["place"].str.split("/", n=1, expand=True)

In [11]:
# Relabel rows whose place is "Sao_Paulo" as "Rio".
is_sao_paulo = src["place"].eq("Sao_Paulo")
src.loc[is_sao_paulo, "place"] = "Rio"

#### Make a simple dataframe with just the temp columns we need

In [12]:
# Keep only the date, place and daily low/high columns, as an independent copy
# so later column assignments do not touch `src`.
temp_columns = ["date", "place", "low", "high"]
src_slim = src[temp_columns].copy()

#### Process dates

In [13]:
# Parse the date column once and derive every text field from that single
# parsed series, instead of re-parsing the whole column for each field.
parsed_dates = pd.to_datetime(src_slim["date"])
src_slim["date"] = parsed_dates.dt.strftime("%Y-%m-%d")
src_slim["year"] = parsed_dates.dt.strftime("%Y")  # four-digit year, as text
src_slim["month"] = parsed_dates.dt.strftime("%m")  # zero-padded month, as text
src_slim["day"] = parsed_dates.dt.strftime("%d")  # zero-padded day, as text

#### Decade categories

In [14]:
# Decade label: first three characters of the year string plus "0s"
# (e.g. "2019" -> "2010s").
year_prefix = src_slim["year"].str.slice(stop=3)
src_slim["decade"] = year_prefix + "0s"

#### Get the daily mean temp (midpoint of the high and low)

In [15]:
# Daily mean is the midpoint of the daily high and low; a missing high or low
# leaves the mean missing.
src_slim["mean"] = src_slim["high"].add(src_slim["low"]).div(2)

#### Make a copy for analysis, excluding the 2020s and sorted newest first

In [16]:
# Analysis frame: drop rows from the 2020s, order newest date first, and copy
# so it is independent of `src_slim`.
not_2020s = src_slim["decade"] != "2020s"
df = src_slim.loc[not_2020s].sort_values("date", ascending=False).copy()

In [17]:
# Preview the first rows of the analysis frame.
df.head()

Unnamed: 0,date,place,low,high,year,month,day,decade,mean
3651,2019-12-31,Mexico_City,46.4,75.2,2019,12,31,2010s,60.8
3651,2019-12-31,Rio,73.4,91.4,2019,12,31,2010s,82.4
3651,2019-12-31,Paris,28.4,41.0,2019,12,31,2010s,34.7
3651,2019-12-31,Rome,33.8,51.8,2019,12,31,2010s,42.8
3651,2019-12-31,Qatar,64.4,80.6,2019,12,31,2010s,72.5


---

## Aggregate

In [25]:
# Average the daily means for each place and year, rounded to one decimal.
yearly_mean = df.groupby(["place", "year"])["mean"].mean()
annual_groups = yearly_mean.round(1).reset_index()

In [28]:
# Sanity check: how many yearly rows exist for Mexico City.
len(annual_groups[annual_groups["place"] == "Mexico_City"])

48

#### Group by place and decade, and get the mean temp for each calendar day (month and day)

In [18]:
# For each place and decade, average the daily mean over every calendar day
# (month + day), rounded to one decimal.
calendar_day_keys = ["place", "decade", "month", "day"]
calendar_day_mean = df.groupby(calendar_day_keys)["mean"].mean()
decades = calendar_day_mean.round(1).reset_index()

In [19]:
# Preview the first rows of the per-decade calendar-day means.
decades.head()

Unnamed: 0,place,decade,month,day,mean
0,Mexico_City,1970s,1,1,57.1
1,Mexico_City,1970s,1,2,57.5
2,Mexico_City,1970s,1,3,53.5
3,Mexico_City,1970s,1,4,55.7
4,Mexico_City,1970s,1,5,56.3


---

#### Make a list of our places (excluding Qatar)

In [20]:
# Unique place names in order of first appearance, leaving out Qatar, which is
# charted separately.
not_qatar = decades["place"] != "Qatar"
places = decades.loc[not_qatar, "place"].unique().tolist()

#### Loop and chart each place as a heat map

In [27]:
# Draw, display and save one faceted heat map per place: month on x, day on y,
# colour is the mean temperature, one facet per decade (1970s and 2010s only).
for place in places:
    # Rows for this place, restricted to the two decades being compared.
    is_place = decades["place"] == place
    is_compared_decade = decades["decade"].str.contains("1970s|2010s")
    place_decades = decades[is_place & is_compared_decade]

    # Encodings: untitled axes, reversed spectral colour scale.
    month_axis = alt.X("month:O", axis=alt.Axis(title=""))
    day_axis = alt.Y("day:O", axis=alt.Axis(title=""))
    temp_color = alt.Color(
        "mean:Q",
        title="",
        scale=alt.Scale(scheme="spectral", reverse=True),
    )
    decade_facet = alt.Facet("decade", columns=4, title="")

    heatmap = (
        alt.Chart(place_decades)
        .mark_rect()
        .encode(x=month_axis, y=day_axis, color=temp_color, facet=decade_facet)
        .properties(
            width=220,
            height=120,
            title=f"Mean temperature in {place.replace('_', ' ')}",
        )
        .configure_legend(orient="right")
    )
    heatmap.display()
    # NOTE: the "viz" directory must already exist.
    heatmap.save(f"viz/{place}.svg")

#### Qatar gets its own heatmap

In [22]:
# Qatar heat map: same encodings as the per-place charts, but with larger
# facets and the legend on top. Saved to viz/Qatar.svg.
is_qatar = decades["place"] == "Qatar"
is_compared_decade = decades["decade"].str.contains("1970s|2010s")
qatar_decades = decades[is_qatar & is_compared_decade]

# Encodings: untitled axes, reversed spectral colour scale.
qatar_month_axis = alt.X("month:O", axis=alt.Axis(title=""))
qatar_day_axis = alt.Y("day:O", axis=alt.Axis(title=""))
qatar_temp_color = alt.Color(
    "mean:Q",
    title="",
    scale=alt.Scale(scheme="spectral", reverse=True),
)
qatar_decade_facet = alt.Facet("decade", columns=4, title="")

doha_heatmap = (
    alt.Chart(qatar_decades)
    .mark_rect()
    .encode(
        x=qatar_month_axis,
        y=qatar_day_axis,
        color=qatar_temp_color,
        facet=qatar_decade_facet,
    )
    .properties(width=300, height=500, title="Qatar")
    .configure_legend(orient="top")
)
# NOTE: the "viz" directory must already exist.
doha_heatmap.save("viz/Qatar.svg")

In [23]:
# Render the Qatar heat map inline.
doha_heatmap

---

## Export

In [24]:
# Write the per-decade calendar-day means to disk without the index column.
decade_csv_path = "data/processed/mean_temp_decade_olympic_cities.csv"
decades.to_csv(decade_csv_path, index=False)

In [25]:
# Write the daily analysis frame to disk without the index column.
daily_csv_path = "data/processed/mean_temp_daily_olympic_cities.csv"
df.to_csv(daily_csv_path, index=False)