# Hourly temperature averages in Rio, Paris, Mexico City and Rome

#### Load Python tools

In [1]:
# Enable the lab_black IPython extension for this kernel
# (presumably auto-formats code cells with Black — confirm against the extension docs).
%load_ext lab_black

In [115]:
import requests
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import zipfile
import glob
import os

In [3]:
# Make the altair_stiles theme available to Altair under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme switched on here is "grid", not the "stiles" name
# registered on the previous line — confirm "grid" is the intended theme.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Let pandas render up to 1000 columns and 1000 rows before truncating output.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

# Turn off Altair's max-rows check so larger frames can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

In [34]:
# One NOAA CSV export per country, listed in the order the frames are unpacked below.
noaa_exports = (
    "data/raw/noaa/2978566.csv",
    "data/raw/noaa/2978570.csv",
    "data/raw/noaa/2978604.csv",
    "data/raw/noaa/2978607.csv",
)

# Read every export and discard any row that contains a missing value.
italy, france, mexico, brazil = (
    pd.read_csv(csv_path).dropna() for csv_path in noaa_exports
)

In [35]:
# Fill in TAVG for the Mexico export as the midpoint of TMAX and TMIN
# (presumably this export has no usable TAVG of its own — confirm against the CSV).
mexico["TAVG"] = (mexico["TMAX"] + mexico["TMIN"]) / 2

In [47]:
# Stack the four country frames into a single frame.
# Row labels are carried over from each input frame (no re-indexing is requested).
country_frames = [italy, france, mexico, brazil]
src = pd.concat(country_frames)

In [48]:
# Normalise every column label to lower case.
src.columns = src.columns.map(str.lower)

In [49]:
# Only the average temperature is used from here on, so discard the max/min columns.
# Reassigning (instead of inplace=True) plus errors="ignore" keeps this cell safe to
# re-run: a second execution no longer raises KeyError for already-removed columns.
src = src.drop(columns=["tmax", "tmin"], errors="ignore")

In [50]:
# Break "name" (e.g. "ROMA CIAMPINO, IT") into the place and the trailing country code.
# Splitting once from the right guarantees at most two pieces, so a place name that
# itself contains ", " cannot produce a third column and break the assignment.
src[["place", "country"]] = src["name"].str.rsplit(", ", n=1, expand=True)

In [51]:
# Station identifiers kept for the analysis; they are matched against the "station" column.
# NOTE(review): the prefixes are IT, FR and BR only — no station from the Mexico
# export is listed, although that export is loaded above. Confirm this is intended.
stations = ["IT000016239", "FRM00007149", "BRM00083746"]

---

In [54]:
# Keep only rows recorded at one of the selected stations; copy so that later
# column assignments do not write into a view of `src`.
is_selected_station = src["station"].isin(stations)
df = src.loc[is_selected_station].copy()

In [55]:
# Most recent date available for each station.
df.groupby("station")["date"].agg("max")

station
BRM00083746    2022-05-13
FRM00007149    2022-05-13
IT000016239    2021-12-31
Name: date, dtype: object

In [57]:
# Preview the first rows of the station-filtered frame.
df.head()

Unnamed: 0,station,name,date,tavg,place,country
42692,IT000016239,"ROMA CIAMPINO, IT",1951-10-01,62.0,ROMA CIAMPINO,IT
42693,IT000016239,"ROMA CIAMPINO, IT",1951-10-02,64.0,ROMA CIAMPINO,IT
42694,IT000016239,"ROMA CIAMPINO, IT",1951-10-03,66.0,ROMA CIAMPINO,IT
42695,IT000016239,"ROMA CIAMPINO, IT",1951-10-04,67.0,ROMA CIAMPINO,IT
42696,IT000016239,"ROMA CIAMPINO, IT",1951-10-05,65.0,ROMA CIAMPINO,IT


#### Dates

In [64]:
# Parse the date column once and derive every string field from that single result,
# instead of re-parsing the whole column for each derived field.
parsed_dates = pd.to_datetime(df["date"])

# All four columns are stored as zero-padded strings (e.g. "1951-10-01", "1951", "10", "01").
df["date"] = parsed_dates.dt.strftime("%Y-%m-%d")
df["year"] = parsed_dates.dt.strftime("%Y")
df["month"] = parsed_dates.dt.strftime("%m")
df["day"] = parsed_dates.dt.strftime("%d")

In [127]:
# Rows for the Italian station after 1970. "year" holds strings, so the
# comparison against "1970" is a string comparison.
italy_recent = df[(df["country"] == "IT") & (df["year"] > "1970")]

# Encodings: month across, day of month down, temperature as colour, one panel per year.
month_x = alt.X(
    "month:O",
    axis=alt.Axis(),
)
day_y = alt.Y(
    "day:O",
    axis=alt.Axis(),
)
# NOTE(review): the legend says "hourly", but the rows previewed earlier are one per
# date — confirm whether this label should describe a daily average instead.
temperature_color = alt.Color(
    "tavg:Q",
    title="Avg. hourly temp",
    scale=alt.Scale(scheme="spectral", reverse=True),
)
year_facet = alt.Facet("year", columns=9)

heatmap = (
    alt.Chart(italy_recent)
    .mark_rect()
    .encode(
        x=month_x,
        y=day_y,
        color=temperature_color,
        facet=year_facet,
    )
)

# Display a sized copy of the chart; `heatmap` itself keeps its default size.
heatmap.properties(width=100, height=50)

---

In [90]:
# Scratch value: the download URL for a single year (history base URL followed by 1965).
'https://weatherspark.com/download/history/71779/1965'

In [113]:
# Base of the history download URL; the example above shows a year appended after the final slash.
url = "https://weatherspark.com/download/history/71779/"

In [117]:
# HTTP headers sent with each download request: a keep-alive connection and a
# desktop Chrome User-Agent string.
headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}

In [124]:
# Download one zip archive of history per year and save it under data/raw/weather/rome/.
#
# Credentials are read from the environment rather than embedded in the URL, so they
# never end up in the saved notebook. Set WEATHERSPARK_USER and WEATHERSPARK_PASSWORD
# before running; a missing variable raises KeyError here instead of sending an
# unauthenticated request.
weatherspark_auth = (
    os.environ["WEATHERSPARK_USER"],
    os.environ["WEATHERSPARK_PASSWORD"],
)

# Base download URL; the year is appended per request.
url = "https://weatherspark.com/download/history/71779/"

# Make sure the target folder exists before writing into it.
os.makedirs("data/raw/weather/rome", exist_ok=True)

for year in range(1965, 1966):
    url_year = f"{url}{year}"
    r = requests.get(
        url_year,
        allow_redirects=True,
        headers=headers,
        auth=weatherspark_auth,  # HTTP basic auth, replacing user:password@ in the URL
        timeout=60,  # do not hang the kernel indefinitely on a stalled connection
    )
    # Fail loudly on 4xx/5xx rather than saving an error page as a .zip file.
    r.raise_for_status()
    # The context manager closes the file handle even if the write fails.
    with open(f"data/raw/weather/rome/rome-{year}.zip", "wb") as archive:
        archive.write(r.content)