# Average temperature changes in Rio, Paris, Mexico City and Rome

#### Load python tools

In [1]:
# Load the lab_black notebook extension (presumably auto-formats cells with Black — confirm).
%load_ext lab_black

In [2]:
import pandas as pd
import glob
from pathlib import Path
import numpy as np
import altair as alt
import altair_stiles as altstiles

In [3]:
# Register the custom altair theme under one name and switch to it.
theme_name = "stiles"
alt.themes.register(theme_name, altstiles.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('grid')

In [4]:
# Allow wide and long frames to render without truncation in cell output.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

# Get data

#### Data pulled from Berkeley Earth project city pages, like this one for [Paris](http://berkeleyearth.lbl.gov/locations/49.03N-2.45E), using the browser's inspector to find the data file URLs

Pandas struggles with inferring column structure in fixed-width files when there's messy text above the data. So the files were cleaned manually before import by stripping the readme info in the first 67 lines of each file.

In [5]:
# Lookup table of the cities covered by this notebook: one row per city with
# the local file stem, display name and Berkeley Earth trend-file URL.
# NOTE(review): hosted_count / hosted_years are not used elsewhere in the
# visible notebook; presumably event-hosting years for each city — confirm.
place_columns = ["filename", "place", "data_url", "hosted_count", "hosted_years"]

place_rows = [
    (
        "rome",
        "Rome",
        "http://berkeleyearth.lbl.gov/auto/Local/TAVG/Text/42.59N-13.09E-TAVG-Trend.txt",
        2,
        [1934, 1990],
    ),
    (
        "rio",
        "Rio",
        "http://berkeleyearth.lbl.gov/auto/Local/TAVG/Text/23.31S-42.82W-TAVG-Trend.txt",
        2,
        [1950, 2014],
    ),
    (
        "paris",
        "Paris",
        "http://berkeleyearth.lbl.gov/auto/Local/TAVG/Text/49.03N-2.45E-TAVG-Trend.txt",
        2,
        [1938, 1998],
    ),
    (
        "mexico_city",
        "Mexico City",
        "http://berkeleyearth.lbl.gov/auto/Local/TAVG/Text/20.09N-98.96W-TAVG-Trend.txt",
        2,
        [1970, 1986],
    ),
    # Disabled entries, kept for provenance of their source URLs:
    #   doha / Doha:
    #     http://berkeleyearth.lbl.gov/auto/Local/TAVG/Text/24.92N-51.43E-TAVG-Trend.txt
    #   new_york / New York:
    #     http://berkeleyearth.lbl.gov/auto/Local/TAVG/Text/40.99N-74.56W-TAVG-Trend.txt
]

places = pd.DataFrame(place_rows, columns=place_columns)

In [6]:
# Display the city lookup table.
places

Unnamed: 0,filename,place,data_url,hosted_count,hosted_years
0,rome,Rome,http://berkeleyearth.lbl.gov/auto/Local/TAVG/T...,2,"[1934, 1990]"
1,rio,Rio,http://berkeleyearth.lbl.gov/auto/Local/TAVG/T...,2,"[1950, 2014]"
2,paris,Paris,http://berkeleyearth.lbl.gov/auto/Local/TAVG/T...,2,"[1938, 1998]"
3,mexico_city,Mexico City,http://berkeleyearth.lbl.gov/auto/Local/TAVG/T...,2,"[1970, 1986]"


#### Make a list of all our fixed-width text files to read

In [7]:
# Names of the raw fixed-width text files to read.
# - glob (not rglob): only bare file names are kept and the reader cell
#   rebuilds each path as data/raw/berkeley/<name>, so a file found in a
#   subfolder could not be opened from that rebuilt path.
# - sorted: directory listing order is filesystem-dependent; sorting keeps the
#   load order (and therefore the row order downstream) reproducible.
all_files = sorted(path.name for path in Path("data/raw/berkeley/").glob("*.txt"))

#### Read each file and put into a list of dataframes

In [8]:
# Column names applied to every fixed-width file, in file order.
# NOTE(review): the "_u" columns presumably hold the uncertainty of the
# preceding value — confirm against the Berkeley Earth file header.
fwf_columns = [
    "year",
    "month",
    "monthly",
    "monthly_u",
    "annual",
    "annual_u",
    "five_year",
    "five_year_u",
    "ten_year",
    "ten_year_u",
    "twenty_year",
    "twenty_year_u",
]

# Read each file into its own frame and tag it with a display name derived
# from the file name (e.g. "mexico_city.txt" -> "Mexico City").
dfs_list = []
for txt_name in all_files:
    place_label = txt_name.replace(".txt", "").replace("_", " ").title()
    frame = pd.read_fwf(
        f"data/raw/berkeley/{txt_name}",
        encoding="Latin-1",
        index_col=False,
        names=fwf_columns,
    )
    dfs_list.append(frame.assign(place=place_label))

#### Concatenate them 

In [9]:
# Stack the per-file frames into one frame with a fresh 0..n-1 index.
src = pd.concat(dfs_list, ignore_index=True)

#### Data types

In [10]:
# Store month and year as strings (the chart later treats year as an ordinal label).
src = src.astype({"month": str, "year": str})

#### Just the columns we need

In [11]:
# Work on an independent copy holding only the columns used downstream.
keep_cols = ["place", "month", "year", "monthly", "annual"]
df = src.loc[:, keep_cols].copy()

#### Fill any missing values

In [12]:
# NOTE(review): this fills missing values with NaN, so it looks like a no-op
# for cells that are already NaN — confirm whether a real fill value (or a
# dropna) was intended here.
df = df.fillna(np.nan)

---

## Aggregate

#### Group by place and year and get the annual mean

In [13]:
# Average the "annual" column over all rows of each (place, year) pair and
# return the group keys as ordinary columns.
df_grp = df.groupby(["place", "year"])["annual"].mean().reset_index()

In [14]:
# Preview the first rows of the per-place, per-year averages.
df_grp.head()

Unnamed: 0,place,year,annual
0,Doha,1843,-0.625125
1,Doha,1844,
2,Doha,1845,
3,Doha,1846,
4,Doha,1847,


---

## Charts

#### Placeholder axis hack so an altair bar chart will look like "[climate stripes](https://showyourstripes.info/s/southamerica/brazil/riodejaneiro)"

In [15]:
# Constant bar height: every stripe gets the same y value so only color varies.
df_grp = df_grp.assign(y=2)

#### Plot the stripes for years after 1900, coloring by the temperature anomaly

In [19]:
# Years are stored as strings, so convert them before filtering. A plain string
# comparison is lexicographic: a missing year cast to "nan" would satisfy
# `> "1900"`, and any non-4-digit year would sort incorrectly. Values that
# cannot be parsed become NaN and fail the numeric test, so they are dropped.
year_num = pd.to_numeric(df_grp["year"], errors="coerce")

# Rows to chart: years after 1900, leaving out Doha.
stripes_src = df_grp[(year_num > 1900) & (df_grp["place"] != "Doha")]

# One row of stripes per place: a bar per year, all the same height, colored
# by the annual value on a reversed red/blue scheme.
alt.Chart(stripes_src).mark_bar().encode(
    x=alt.X(
        "year:O",
        stack="normalize",
        axis=alt.Axis(
            values=[
                "1920",
                "1940",
                "1960",
                "1980",
                "2000",
                "2020",
            ]
        ),
        title="",
    ),
    # The y axis only exists to give the bars height, so hide all of its furniture.
    y=alt.Y(
        "y", axis=alt.Axis(labels=False, ticks=False, grid=False, offset=-100), title=""
    ),
    color=alt.Color(
        "annual",
        scale=alt.Scale(scheme="redblue", reverse=True),
        title="Diff from 1951-80 mean (C°)",
    ),
    tooltip=["place", "annual"],
    facet=alt.Facet("place", columns=1, title=""),
).properties(
    width=650,
    height=80,
    title=" ",
).configure_legend(
    orient="top", titleFontSize=12
).configure_view(
    strokeOpacity=0
).interactive()

---

## Export

In [17]:
# Write the grouped frame (including the placeholder "y" column) without the index.
export_path = "data/processed/climate_stripes_grouped_place_year.csv"
df_grp.to_csv(export_path, index=False)