# Daily temperature averages in Qatar

#### Load python tools

In [1]:
%load_ext lab_black

In [2]:
import requests
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import zipfile
import glob

In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

---

## Get weather averages from [weatherspark](https://weatherspark.com/s/105083/2/Average-Fall-Weather-in-Doha-Qatar#Figures-Temperature)

In [5]:
src = pd.read_csv("data/raw/Daily_Averages_for_149641_Hamad_International_Airport.csv")

In [6]:
src.columns = (
    src.columns.str.lower()
    .str.strip()
    .str.replace("/", "_", regex=False)
    .str.replace(" (°f)", "", regex=False)
    .str.replace(" (%)", "_pct", regex=False)
    .str.replace(" (in)", "_inch", regex=False)
    .str.replace(" (hr)", "_hour", regex=False)
    .str.replace(" (mph)", "_mph", regex=False)
    .str.strip()
)

#### Just the temperature columns

In [7]:
spike_cols = [col for col in src.columns if "temperature_" in col]

In [8]:
spike_cols

['temperature_high_mean',
 'temperature_high_percentile10th',
 'temperature_high_percentile25th',
 'temperature_high_percentile75th',
 'temperature_high_percentile90th',
 'temperature_low_mean',
 'temperature_low_percentile10th',
 'temperature_low_percentile25th',
 'temperature_low_percentile75th',
 'temperature_low_percentile90th',
 'temperature_mean',
 'perceivedtemperature_high',
 'perceivedtemperature_low',
 'watertemperature_mean',
 'watertemperature_percentile10th',
 'watertemperature_percentile25th',
 'watertemperature_percentile75th',
 'watertemperature_percentile90th',
 'temperature_frigid_pct',
 'temperature_freezing_pct',
 'temperature_chilly_pct',
 'temperature_cold_pct',
 'temperature_cool_pct',
 'temperature_comfortable_pct',
 'temperature_warm_pct',
 'temperature_hot_pct',
 'temperature_sweltering_pct']

In [9]:
src_slim = src[
    [
        "timestamp",
        "temperature_high_mean",
        "temperature_high_percentile10th",
        "temperature_high_percentile25th",
        "temperature_high_percentile75th",
        "temperature_high_percentile90th",
        "temperature_low_mean",
        "temperature_low_percentile10th",
        "temperature_low_percentile25th",
        "temperature_low_percentile75th",
        "temperature_low_percentile90th",
        "temperature_mean",
    ]
]

In [10]:
src_melt = pd.melt(
    src_slim,
    id_vars=["timestamp"],
    value_vars=[
        "temperature_high_mean",
        "temperature_high_percentile10th",
        "temperature_high_percentile25th",
        "temperature_high_percentile75th",
        "temperature_high_percentile90th",
        "temperature_low_mean",
        "temperature_low_percentile10th",
        "temperature_low_percentile25th",
        "temperature_low_percentile75th",
        "temperature_low_percentile90th",
        "temperature_mean",
    ],
    var_name="category",
    value_name="value",
)

In [11]:
line_categories = ["temperature_high_mean", "temperature_low_mean", "temperature_mean"]
area_categories = [
    "temperature_high_percentile25th",
    "temperature_high_percentile75th",
    "temperature_low_percentile25th",
    "temperature_low_percentile75th",
]

In [12]:
world_cup_dates = ["2022-11-21", "2022-12-17"]

In [13]:
world_cup_dates

['2022-11-21', '2022-12-17']

In [14]:
match_dates = pd.DataFrame(
    [
        "2022-11-21",
        "2022-11-22",
        "2022-11-23",
        "2022-11-24",
        "2022-11-25",
        "2022-11-26",
        "2022-11-27",
        "2022-11-28",
        "2022-11-29",
        "2022-11-30",
        "2022-12-01",
        "2022-12-02",
        "2022-12-03",
        "2022-12-04",
        "2022-12-05",
        "2022-12-06",
        "2022-12-09",
        "2022-12-10",
        "2022-12-13",
        "2022-12-14",
        "2022-12-17",
        "2022-12-18",
    ],
    columns=["match_date"],
)

In [15]:
match_dates

Unnamed: 0,match_date
0,2022-11-21
1,2022-11-22
2,2022-11-23
3,2022-11-24
4,2022-11-25
5,2022-11-26
6,2022-11-27
7,2022-11-28
8,2022-11-29
9,2022-11-30


In [16]:
dates_df = pd.DataFrame(
    [{"x_min": "2022-11-21", "x_max": "2022-12-18", "y_min": 0, "y_max": 120}]
)

In [17]:
dates = (
    alt.Chart(dates_df)
    .mark_rect(color="#e9e9e9", opacity=0.3)
    .encode(
        x=alt.X("x_min:T", title=" "),
        x2="x_max:T",
        y=alt.Y("y_min:Q", title=" "),
        y2="y_max:Q",
    )
)

low_line = (
    alt.Chart(src_slim)
    .mark_line(color="#005f66")
    .encode(
        x=alt.X("timestamp:T", axis=alt.Axis(format="%b", tickCount=8)),
        y=alt.Y("temperature_low_mean", axis=alt.Axis(tickCount=6)),
    )
)

low_band = (
    alt.Chart(src_slim)
    .mark_area(opacity=0.2, color="#005f66")
    .encode(
        x="timestamp:T",
        y="temperature_low_percentile25th",
        y2="temperature_low_percentile75th",
    )
)

low_band2 = (
    alt.Chart(src_slim)
    .mark_area(opacity=0.1, color="#005f66")
    .encode(
        x="timestamp:T",
        y="temperature_low_percentile10th",
        y2="temperature_low_percentile90th",
    )
)

high_line = (
    alt.Chart(src_slim)
    .mark_line(color="#d95f1a")
    .encode(x="timestamp:T", y="temperature_high_mean")
)

high_band = (
    alt.Chart(src_slim)
    .mark_area(opacity=0.2, color="#d95f1a")
    .encode(
        x="timestamp:T",
        y="temperature_high_percentile25th",
        y2="temperature_high_percentile75th",
    )
)

high_band2 = (
    alt.Chart(src_slim)
    .mark_area(opacity=0.1, color="#d95f1a")
    .encode(
        x="timestamp:T",
        y="temperature_high_percentile10th",
        y2="temperature_high_percentile90th",
    )
)

(
    dates + low_band2 + low_band + low_line + high_band2 + high_band + high_line
).properties(width=520, height=400, title="")

  for col_name, dtype in df.dtypes.iteritems():
