In [119]:
import glob
import os

import numpy as np
import pandas as pd

In [156]:
from datetime import datetime, date, timedelta

In [2]:
import altair as alt
# Turn off Altair's max-rows check so the charts below can be built from the full dataset.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# data

In [3]:
# Directory that holds the downloaded temperature CSV files.
# Set the TEMPERATURE_DIR environment variable to point somewhere else; when it is
# unset the original local path is used, so existing runs behave as before.
# os.path.join(..., '') guarantees a trailing separator, because the glob pattern
# is built by plain string concatenation (parent_dir + "*").
parent_dir = os.path.join(
    os.environ.get(
        'TEMPERATURE_DIR',
        '/Users/mio/Documents/GitHub/lede-project2/temperature/',
    ),
    '',
)

In [74]:
# Every entry directly inside the data directory. glob returns matches in arbitrary
# order, so sort them to make the load order (and any error message) reproducible.
files = sorted(glob.glob(parent_dir + "*"))

In [191]:
# Initialise `max_temp` as an empty DataFrame before the station files are loaded.
max_temp = pd.DataFrame()

In [192]:
# Load every file and keep only its dated rows, then combine them in a single concat.
# Collecting the pieces first avoids re-copying the accumulated frame on every
# iteration and makes this cell safe to re-run (it no longer appends to a previous result).
daily_frames = []
for file in files:
    raw = pd.read_csv(file, encoding='shift-jis').reset_index()
    date_col = raw.columns[0]
    # Anything in the first column that is not a date becomes NaT and is dropped
    # (presumably the extra header lines of the download — TODO confirm).
    raw[date_col] = pd.to_datetime(raw[date_col], errors='coerce')
    # After indexing by date, keep only the first remaining column as a one-column frame.
    daily = raw.dropna(subset=[date_col]).set_index(date_col).iloc[:, [0]]
    daily.index.name = None
    daily_frames.append(daily)

# With no input files, fall back to an empty frame as before.
max_temp = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

In [193]:
# Put the observations in chronological order, drop rows with any missing value,
# and give the first column a readable name.
max_temp = (
    max_temp
    .sort_index()
    .dropna(how='any')
    .rename(columns={max_temp.columns[0]: 'Temperature'})
)

In [194]:
# Project every observation onto the same reference year so days can be compared
# across years. 2020 is a leap year, so 29 February observations remain valid dates.
max_temp['date'] = max_temp.index.map(
    lambda stamp: stamp.replace(year=2020)
)

In [195]:
# Calendar year of each observation, taken from the datetime index.
max_temp['year'] = max_temp.index.year

In [196]:
# Make sure the readings are numeric before any comparison or aggregation.
max_temp['Temperature'] = max_temp['Temperature'].astype(float)

In [197]:
# Spot-check ten random rows; the fixed seed keeps the displayed rows the same on every run.
max_temp.sample(10, random_state=0)

Unnamed: 0,Temperature,date,year
1876-08-23,31.1,2020-08-23,1876
1941-11-14,14.4,2020-11-14,1941
1936-01-27,7.1,2020-01-27,1936
1929-05-26,24.2,2020-05-26,1929
2019-03-03,10.4,2020-03-03,2019
1987-07-13,30.3,2020-07-13,1987
1942-08-20,27.7,2020-08-20,1942
1878-12-17,9.5,2020-12-17,1878
1876-06-21,25.8,2020-06-21,1876
1903-01-25,15.5,2020-01-25,1903


# heatmap

In [212]:
# Chart input: one row per observation with
#   date -> 'MM-DD' label (x axis), year -> string label (y axis).
# 29 February is removed so every year contributes the same set of day labels.
# Built as one chain so no column is assigned on a filtered slice
# (which would raise SettingWithCopyWarning).
source = (
    max_temp
    .reset_index()
    .assign(
        date=lambda frame: frame['date'].dt.strftime('%m-%d'),
        year=lambda frame: frame['year'].astype(str),
    )
    .loc[lambda frame: frame['date'] != '02-29']
)

# Linear colour scale: each temperature stop is paired with the colour used at that value.
temperature_stops = {
    0: 'darkblue',
    10: 'skyblue',
    20: 'white',
    30: 'orange',
    35: 'darkred',
    40: 'black',
}
scale = alt.Scale(
    type='linear',
    domain=list(temperature_stops.keys()),
    range=list(temperature_stops.values()),
)

# X ticks: the first day of every month as 'MM-DD' labels ('01-01' ... '12-01').
# 'MS' (month start) yields these dates directly and avoids the 'M' alias,
# which recent pandas versions deprecate.
xticks = pd.date_range('2020-01-01', periods=12, freq='MS').strftime('%m-%d').to_list()
# Y ticks: every 20th year from 1870, plus 2022, as plain Python strings.
yticks = [str(year) for year in range(1870, 2022, 20)] + ['2022']

# Heatmap with one rectangle per (day of year, year), coloured by that day's maximum temperature.
heatmap = alt.Chart(source).mark_rect(
).encode(
    x=alt.X('date:O', axis=alt.Axis(values=xticks)),
    y=alt.Y('year:O', axis=alt.Axis(values=yticks)),
    color=alt.Color('Temperature:Q', scale=scale)
).properties(
    height=600, width=500,
    # The plotted data reach back to the 1870s (the y ticks start at 1870),
    # so the previous "1975" in the title was a typo.
    title='Daily Maximum Temperatures in Tokyo, 1875-present')

In [213]:
# Render the heatmap as the cell output.
heatmap

In [214]:
#heatmap.save('heatmap.svg')

# chart

In [223]:
# Number of days per calendar month whose maximum reached 35 degrees or more.
# The boolean flag is resampled instead of filtering the rows first, so that months
# without any such day, including those before the first and after the last hot day
# in the record, are kept as explicit zeros and enter the period averages.
# MonthEnd() gives the same month-end bins as the 'M' alias, which recent pandas deprecates.
heatwave = (max_temp.Temperature >= 35).resample(pd.offsets.MonthEnd()).sum()

In [226]:
# Turn the monthly counts into a one-column DataFrame so more columns can be added.
# NOTE(review): not re-runnable on its own; a DataFrame has no `to_frame`.
heatwave = heatwave.to_frame()

In [231]:
# Split the monthly index into separate year and month columns for grouping.
heatwave = heatwave.assign(
    year=heatwave.index.year,
    month=heatwave.index.month,
)

In [353]:
# Mean monthly count for the years from 2020 onwards.
data_2020s = (
    heatwave.loc[heatwave['year'] >= 2020]
    .groupby('month')['Temperature']
    .mean()
    .rename('2020s')
)

In [354]:
# Mean monthly count for 2010 through 2019.
in_2010s = (heatwave['year'] >= 2010) & (heatwave['year'] < 2020)
data_2010s = (
    heatwave.loc[in_2010s]
    .groupby('month')['Temperature']
    .mean()
    .rename('2010s')
)

In [355]:
# NOTE(review): this cell repeats the previous `data_2010s` assignment verbatim;
# it recomputes the same value and can be deleted.
data_2010s = heatwave[(heatwave.year>=2010)&(heatwave.year<2020)].groupby('month').Temperature.mean().rename('2010s')

In [356]:
# Mean monthly count for 2000 through 2009.
in_2000s = (heatwave['year'] >= 2000) & (heatwave['year'] < 2010)
data_2000s = (
    heatwave.loc[in_2000s]
    .groupby('month')['Temperature']
    .mean()
    .rename('2000s')
)

In [357]:
#data_1990s = heatwave[(heatwave.year>=1990)&(heatwave.year<2000)].groupby('month').Temperature.mean().rename('1990s')

In [358]:
#data_1990_2009 = heatwave[(heatwave.year>=1990)&(heatwave.year<2010)].groupby('month').Temperature.mean().rename('1990-2009')

In [359]:
#data_1875_1989 = heatwave[heatwave.year<1990].groupby('month').Temperature.mean().rename('before 1990')

In [422]:
# Mean monthly count for all years before 2000.
data_before2000 = (
    heatwave.loc[heatwave['year'] < 2000]
    .groupby('month')['Temperature']
    .mean()
    .rename('before2000')
)

In [423]:
# Side-by-side table: one column per period, one row per month.
# The column order here is the order in which the periods appear in the charts.
period_columns = [data_2020s, data_2010s, data_2000s, data_before2000]
timeseries = pd.concat(period_columns, axis=1)

In [424]:
# Reshape to long form with columns year (the period label), month and days.
timeseries = (
    timeseries
    .rename_axis(columns='year')
    .unstack()
    .rename('days')
    .reset_index()
)

In [386]:
# Inspect the distinct month values.
# NOTE(review): the recorded output shows abbreviations ('Jan', ...), so this cell was
# last executed after the month conversion that appears below it in the notebook.
timeseries.month.unique()

array(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
       'Oct', 'Nov', 'Dec'], dtype=object)

In [426]:
# Replace month numbers with the English abbreviations used by the chart sort order.
# A fixed lookup does not depend on the system locale, and values that are already
# abbreviations pass through unchanged, so the cell can be re-run safely.
month_abbr = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
    7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec',
}
timeseries['month'] = timeseries['month'].map(lambda value: month_abbr.get(value, value))

In [433]:
# Row order for the facets: periods in the order they appear in the data.
sort_ = timeseries.year.unique()
# Explicit month order; the nominal x axis would otherwise sort alphabetically.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# One bar-chart row per period on a shared 0-8 y scale so the rows are directly comparable.
bars = alt.Chart(timeseries).mark_bar().encode(
    x=alt.X('month:N', sort=months),
    y=alt.Y('days:Q', scale=alt.Scale(domain=(0, 8))),
    color='year:N',
    row=alt.Row('year:N', sort=sort_)
).configure_axis(
    grid=False
).properties(height=100, width=400,
            # Spelling fixed: "Celsius" (was "Celcius").
            title='Average Number Of Times Daily Max Temperatures Were Above 35 Degrees Celsius')

bars

In [412]:
# Display floats with one decimal place and thousands separators in later pandas output.
pd.set_option('display.float_format',  '{:,.1f}'.format)

In [431]:
# Wide view of the charted numbers: one row per period and one column per month,
# both in the same order as the chart.
(
    timeseries
    .pivot(index='year', columns='month', values='days')
    .loc[sort_, months]
)

month,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020s,0.0,0.0,0.0,0.0,0.0,2.0,1.0,6.5,0.5,0.0,0.0,0.0
2010s,0.0,0.0,0.0,0.0,0.0,0.1,2.4,4.9,0.6,0.0,0.0,0.0
2000s,0.0,0.0,0.0,0.0,0.0,0.1,1.4,2.0,0.1,0.0,0.0,0.0
before2000,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.7,0.1,0.0,0.0,0.0


In [434]:
#bars.save('number_of_heatwaves.svg')