In [1]:
import pandas as pd
from arrow import Arrow

# Load the raw Tokyo weather export from the Kaggle dataset mount.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/tokyo-weatherdata/weather_tokyo_data.csv')
# Normalise header whitespace first, so every column lookup below uses the clean names.
df.columns = [item.strip() for item in df.columns]
# we need to clean up dates a bit: 'day' holds 'month/day' strings and the year lives in its
# own column, so split the pieces and let pandas assemble the timestamps in one vectorised
# call instead of constructing a date object per row.
day_parts = df['day'].str.split('/')
df['Date'] = pd.to_datetime(pd.DataFrame({
    'year': df['year'],
    'month': day_parts.str[0].astype(int),
    'day': day_parts.str[1].astype(int),
}))
# and our temperature represents negative values with parentheses so we need to fix that
df['Temperature'] = (
    df['temperature']
    .str.replace('(', '-', regex=False)
    .str.replace(')', '', regex=False)
    .astype(float)
)
df.head()

Unnamed: 0,year,day,temperature,humidity,atmospheric pressure,Date,Temperature
0,2022,11/6,13.5,61.0,1019.3,2022-11-06,13.5
1,2022,11/7,13.7,70.0,1018.9,2022-11-07,13.7
2,2022,11/8,15.9,55.0,1016.1,2022-11-08,15.9
3,2022,11/9,14.6,58.0,1022.2,2022-11-09,14.6
4,2022,11/10,15.2,64.0,1020.1,2022-11-10,15.2


In [2]:
# Sanity-check the cleaned frame: row count, per-column non-null counts, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   year                  366 non-null    int64         
 1   day                   366 non-null    object        
 2   temperature           366 non-null    object        
 3   humidity              366 non-null    float64       
 4   atmospheric pressure  366 non-null    float64       
 5   Date                  366 non-null    datetime64[ns]
 6   Temperature           366 non-null    float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(2)
memory usage: 20.1+ KB


In [3]:
from plotly.express import line
# Plot both series against the calendar date. Passing a list to `y` puts plotly express in
# wide-form mode, where the shared axis and the legend default to the generic names 'value'
# and 'variable'; relabel them and add a title so the figure can be read on its own.
# NOTE(review): the two series share one y axis although they are different quantities.
line(
    data_frame=df,
    x='Date',
    y=['Temperature', 'humidity'],
    title='Tokyo temperature and humidity over time',
    labels={'value': 'Temperature / humidity', 'variable': 'Measurement'},
)

In [4]:
from plotly.express import scatter
# Temperature by date, overlaid with a rolling trendline (window of 28 rows) drawn in orange.
scatter(
    data_frame=df,
    x='Date',
    y='Temperature',
    trendline='rolling',
    trendline_options={'window': 28},
    trendline_color_override='orange',
)

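Our 28-day moving average temperature is pretty smooth, but it looks like it overestimates in winter. That is probably because the rolling window is trailing (each point averages the 28 days leading up to it), so the average lags behind while temperatures are still falling.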

In [5]:
# Humidity by date, overlaid with a LOWESS trendline drawn in orange.
scatter(
    data_frame=df,
    x='Date',
    y='humidity',
    trendline='lowess',
    trendline_color_override='orange',
)

Humidity is a lot more volatile.

In [6]:
from plotly.express import scatter
# Humidity (x) against temperature (y), with each point coloured by atmospheric pressure.
scatter(
    data_frame=df,
    x='humidity',
    y='Temperature',
    color='atmospheric pressure',
)

In [7]:
# Humidity (x) against atmospheric pressure (y), with each point coloured by temperature.
scatter(
    data_frame=df,
    x='humidity',
    y='atmospheric pressure',
    color='Temperature',
)

It would be nice if one of these variables were obviously a function of the other two, but that's not how weather works.

In [8]:
from plotly.express import imshow
# Pairwise correlations (pandas' default method) between the three numeric measurements.
correlations = df[['atmospheric pressure', 'humidity', 'Temperature']].corr()
# Pin the colour range to [-1, 1] with a diverging scale so that zero correlation sits at the
# neutral midpoint. Left to auto-range, the scale is stretched over the observed values only
# (the diagonal is always 1), which makes the off-diagonal cells hard to compare.
imshow(
    img=correlations,
    zmin=-1,
    zmax=1,
    color_continuous_scale='RdBu_r',
    title='Correlation between pressure, humidity and temperature',
)