In [1]:
import pandas as pd

# Kaggle CSV in wide format: one row per state, one column per day of December 2024.
DATA = '/kaggle/input/india-statewise-daily-temperature-for-dec-2024/India_December_2024_Temperature.csv'
# Load the raw table as-is; reshaping happens in the next cell.
df = pd.read_csv(DATA)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,State,Dec 1,Dec 2,Dec 3,Dec 4,Dec 5,Dec 6,Dec 7,Dec 8,Dec 9,...,Dec 22,Dec 23,Dec 24,Dec 25,Dec 26,Dec 27,Dec 28,Dec 29,Dec 30,Dec 31
0,Andhra Pradesh,13.212643,10.698105,16.405815,8.497319,6.896153,33.514421,26.366592,30.615332,28.771567,...,28.63742,31.475606,14.344489,22.929506,33.006298,10.007411,24.933718,27.370134,26.649283,29.506918
1,Arunachal Pradesh,12.695942,29.56684,31.158805,10.588216,13.709803,11.024991,17.130984,31.089233,19.527048,...,29.458735,32.622218,17.273692,13.079666,10.746457,23.848608,25.417434,6.913371,7.564845,18.765009
2,Assam,34.221683,27.610922,19.972,12.033237,30.886242,18.723383,20.659154,13.357581,18.972698,...,5.204009,22.686138,25.675395,10.805427,26.051998,14.689476,17.082374,5.923024,28.29882,19.775023
3,Bihar,10.779094,15.231415,17.850692,29.218902,8.933719,5.955923,22.229581,21.429557,31.569247,...,7.876851,28.723005,18.025004,32.105696,24.130962,22.86243,14.293511,11.288263,18.590031,23.730088
4,Chhattisgarh,34.936467,5.651792,5.474914,20.45945,11.042233,32.023758,33.792161,16.842981,18.342867,...,17.991595,25.268479,19.447461,10.116281,28.56961,28.191694,32.47213,7.523127,33.736081,24.335218


We need to transpose the data and do some cleanup so that each row is a date and each column is a state, giving us the temperature in each state on each date.

In [2]:
# Reshape the wide table (one row per state, one column per day) into a
# date-indexed frame with one float column per state. Using 'State' as the index
# before transposing makes the state names the column labels directly (the
# column axis keeps the name 'State', which the later melt/plot cells rely on).
t_df = df.set_index('State').T

# Build the dates from the actual "Dec <day>" column labels rather than assuming
# the columns are exactly days 1..31 in order; a reordered or missing day would
# otherwise be silently mislabelled. The year is not in the labels, so it is
# supplied here (the dataset covers December 2024).
t_df.index = pd.to_datetime(
    ['2024 {}'.format(label) for label in t_df.index],
    format='%Y %b %d',
).rename('index')  # keep the index name 'index' that the later cells reference

# Ensure every temperature column is numeric.
t_df = t_df.astype(float)

t_df.head()

State,Andhra Pradesh,Arunachal Pradesh,Assam,Bihar,Chhattisgarh,Goa,Gujarat,Haryana,Himachal Pradesh,Jharkhand,...,Odisha,Punjab,Rajasthan,Sikkim,Tamil Nadu,Telangana,Tripura,Uttar Pradesh,Uttarakhand,West Bengal
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-01,13.212643,12.695942,34.221683,10.779094,34.936467,8.513479,8.43558,23.593477,19.438263,9.196777,...,19.641743,28.00439,18.706199,19.49793,34.165587,5.157948,32.165643,27.052728,34.23387,19.120605
2024-12-02,10.698105,29.56684,27.610922,15.231415,5.651792,23.987613,30.588284,13.769267,30.692019,13.038872,...,18.041636,29.772795,28.841781,8.516257,7.460116,22.683388,28.665979,15.778935,7.478963,20.054824
2024-12-03,16.405815,31.158805,19.972,17.850692,5.474914,31.23923,24.121757,10.455108,16.323684,13.478002,...,22.899512,12.970303,31.70226,30.590293,28.071835,7.245415,8.427988,24.725285,11.078355,5.263997
2024-12-04,8.497319,10.588216,12.033237,29.218902,20.45945,12.509478,8.943422,25.860264,10.337464,22.225702,...,33.29762,19.533209,15.470334,18.371986,12.378727,25.328755,5.892994,29.111892,26.108641,10.476545
2024-12-05,6.896153,13.709803,30.886242,8.933719,11.042233,30.960864,10.041003,22.545493,15.280419,33.399422,...,13.791955,30.270283,22.716155,11.398827,23.912186,10.974836,17.231522,27.048298,11.476495,6.022607


First let's look at the mean temperature for each day.

In [3]:
from plotly import express

# Average over all states for each day, shaped as a tidy two-column frame
# ('date', 'temperature') for plotly.
daily_mean = (
    t_df.mean(axis='columns')
    .rename('temperature')
    .rename_axis('date')
    .reset_index()
)
express.line(data_frame=daily_mean, x='date', y='temperature')

What do we see? We might expect no clear trend across the month of December, and that is what we get: a pretty noisy line.

Now let's try graphing all the data. If we use a line plot we get a tangled mess, so let's try using a scatter plot.

In [4]:
# Long format: one row per (date, state) pair. melt() names the state column
# after the column-axis name ('State') and the measurements 'value'.
long_df = (
    t_df.rename_axis(index='date')
    .reset_index()
    .melt(id_vars=['date'])
)
express.scatter(data_frame=long_df, x='date', y='value', color='State', height=800)

This data looks really random, suggesting that on average in December all the states in India have essentially the same weather, which is kind of a surprise. Ah. A close look at the data card reveals that this is synthetic data. It sure looks like synthetic data. Let's look at the minimum and maximum values.

In [5]:
# Daily minimum and maximum temperature across all states.
# The aggregations are given as a list rather than a set: a set has no defined
# iteration order, so the min/max column order (and therefore the legend order
# and colour assignment in the plot) could change from one kernel run to the next.
extremes = (
    t_df.agg(['min', 'max'], axis='columns')
    .rename_axis(index='date')
    .reset_index()
    # Name the melted columns explicitly so the axes and legend are self-describing.
    .melt(id_vars=['date'], var_name='statistic', value_name='temperature')
)
express.line(data_frame=extremes, x='date', y='temperature', color='statistic')

In fact it seems like the minimum and maximum temperatures stay more or less the same, but just move around from one state to another over the course of the month.