In [1]:
import numpy as np ## For Linear Algebra
import pandas as pd ## To Work With Data
## For visualizations I'll be using plotly package, this creates interesting and interective visualizations.
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime ## Time Series analysis.
print('Installed')

Installed


In [2]:
df = pd.read_csv("/content/NM Weather Data in India from 1901 to 2017 - Weather Data in India from 1901 to 2017.csv")
df.head()

Unnamed: 0,ID,YEAR,1,2,3,4,5,6,7,8,9,10,11,12
0,0,1901,17.99,19.43,23.49,26.41,28.28,28.6,27.49,26.98,26.26,25.08,21.73,18.95
1,1,1902,19.0,20.39,24.1,26.54,28.68,28.44,27.29,27.05,25.95,24.37,21.33,18.78
2,2,1903,18.32,19.79,22.46,26.03,27.93,28.41,28.04,26.63,26.34,24.57,20.96,18.29
3,3,1904,17.77,19.39,22.95,26.73,27.83,27.85,26.84,26.73,25.84,24.36,21.07,18.84
4,4,1905,17.4,17.79,21.78,24.84,28.32,28.69,27.67,27.47,26.29,26.16,22.07,18.71


In [3]:
df1 = pd.melt(df, id_vars='YEAR', value_vars=df.columns[1:]) ## This will melt the data
df1.head()

Unnamed: 0,YEAR,variable,value
0,1901,1,17.99
1,1902,1,19.0
2,1903,1,18.32
3,1904,1,17.77
4,1905,1,17.4


In [4]:
df1['Date'] = df1['variable'] + ' ' + df1['YEAR'].astype(str)
df1.loc[:,'Date'] = df1['Date'].apply(lambda x : datetime.strptime(x, '%m %Y')) ## Converting String to datetime object
df1.head()

Unnamed: 0,YEAR,variable,value,Date
0,1901,1,17.99,1901-01-01 00:00:00
1,1902,1,19.0,1902-01-01 00:00:00
2,1903,1,18.32,1903-01-01 00:00:00
3,1904,1,17.77,1904-01-01 00:00:00
4,1905,1,17.4,1905-01-01 00:00:00


In [5]:
df1.columns=['Year', 'Month', 'Temprature', 'Date']
df1.sort_values(by='Date', inplace=True) ## To get the time series right.
fig = go.Figure(layout = go.Layout(yaxis=dict(range=[0, df1['Temprature'].max()+1])))
fig.add_trace(go.Scatter(x=df1['Date'], y=df1['Temprature']), )
fig.update_layout(title='Temprature Throught Timeline:',
                 xaxis_title='Time', yaxis_title='Temprature in Degrees')
fig.update_layout(xaxis=go.layout.XAxis(
    rangeselector=dict(
        buttons=list([dict(label="Whole View", step="all"),
                      dict(count=1,label="One Year View",step="year",stepmode="todate")
                     ])),
        rangeslider=dict(visible=True),type="date")
)
fig.show()

In [6]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

df2 = df1[['Year', 'Month', 'Temprature']].copy()
df2 = pd.get_dummies(df2)
y = df2[['Temprature']]
x = df2.drop(columns='Temprature')

dtr = DecisionTreeRegressor()
train_x, test_x, train_y, test_y = train_test_split(x,y,test_size=0.08)
dtr.fit(train_x, train_y)
pred = dtr.predict(test_x)
r2_score(test_y, pred)

0.9653583711301704

In [7]:
next_Year = df1[df1['Year']==2017][['Year', 'Month']]
next_Year.Year.replace(2017,2018, inplace=True)
next_Year= pd.get_dummies(next_Year)
temp_2018 = dtr.predict(next_Year)

temp_2018 = {'Month':df1['Month'].unique(), 'Temprature':temp_2018}
temp_2018=pd.DataFrame(temp_2018)
temp_2018['Year'] = 2018
temp_2018


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





Unnamed: 0,Month,Temprature,Year
0,1,20.59,2018
1,2,23.08,2018
2,3,25.58,2018
3,4,29.17,2018
4,5,30.47,2018
5,6,29.44,2018
6,7,28.31,2018
7,8,28.12,2018
8,9,28.11,2018
9,10,27.24,2018


In [8]:
# Insert an 'ID' column starting from 0
temp_2018.insert(0, 'ID', range(0, len(temp_2018)))

# Save the DataFrame to a CSV file in Google Colab's working directory
temp_2018.to_csv('/content/output_file.csv', index=False)  # Save to Colab's working directory

# Display the first 12 rows of the DataFrame
temp_2018.head(12)

Unnamed: 0,ID,Month,Temprature,Year
0,0,1,20.59,2018
1,1,2,23.08,2018
2,2,3,25.58,2018
3,3,4,29.17,2018
4,4,5,30.47,2018
5,5,6,29.44,2018
6,6,7,28.31,2018
7,7,8,28.12,2018
8,8,9,28.11,2018
9,9,10,27.24,2018


In [9]:
forecasted_temp = pd.concat([df1,temp_2018], sort=False).groupby(by='Year')['Temprature'].mean().reset_index()
fig = go.Figure(data=[
    go.Scatter(name='Yearly Mean Temprature', x=forecasted_temp['Year'], y=forecasted_temp['Temprature'], mode='lines'),
    go.Scatter(name='Yearly Mean Temprature', x=forecasted_temp ['Year'], y=forecasted_temp['Temprature'], mode='markers')
])
fig.update_layout(title='Forecasted Temprature:',
                 xaxis_title='Time', yaxis_title='Temprature in Degrees')
fig.show()