In [18]:
import torch  
import torch.nn as nn
import numpy as np
import pandas as pd
import geopandas as gpd
import sklearn.preprocessing as skp
from sklearn.model_selection import train_test_split as tts
import plotly.graph_objects as plt
import seaborn as s
import matplotlib.pyplot as pt

# Part 1

In [2]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [3]:
from ucimlrepo import fetch_ucirepo, list_available_datasets
# Print the catalogue of datasets offered through ucimlrepo.
list_available_datasets() # the minimum-temperatures-in-Australia dataset is not available here

-------------------------------------
The following datasets are available:
-------------------------------------
Dataset Name                                                                            ID    
------------                                                                            --    
Abalone                                                                                 1     
Adult                                                                                   2     
Annealing                                                                               3     
Audiology (Standardized)                                                                8     
Auto MPG                                                                                9     
Automobile                                                                              10    
Balance Scale                                                                           12    
Balloons                       

In [4]:
import os
from kaggle.api.kaggle_api_extended import KaggleApi

# Set up and authenticate the Kaggle API client.
api = KaggleApi()
api.authenticate()

# Download the dataset archive and unzip it into the download directory.
dataset = 'paulbrabban/daily-minimum-temperatures-in-melbourne'
download_dir = '.'
api.dataset_download_files(dataset, path=download_dir, unzip=True)

# Read the CSV from the download directory instead of a machine-specific
# absolute path, so the notebook is runnable on any machine.
# NOTE(review): the file name is taken from the previously hard-coded path;
# confirm it matches the name of the file inside the downloaded archive.
file_path = os.path.join(download_dir, 'daily-minimum-temperatures-in-me.csv')

# header=0 discards the file's own header row; `names` supplies our column names.
df = pd.read_csv(file_path, header=0, on_bad_lines='skip', names=['Date','min_temp'])
df['Date'] = pd.to_datetime(df['Date'])
# Values that cannot be parsed as numbers become NaN (errors='coerce').
df['min_temp'] = pd.to_numeric(df['min_temp'], errors='coerce')
df.head()

Dataset URL: https://www.kaggle.com/datasets/paulbrabban/daily-minimum-temperatures-in-melbourne


Unnamed: 0,Date,min_temp
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8


In [5]:
# Count missing values per column *before* cleaning. Temperatures that could not
# be parsed were coerced to NaN when the column was converted with pd.to_numeric.
missing_counts = df.isna().sum()

# Remove the rows that have no usable temperature reading.
df = df.dropna(subset=['min_temp'])

# Show the pre-cleaning counts as the cell output (previously this was computed but discarded).
missing_counts

In [6]:
# df.info() prints its report itself and returns None, so it must not be passed
# to print() (that is what produced the stray "None" in the output).
df.info()

# Leave the summary statistics as the last expression so they get rich display.
df.describe()

<class 'pandas.core.frame.DataFrame'>
Index: 3647 entries, 0 to 3649
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      3647 non-null   datetime64[ns]
 1   min_temp  3647 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 85.5 KB




                                 Date     min_temp
count                           3647  3647.000000
mean   1986-01-01 03:18:12.733753792    11.186647
min              1981-01-01 00:00:00     0.000000
25%              1983-07-03 12:00:00     8.300000
50%              1986-01-02 00:00:00    11.000000
75%              1988-07-01 12:00:00    14.000000
max              1990-12-31 00:00:00    26.300000
std                              NaN     4.061671 None


In [7]:
# Check the column types after the datetime / numeric conversions done at load time.
df.dtypes

Date        datetime64[ns]
min_temp           float64
dtype: object

In [8]:
# NOTE: in this notebook `plt` is the alias for plotly.graph_objects, not matplotlib.
# Both figures plot the same series and share their axis labels.
temp_axes = dict(xaxis_title='Date', yaxis_title='Minimum Temperature')

# Figure 1: one marker per daily reading.
markers_trace = plt.Scatter(x=df['Date'], y=df['min_temp'], mode='markers')
fig1 = plt.Figure(data=markers_trace)
fig1.update_layout(
    title='Daily Minimum Temperatures in Melbourne (Scatter Plot)',
    **temp_axes,
)
fig1.show(renderer='browser')

# Figure 2: the same readings joined into a line.
line_trace = plt.Scatter(x=df['Date'], y=df['min_temp'], mode='lines')
fig2 = plt.Figure(data=line_trace)
fig2.update_layout(
    title='Daily Minimum Temperatures in Melbourne (Line Plot)',
    **temp_axes,
)
fig2.show(renderer='browser')

In [9]:
# Standardize the temperature column and plot the standardized values.
# NOTE(review): `sacle` reads like a typo for "scale"; the name is kept as-is
# because cells outside this one may reference it.
sacle = skp.StandardScaler()
temps_as_float = df[['min_temp']].astype(float)
df['min_temp_scaled'] = sacle.fit_transform(temps_as_float)

scaled_trace = plt.Scatter(
    x=df['Date'],
    y=df['min_temp_scaled'],
    mode='markers',
    name='Scaled Minimum Temperatures',
)
fig = plt.Figure(data=scaled_trace)
fig.update_layout(
    title='Daily Minimum Temperatures in Melbourne (scatter Plot)',
    xaxis_title='Date',
    yaxis_title='scaled Minimum Temperature',
)
fig.show(renderer='browser')

# The data follows a sine-like (seasonal) pattern.

#### Rolling mean
- A rolling mean is a statistical calculation that analyzes data points by computing a series of averages over successive subsets (windows) of the full dataset. It is particularly useful for smoothing out short-term fluctuations and highlighting longer-term trends or patterns in time series data.

- The main purpose of a rolling mean is to smooth out short-term fluctuations or noise in the data, providing a clearer picture of the underlying trends or patterns.

In [10]:
# Mean over a 7-row window; the first 6 rows have no complete window and are NaN.
rolling_7 = df['min_temp'].rolling(window=7).mean()
df['rolling_mean'] = rolling_7

# Drop the rows left without a rolling mean (and any other row containing NaN).
df.dropna(inplace=True)
df

Unnamed: 0,Date,min_temp,min_temp_scaled,rolling_mean
6,1981-01-07,15.8,1.135982,17.057143
7,1981-01-08,17.4,1.529963,16.585714
8,1981-01-09,21.8,2.613409,17.142857
9,1981-01-10,20.0,2.170181,17.314286
10,1981-01-11,16.2,1.234477,17.542857
...,...,...,...,...
3645,1990-12-27,14.0,0.692754,13.100000
3646,1990-12-28,13.6,0.594259,13.171429
3647,1990-12-29,13.5,0.569635,13.214286
3648,1990-12-30,15.7,1.111359,13.471429


In [13]:
# Normalize both columns to the [0, 1] range with min-max scaling.
#
# Each column gets its own scaler. Re-fitting one shared scaler would leave it
# holding only the rolling-mean range, so using it later to inverse-transform
# values in `norm_temp` space (e.g. model predictions) would apply the wrong min/max.
scaler = skp.MinMaxScaler()      # fitted on min_temp (the series the sequences are built from)
rm_scaler = skp.MinMaxScaler()   # fitted on rolling_mean

df['norm_temp'] = scaler.fit_transform(df[['min_temp']])
df['norm_rm'] = rm_scaler.fit_transform(df[['rolling_mean']])
df

Unnamed: 0,Date,min_temp,min_temp_scaled,rolling_mean,norm_temp,norm_rm
6,1981-01-07,15.8,1.135982,17.057143,0.600760,0.786378
7,1981-01-08,17.4,1.529963,16.585714,0.661597,0.760836
8,1981-01-09,21.8,2.613409,17.142857,0.828897,0.791022
9,1981-01-10,20.0,2.170181,17.314286,0.760456,0.800310
10,1981-01-11,16.2,1.234477,17.542857,0.615970,0.812693
...,...,...,...,...,...,...
3645,1990-12-27,14.0,0.692754,13.100000,0.532319,0.571981
3646,1990-12-28,13.6,0.594259,13.171429,0.517110,0.575851
3647,1990-12-29,13.5,0.569635,13.214286,0.513308,0.578173
3648,1990-12-30,15.7,1.111359,13.471429,0.596958,0.592105


In [12]:
# Inspect the min-max normalized temperatures.
norm_temp_trace = plt.Scatter(
    x=df['Date'],
    y=df['norm_temp'],
    mode='markers',
    name='normalized Minimum Temperatures',
)
fig_norm = plt.Figure(data=norm_temp_trace)
fig_norm.update_layout(
    title='Daily Minimum Temperatures in Melbourne (scatter Plot)',
    xaxis_title='Date',
    yaxis_title='normalized Minimum Temperature',
)
fig_norm.show(renderer='browser')

In [14]:
# Inspect the min-max normalized rolling mean.
norm_rm_trace = plt.Scatter(
    x=df['Date'],
    y=df['norm_rm'],
    mode='markers',
    name='normalized rolling mean',
)
fig_rm = plt.Figure(data=norm_rm_trace)
fig_rm.update_layout(
    title='Normalized rolling mean',
    xaxis_title='Date',
    yaxis_title='normalized rolling mean',
)
fig_rm.show(renderer='browser')

In [15]:
def create_sequences(data, seq_length):
    """Split a series into sliding input windows and their next-step targets.

    For every position ``i`` the window is ``data[i : i + seq_length]`` and the
    label is the element immediately after it, ``data[i + seq_length]``.

    Args:
        data: Sequence of observations (list, NumPy array or pandas Series).
            It is converted with ``np.asarray`` so indexing is always positional.
        seq_length: Number of consecutive observations in each window.

    Returns:
        Tuple ``(sequences, labels)`` of NumPy arrays with
        ``len(data) - seq_length`` entries each; ``sequences`` has shape
        ``(n, seq_length)`` for 1-D input. When ``data`` is too short to form a
        single pair, the arrays are empty but keep those shapes.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    # Plain ndarray => positional indexing. Indexing a pandas Series with an
    # integer index by `data[i + seq_length]` would be label-based and can
    # raise KeyError or pick the wrong row when the index has gaps.
    values = np.asarray(data)
    n_windows = len(values) - seq_length

    if n_windows <= 0:
        # Not enough data for one (window, label) pair: return empty arrays
        # that still carry the window axis, so shape-dependent callers work.
        trailing = values.shape[1:]
        empty_sequences = np.empty((0, seq_length) + trailing, dtype=values.dtype)
        empty_labels = np.empty((0,) + trailing, dtype=values.dtype)
        return empty_sequences, empty_labels

    sequences = np.array([values[i:i + seq_length] for i in range(n_windows)])
    # The labels are simply the series shifted by seq_length; copy so the
    # result does not alias the caller's data.
    labels = values[seq_length:].copy()
    return sequences, labels


In [16]:
# Build 7-step input windows and next-step targets from the normalized temperatures.
norm_temps = df['norm_temp'].to_numpy()
x, y = create_sequences(norm_temps, seq_length=7)
print(x.shape,'\n',y.shape)

(3634, 7) 
 (3634,)


# Part 2 

In [20]:
# Simple neural network to perform regression
class RegressionNN(nn.Module):
    """Fully connected regressor: two ReLU hidden layers and a linear output.

    The defaults reproduce the original fixed architecture (7 -> 64 -> 64 -> 1),
    so ``RegressionNN()`` behaves exactly as before; the sizes can now be
    changed without editing the class.

    Args:
        input_size: Number of features per sample (default 7).
        hidden_size: Width of both hidden layers (default 64).
        output_size: Number of regression outputs (default 1).
    """

    def __init__(self, input_size=7, hidden_size=64, output_size=1):
        super().__init__()
        # Attribute names are kept unchanged so existing state_dicts still load.
        self.fc1 = nn.Linear(input_size, hidden_size)   # input -> first hidden layer
        self.relu1 = nn.ReLU()                          # activation of the first hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size)  # first -> second hidden layer
        self.relu2 = nn.ReLU()                          # activation of the second hidden layer
        self.fc3 = nn.Linear(hidden_size, output_size)  # second hidden layer -> output

    def forward(self, x):
        """Map a batch of shape ``(..., input_size)`` to ``(..., output_size)``."""
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x


# Part 3

In [21]:
# Network with its default layer sizes.
model = RegressionNN()

# Mean squared error as the regression loss.
criterion = nn.MSELoss()

# Adam over all of the model's parameters, learning rate 1e-3.
trainable_params = model.parameters()
optimizer = torch.optim.Adam(trainable_params, lr=1e-3)