In [79]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, r2_score

In [80]:
df = pd.read_csv('dataset/train_clean_c0.csv')
df.head()

Unnamed: 0,Particulate_matter,SO2_concentration,O3_concentration,CO_concentration,NO2_concentration,Presure,Dew_point,Precipitation,Anonymous_X1,Wind_speed,Moisture_percent,Temperature,Datetime,ID
0,-1.016712,-1.192496,0.46129,-1.357899,-1.369288,1.192404,-1.69984,-0.081345,-1.93933,2.868591,0.993239,-1.235578,0.0,0
1,-1.037661,-0.834434,0.443937,-0.660369,-0.619605,1.251684,-1.75082,-0.081345,-1.895684,1.509993,1.955098,-1.257531,3.6e-05,1
2,-1.100509,-0.928905,0.426584,-0.945783,-0.424172,1.271444,-1.932891,-0.081345,-0.155429,2.566681,2.186152,-1.296461,7.1e-05,2
3,-1.110983,-0.566204,0.443937,-0.488062,-0.990928,1.360364,-1.998436,-0.081345,-1.348701,2.26477,2.060491,-1.316548,0.000107,3
4,-1.110983,-0.580403,0.322468,-0.593437,-0.622602,1.449284,-1.925608,-0.081345,-0.732043,0.981649,2.09871,-1.358322,0.000143,4


In [81]:
df_ids = pd.DataFrame()
df_ids['ID'] = df['ID']
print(df_ids['ID'])

df = df.drop('ID', axis=1)
df.head()

0            0
1            1
2            2
3            3
4            4
         ...  
27995    27995
27996    27996
27997    27997
27998    27998
27999    27999
Name: ID, Length: 28000, dtype: int64


Unnamed: 0,Particulate_matter,SO2_concentration,O3_concentration,CO_concentration,NO2_concentration,Presure,Dew_point,Precipitation,Anonymous_X1,Wind_speed,Moisture_percent,Temperature,Datetime
0,-1.016712,-1.192496,0.46129,-1.357899,-1.369288,1.192404,-1.69984,-0.081345,-1.93933,2.868591,0.993239,-1.235578,0.0
1,-1.037661,-0.834434,0.443937,-0.660369,-0.619605,1.251684,-1.75082,-0.081345,-1.895684,1.509993,1.955098,-1.257531,3.6e-05
2,-1.100509,-0.928905,0.426584,-0.945783,-0.424172,1.271444,-1.932891,-0.081345,-0.155429,2.566681,2.186152,-1.296461,7.1e-05
3,-1.110983,-0.566204,0.443937,-0.488062,-0.990928,1.360364,-1.998436,-0.081345,-1.348701,2.26477,2.060491,-1.316548,0.000107
4,-1.110983,-0.580403,0.322468,-0.593437,-0.622602,1.449284,-1.925608,-0.081345,-0.732043,0.981649,2.09871,-1.358322,0.000143


In [82]:
X = df.drop('Temperature', axis=1).values
y = df['Temperature'].values

In [83]:
split_index = int(0.8 * len(X))  # First 80% for training

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (22400, 12)
X_test shape: (5600, 12)
y_train shape: (22400,)
y_test shape: (5600,)


In [84]:
class weather_data:
    def __init__(self, X, y, scale_data=True):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            if scale_data:
                X = StandardScaler().fit_transform(X)
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [85]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(12, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.layers(x)

In [86]:
if __name__=='__main__':
    torch.manual_seed(42)
    X,y = X, y

In [87]:
dataset = weather_data(X, y, scale_data=False)
trainloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True)

In [88]:
mlp = MLP()

loss_function = nn.L1Loss()
optimizer = torch.optim.Adagrad(mlp.parameters(), lr=1e-4)

In [89]:
for epoch in range(0,5):
    print(f'Starting Epoch {epoch+1}')

    current_loss = 0.0

    for i, data in enumerate(trainloader, 0):
        inputs, targets = data
        inputs, targets = inputs.float(), targets.float()
        targets = targets.reshape((targets.shape[0], 1))

        optimizer.zero_grad()

        outputs = mlp(inputs)

        loss = loss_function(outputs, targets)

        loss.backward()

        optimizer.step()

        current_loss += loss.item()

        if i%10 == 0:
            print(f'Loss after mini-batch %5d: %.3f'%(i+1, current_loss/500))
            current_loss = 0.0

    print(f'Epoch {epoch+1} finished')

print("Training has completed")

Starting Epoch 1
Loss after mini-batch     1: 0.001
Loss after mini-batch    11: 0.016
Loss after mini-batch    21: 0.016
Loss after mini-batch    31: 0.016
Loss after mini-batch    41: 0.019
Loss after mini-batch    51: 0.015
Loss after mini-batch    61: 0.016
Loss after mini-batch    71: 0.016
Loss after mini-batch    81: 0.015
Loss after mini-batch    91: 0.015
Loss after mini-batch   101: 0.016
Loss after mini-batch   111: 0.016
Loss after mini-batch   121: 0.015
Loss after mini-batch   131: 0.013
Loss after mini-batch   141: 0.015
Loss after mini-batch   151: 0.018
Loss after mini-batch   161: 0.015
Loss after mini-batch   171: 0.016
Loss after mini-batch   181: 0.014
Loss after mini-batch   191: 0.013
Loss after mini-batch   201: 0.015
Loss after mini-batch   211: 0.017
Loss after mini-batch   221: 0.016
Loss after mini-batch   231: 0.017
Loss after mini-batch   241: 0.017
Loss after mini-batch   251: 0.016
Loss after mini-batch   261: 0.016
Loss after mini-batch   271: 0.015
Los

In [90]:
test_data = torch.from_numpy(X_test).float()
test_targets = torch.from_numpy(y_test).float()

In [91]:
mlp.eval() 

MLP(
  (layers): Sequential(
    (0): Linear(in_features=12, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [None]:
with torch.no_grad():
    outputs = mlp(test_data)
    predicted_labels = outputs.squeeze().tolist()

predicted_labels = np.array(predicted_labels)
test_targets = np.array(test_targets)

rmse = root_mean_squared_error(test_targets, predicted_labels)
r2 = r2_score(test_targets, predicted_labels)
print("Root Mean Squared Error:", rmse)
print("R2 Score:", r2)

Mean Squared Error: 0.510135305686586
R2 Score: 0.623430872605933
