In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from sklearn import linear_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Seed for the torch.Generator that drives the random train/test split.
seed = 42

In [3]:
# Load the raw table, parse the Date column (month/day/year) and use it as
# the index, all in one chained expression.
df = (
    pd.read_csv('Farmgate all generated data for lstm.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%m/%d/%Y'))
    .set_index('Date')
)
df.head()

Unnamed: 0_level_0,Rainfall,Temp,Humidity,Wind Speed,Solar Rad,SO2,NO2,CO,O3,PM2.5,PM10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2013-01-01,0,17.0,69,0.0,183.69,10.99,64.8,3.01,7.77,178.41,515.31
2013-01-02,0,18.4,68,2.0,203.27,13.41,71.56,2.73,7.78,204.03,473.83
2013-01-03,0,18.9,74,0.0,188.48,10.87,67.23,2.45,6.91,278.17,462.5
2013-01-04,0,19.9,70,2.0,251.45,12.87,61.54,0.96,20.99,191.92,402.0825
2013-01-05,0,18.7,60,2.0,362.78,15.033333,55.85,2.366667,19.496667,212.946667,341.665


In [4]:
# Column-wise z-score: subtract each column's mean and divide by its standard
# deviation.
# NOTE(review): the statistics are computed on the full table, before the
# train/test split further down, so test rows influence the scaling.
column_means = df.mean()
column_stds = df.std()
df_scaled = df.sub(column_means).div(column_stds)
df_scaled.head()

Unnamed: 0_level_0,Rainfall,Temp,Humidity,Wind Speed,Solar Rad,SO2,NO2,CO,O3,PM2.5,PM10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2013-01-01,-0.364317,-2.256193,-0.278817,-1.912226,-2.49526,0.213221,0.810917,1.06104,-0.115261,1.128015,2.972864
2013-01-02,-0.364317,-1.916256,-0.370281,-0.19739,-2.277708,0.407265,1.06181,0.840476,-0.114491,1.469793,2.637867
2013-01-03,-0.364317,-1.79485,0.178503,-1.912226,-2.442038,0.203599,0.901105,0.619911,-0.181466,2.458844,2.546365
2013-01-04,-0.364317,-1.552038,-0.187353,-0.19739,-1.742384,0.363966,0.689925,-0.553808,0.902442,1.308242,2.058427
2013-01-05,-0.364317,-1.843412,-1.101992,-0.19739,-0.505406,0.537429,0.478744,0.554267,0.787482,1.588745,1.570489


In [5]:
# Replace the Date index with a plain 0..n-1 integer index.
# `reset_index(drop=True)` discards the old index in one step and returns a
# new frame, so this cell can be re-run safely. It also avoids calling
# `DataFrame.drop` with a positional `axis` argument, which pandas 2.0 rejects.
df_scaled = df_scaled.reset_index(drop=True)
df_scaled.head()

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Solar Rad,SO2,NO2,CO,O3,PM2.5,PM10
0,-0.364317,-2.256193,-0.278817,-1.912226,-2.49526,0.213221,0.810917,1.06104,-0.115261,1.128015,2.972864
1,-0.364317,-1.916256,-0.370281,-0.19739,-2.277708,0.407265,1.06181,0.840476,-0.114491,1.469793,2.637867
2,-0.364317,-1.79485,0.178503,-1.912226,-2.442038,0.203599,0.901105,0.619911,-0.181466,2.458844,2.546365
3,-0.364317,-1.552038,-0.187353,-0.19739,-1.742384,0.363966,0.689925,-0.553808,0.902442,1.308242,2.058427
4,-0.364317,-1.843412,-1.101992,-0.19739,-0.505406,0.537429,0.478744,0.554267,0.787482,1.588745,1.570489


In [6]:
# Materialise the scaled frame as a plain NumPy matrix, one row per record.
# The column slice stops at 12; slicing silently clips to the columns that
# actually exist, so the printed shape shows the real column count.
array = df_scaled.iloc[:, :12].to_numpy()
print(array.shape)

(2191, 11)


In [7]:
# Columns 0-4 (Rainfall, Temp, Humidity, Wind Speed, Solar Rad) are the
# regression inputs; columns 5 onwards (SO2 ... PM10) are the targets.
X_array, y_array = array[:, :5], array[:, 5:12]

In [8]:
class AirMeteoroDataset(Dataset):
    """Map-style dataset pairing each feature row with its target row.

    Parameters
    ----------
    X, y : numpy.ndarray, optional
        Feature and target matrices with the same number of rows. When
        omitted, the notebook-level ``X_array`` / ``y_array`` are used, so
        the no-argument call ``AirMeteoroDataset()`` keeps working.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X=None, y=None):
        # Fall back to the notebook globals only when nothing is passed in;
        # explicit arguments let the class be reused on other arrays.
        if X is None:
            X = X_array
        if y is None:
            y = y_array
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have the same number of rows, "
                f"got {X.shape[0]} and {y.shape[0]}"
            )

        self.len = X.shape[0]
        # torch.from_numpy shares memory with the source arrays (no copy).
        self.X_data = torch.from_numpy(X)
        self.y_data = torch.from_numpy(y)

    def __getitem__(self, index):
        """Return the ``(features, targets)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows in the dataset."""
        return self.len

In [9]:
dataset = AirMeteoroDataset()

# 85 % of the rows go to training, the remainder to testing. The split is
# driven by a dedicated generator seeded with `seed`.
train_size = round(0.85 * len(dataset))
test_size = len(dataset) - train_size

split_rng = torch.Generator().manual_seed(seed)
train_set, test_set = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=split_rng,
)

In [10]:
# Gather the training rows once and stack them in a single call; growing a
# tensor with torch.cat inside a loop re-copies every earlier row each time.
# Assumes the training subset is non-empty (torch.stack needs at least one row).
# The GPU is still used when present, but machines without CUDA now fall back
# to the CPU instead of raising.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_pairs = list(train_set)
X_train = torch.stack([X_data for X_data, _ in train_pairs]).float().reshape(-1, 5).to(device)
y_train = torch.stack([y_data for _, y_data in train_pairs]).float().reshape(-1, 6).to(device)

In [11]:
# Gather the test rows once and stack them in a single call; growing a
# tensor with torch.cat inside a loop re-copies every earlier row each time.
# Assumes the test subset is non-empty (torch.stack needs at least one row).
# The GPU is still used when present, but machines without CUDA now fall back
# to the CPU instead of raising.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
test_pairs = list(test_set)
X_test = torch.stack([X_data for X_data, _ in test_pairs]).float().reshape(-1, 5).to(device)
y_test = torch.stack([y_data for _, y_data in test_pairs]).float().reshape(-1, 6).to(device)

In [12]:
# Bring the training data back to NumPy for scikit-learn. Going through
# torch.as_tensor accepts either a tensor (first run) or an ndarray (re-run),
# so executing this cell twice no longer fails on `.cpu()`.
X_train = torch.as_tensor(X_train).cpu().numpy()
y_train = torch.as_tensor(y_train).cpu().numpy()
xy = np.column_stack((X_train, y_train))
# Columns are labelled "0".."10": "0"-"4" hold the features, "5"-"10" the targets.
df_train = pd.DataFrame(xy, columns=[str(col) for col in range(11)])

In [13]:
# Bring the test data back to NumPy for scikit-learn. Going through
# torch.as_tensor accepts either a tensor or an ndarray, so executing this
# cell again (when X_test is already an ndarray) no longer fails on `.cpu()`.
X_test = torch.as_tensor(X_test).cpu().numpy()
y_test = torch.as_tensor(y_test).cpu().numpy()
xy = np.column_stack((X_test, y_test))
# Columns are labelled "0".."10": "0"-"4" hold the features, "5"-"10" the targets.
df_test = pd.DataFrame(xy, columns=[str(col) for col in range(11)])

In [14]:
# Ordinary least-squares fit: one multi-output model mapping the five feature
# columns to the six target columns of the training frame.
predictor_names = ["0", "1", "2", "3", "4"]
response_names = ["5", "6", "7", "8", "9", "10"]

reg_model = linear_model.LinearRegression()
reg_model.fit(df_train[predictor_names], df_train[response_names])

LinearRegression()

In [15]:
# Predict all targets for the held-out rows from their five feature columns.
test_features = df_test.loc[:, ["0", "1", "2", "3", "4"]]
outputs = reg_model.predict(test_features)

In [16]:
# Wrap predictions and ground truth as tensors for the torch loss below.
# torch.as_tensor behaves like torch.from_numpy for ndarrays (no copy, same
# dtype) but also accepts values that are already tensors, so this cell can be
# re-run without raising a TypeError.
outputs = torch.as_tensor(outputs)
y_test = torch.as_tensor(y_test)

In [17]:
# Mean-squared error averaged over every element (the default reduction),
# comparing the regression predictions with the test targets.
loss_MSE = nn.MSELoss(reduction="mean")
MSEloss = loss_MSE(outputs, y_test)

In [18]:
# Test-set MSE of the linear-regression predictions; the targets were
# z-scored earlier, so the value is in standardised units.
MSEloss

tensor(0.8512)

In [19]:
# Shape of the predictions: one row per test sample, one column per target.
outputs.shape

torch.Size([329, 6])