In [1]:
import pandas as pd
import numpy as np
import torch 
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
from sklearn.metrics import mean_squared_error
device = 'cuda' if torch.cuda.is_available() else 'cpu'

ModuleNotFoundError: No module named 'pandas'

In [None]:
df = pd.read_csv("/content/drive/MyDrive/data/winequality-red.csv")

In [None]:
df["quality"].unique()

array([5, 6, 7, 4, 8, 3])

In [None]:
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [None]:
df = df.drop(columns=['alcohol'])

In [None]:
inputs = df.drop('quality',axis=1).values
target = df.quality.values

In [None]:
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

In [None]:
y.shape

(1599,)

In [None]:
B = np.reshape(target, (-1, 1))

In [None]:
X = torch.from_numpy(inputs.astype('float32')).to(device)
y = torch.from_numpy(B.astype('float32').astype('float32')).to(device)

In [None]:
# Train - Test
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=69)
# Split train into train-val
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, stratify=y_trainval, random_state=21)

In [None]:
X_train.size()

torch.Size([1151, 10])

In [None]:
class RegressionDataset(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)

In [None]:
train_dataset = RegressionDataset(X_train,y_train)

val_dataset  = RegressionDataset(X_val,y_val)

test_dataset  = RegressionDataset(X_test,y_test)

In [None]:
EPOCHS = 1000
BATCH_SIZE = 64
LEARNING_RATE = 0.001
NUM_FEATURES = 10

In [None]:
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=1)

In [None]:
# class MultipleRegression(nn.Module):
#     def __init__(self, num_features):
#         super(MultipleRegression, self).__init__()
        
#         self.layer_1 = nn.Linear(num_features, 16)
#         self.layer_2 = nn.Linear(16, 32)
#         self.layer_3 = nn.Linear(32, 16)
#         self.layer_out = nn.Linear(16, 1)
        
#         self.relu = nn.ReLU()
#     def forward(self, inputs):
#             x = self.relu(self.layer_1(inputs))
#             x = self.relu(self.layer_2(x))
#             x = self.relu(self.layer_3(x))
#             x = self.layer_out(x)
#             return (x)

In [None]:
# model = MultipleRegression(NUM_FEATURES)

In [None]:
model = nn.Sequential(nn.Linear(NUM_FEATURES, 16),
                      nn.ReLU(),
                      nn.Dropout(p=0.2),
                      nn.Linear(16, 32),
                      nn.ReLU(),
                      nn.Dropout(p=0.2),
                      nn.Linear(32, 16),
                      nn.ReLU(),
                      nn.Dropout(p=0.2),
                      nn.Linear(16, 1))

In [None]:
criterion = nn.MSELoss()
optimizer = torch.optim.RAdam(model.parameters(), lr=LEARNING_RATE)

scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[300,700], gamma=0.5)


In [None]:
min_val_loss = float("inf")
for e in range(EPOCHS):
   # TRAINING
  train_epoch_loss = 0
  model.train()
  step = 0
  for X_train_batch, y_train_batch in train_loader:
    optimizer.zero_grad()
        
    y_train_pred = model(X_train_batch)
    
    train_loss = criterion(y_train_pred, y_train_batch)
    
    train_loss.backward()
    # torch.nn.utils.clip_grad_norm(model.parameters(),max_norm = 1)
    optimizer.step()
    train_epoch_loss += train_loss.item()
    step += 1
    if step % 9 == 0:
      # VALIDATION  
      with torch.no_grad():
          val_epoch_loss = 0
          model.eval()
          for X_val_batch, y_val_batch in val_loader:
              y_val_pred = model(X_val_batch)    
              val_loss = criterion(y_val_pred, y_val_batch)
              val_epoch_loss += val_loss.item()

      
      if min_val_loss  > val_epoch_loss:
        # print(f'Validation Loss Decreased({min_val_loss:.6f}--->{val_epoch_loss:.6f}) \t Saving The Model')  
        min_val_loss = val_epoch_loss
        torch.save(model.state_dict(), 'saved_model.pth')
  scheduler.step()
  if e % 100 == 0:
    print("learning rate : ",optimizer.param_groups[0]['lr'])
    print(f'Epoch {e} \t\t Training Loss: {train_epoch_loss / len(train_loader)} \t\t Validation Loss: {val_epoch_loss / len(val_loader)}')
            

learning rate :  0.001
Epoch 0 		 Training Loss: 12.90828561782837 		 Validation Loss: 10.45638656616211
learning rate :  0.001
Epoch 100 		 Training Loss: 1.522368128101031 		 Validation Loss: 0.847859799861908
learning rate :  0.001
Epoch 200 		 Training Loss: 0.9478866524166532 		 Validation Loss: 0.6327943205833435
learning rate :  0.0005
Epoch 300 		 Training Loss: 0.8119954864184061 		 Validation Loss: 0.5813243389129639
learning rate :  0.0005
Epoch 400 		 Training Loss: 0.7224116457833184 		 Validation Loss: 0.5427526235580444
learning rate :  0.0005
Epoch 500 		 Training Loss: 0.6769651836819119 		 Validation Loss: 0.4759541153907776
learning rate :  0.0005
Epoch 600 		 Training Loss: 0.6374524202611711 		 Validation Loss: 0.4568849503993988
learning rate :  0.00025
Epoch 700 		 Training Loss: 0.6120146099064085 		 Validation Loss: 0.44616222381591797
learning rate :  0.00025
Epoch 800 		 Training Loss: 0.5895018461677763 		 Validation Loss: 0.44344520568847656
learning rate :

In [None]:
y_test.size()

torch.Size([320, 1])

In [None]:
y_pred_list = []
with torch.no_grad():
    model.load_state_dict(torch.load('saved_model.pth'))
    model.eval()
    for X_batch, _ in test_loader:
        y_test_pred = model(X_batch)
        y_pred_list.append(y_test_pred.numpy())

In [None]:
y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

In [None]:
mean_squared_error(y_pred_list,y_test)

0.4546965034192354

In [None]:
# StepLR : 0.47645036563194765
# ExponentialLR : 0.4714289650741755
# MultiStepLR : 0.4560511700038944
# ConstantLR : 0.46464827078613596