In [77]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import *

In [64]:
# Dataset 상속
class CustomDataset(Dataset): 
  def __init__(self, x_train='', y_train=''):
    self.x_data = x_train
    self.y_data = y_train

  # 총 데이터의 개수를 리턴
  def __len__(self): 
    return len(self.x_data)

  def __getitem__(self, idx): 
    x = torch.FloatTensor(self.x_data[idx])
    y = torch.FloatTensor(self.y_data[idx])
    return x, y

In [89]:
df = pd.read_csv('data/winequality-red.csv', sep=';')

In [90]:
q1=df.quantile(0.10)
q2=df.quantile(0.90)
qua =q2-q1

df=df[~((df<(q1-(1.9*qua)))|(df>(q2+(1.9*qua)))).any(axis=1)]

In [91]:
y = np.array(df['quality'].values.tolist())
X = np.array(df.drop(labels=['quality'], axis=1).values.tolist())

In [92]:
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.33, random_state=96)
X_train, X_valid, y_train, y_valid = train_test_split(X_trainval, y_trainval, test_size=0.33, random_state=96)

In [93]:
ss_scaler = StandardScaler()
X_train_SS = ss_scaler.fit_transform(X_train)
X_valid_SS = ss_scaler.transform(X_valid)
X_test_SS = ss_scaler.transform(X_test)

In [94]:
print(X_train_SS.shape, X_valid_SS.shape, X_test_SS.shape, y_train.shape, y_valid.shape ,y_test.shape)

(685, 11) (338, 11) (505, 11) (685,) (338,) (505,)


In [95]:
y_trainval = np.expand_dims(y_trainval, axis=1)
y_train = np.expand_dims(y_train, axis=1)
y_valid = np.expand_dims(y_valid, axis=1)
y_test = np.expand_dims(y_test, axis=1)

In [96]:
dataset = CustomDataset(X_train_SS, y_train)
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)

In [97]:
class Regression(torch.nn.Module):
    def __init__(self, input, output):
        print('Regression Model Init')
        super(Regression, self).__init__()
        self.layer1 = torch.nn.Linear(input, 30)
        self.layer2 = torch.nn.Linear(30, 50)
        self.layer3 = torch.nn.Linear(50, 100)
        self.layer4 = torch.nn.Linear(100, 200)
        self.layer5 = torch.nn.Linear(200, 100)
        self.layer6 = torch.nn.Linear(100, 30)
        self.layer7 = torch.nn.Linear(30, output)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        x = F.relu(self.layer4(x))
        x = F.relu(self.layer5(x))
        x = F.relu(self.layer6(x))
        x = self.layer7(x)

        return x

In [98]:
best_mse = 1

model = Regression(11, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.0005) 
train_loss = []
nb_epochs = 10000
for epoch in range(nb_epochs + 1):
    model.train()
    running_loss = 0.0
    num_cnt = 0

    for batch_idx, samples in enumerate(dataloader):
        x_sample, y_sample = samples
        optimizer.zero_grad()
        
        with torch.set_grad_enabled(True):
            output = model(x_sample)
            criterion = torch.nn.MSELoss()
            loss = criterion(output, y_sample)
            loss.backward()
            optimizer.step()

        running_loss += loss.item() * x_sample.size(0)
        num_cnt += len(y_sample)

    epoch_loss = float(running_loss / num_cnt)

    train_loss.append(epoch_loss)

    model.eval()

    x_test_tensor = torch.FloatTensor(X_valid_SS)
    predictions = model(x_test_tensor).tolist()
    mse = np.sum((y_valid - predictions)**2) / len(predictions)

    if best_mse > mse:
        best_mse = mse
        best_model = copy.deepcopy(model.state_dict())
    if epoch % 100 == 0:
        print('{} / {}'.format(epoch, nb_epochs))
print('best MSE : ',mse)

Regression Model Init
0 / 10000
100 / 10000
200 / 10000
300 / 10000
400 / 10000
500 / 10000
600 / 10000
700 / 10000
800 / 10000
900 / 10000
1000 / 10000
1100 / 10000
1200 / 10000
1300 / 10000
1400 / 10000
1500 / 10000
1600 / 10000
1700 / 10000
1800 / 10000
1900 / 10000
2000 / 10000
2100 / 10000
2200 / 10000
2300 / 10000
2400 / 10000
2500 / 10000
2600 / 10000
2700 / 10000
2800 / 10000
2900 / 10000
3000 / 10000
3100 / 10000
3200 / 10000
3300 / 10000
3400 / 10000
3500 / 10000
3600 / 10000
3700 / 10000
3800 / 10000
3900 / 10000
4000 / 10000
4100 / 10000
4200 / 10000
4300 / 10000
4400 / 10000
4500 / 10000
4600 / 10000
4700 / 10000
4800 / 10000
4900 / 10000
5000 / 10000
5100 / 10000
5200 / 10000
5300 / 10000
5400 / 10000
5500 / 10000
5600 / 10000
5700 / 10000
5800 / 10000
5900 / 10000
6000 / 10000
6100 / 10000
6200 / 10000
6300 / 10000
6400 / 10000
6500 / 10000
6600 / 10000
6700 / 10000
6800 / 10000
6900 / 10000
7000 / 10000
7100 / 10000
7200 / 10000
7300 / 10000
7400 / 10000
7500 / 10000
76

In [100]:
model.load_state_dict(best_model)
model.eval()

x_test_tensor = torch.FloatTensor(X_test_SS)
predictions = model(x_test_tensor).tolist()
mse = np.sum((y_test - predictions)**2) / len(predictions)
rmse = np.sqrt(mse)
mae = np.sum(np.abs((y_test - predictions)/y_test))/len(predictions)
mape = mae * 100

print(mse, rmse, mae, mape)

0.4030854037705683 0.6348900721940518 0.08866726801003112 8.866726801003113


Result  

Data : Standard Scaling  
Best Pytorch Regression MSE : 0.4030
