### Pytorch
Prepare data

In [1]:
import pandas as pd

df = pd.read_csv('data/cleaned_data.csv', delimiter = ';')
# Shuffle the DataFrame
df = df.sample(frac = 1, random_state=27)
df.head(10)

Unnamed: 0.1,Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,color
1014,1061,7.0,0.21,0.28,7.5,0.07,45.0,185.0,0.9966,3.34,0.55,9.4,5,0
4752,124,7.8,0.5,0.17,1.6,0.082,21.0,102.0,0.996,3.39,0.48,9.5,5,1
5106,642,9.9,0.54,0.45,2.3,0.071,16.0,40.0,0.9991,3.39,0.62,9.4,5,1
1660,1772,7.8,0.4,0.26,9.5,0.059,32.0,178.0,0.9955,3.04,0.43,10.9,6,0
997,1043,7.5,0.33,0.48,19.45,0.048,55.0,243.0,1.001,2.95,0.4,8.8,5,0
4404,4641,6.6,0.26,0.36,1.2,0.035,43.0,126.0,0.99094,3.01,0.63,11.4,6,0
129,133,6.6,0.24,0.27,15.8,0.035,46.0,188.0,0.9982,3.24,0.51,9.2,5,0
901,944,6.5,0.18,0.33,8.0,0.051,16.0,131.0,0.9965,3.28,0.44,8.7,7,0
848,891,5.8,0.26,0.18,1.2,0.031,40.0,114.0,0.9908,3.42,0.4,11.0,7,0
792,826,6.8,0.25,0.38,8.1,0.046,24.0,155.0,0.9956,3.33,0.59,10.2,6,0


In [2]:
# aj taketo riesenie existuje pip install sklearn
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

train_size = int(len(df) * 0.64)
test_size = int(len(df) * 0.8)

# training set - 64% from all data
train_df = df[:train_size]

# validation set - 16% from all data
val_df = df[train_size:test_size]

# testing set - 20% from all data
test_df = df[test_size:]

print('df -', df.shape[0])
print('---------------')
print('train_df -', train_df.shape[0])
print('val_df -', val_df.shape[0])
print('test_df -', test_df.shape[0])

df - 5919
---------------
train_df - 3788
val_df - 947
test_df - 1184


In [109]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [127]:
class MyDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        x = torch.tensor(self.df.iloc[idx].values.astype('float32'))
        y = torch.tensor(self.df.iloc[idx]['color'].astype('float32'))
        y_onehot = torch.zeros(2)
        y_onehot[int(y)] = 1.0
        return x, y_onehot

# Define your neural network architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(14, 8)
        self.fc2 = nn.Linear(8, 4)
        self.fc3 = nn.Linear(4, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

net = Net()
net.train()

# Define your loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

batch_size = 64
dataset = MyDataset(df)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(dataloader, 0):
        optimizer.zero_grad()
        outputs = net(inputs)
        # Labels shape and output shape must be same
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 0:
            print(f'epoch: {epoch + 1} loss: {round(running_loss / 100, 4)}')
            running_loss = 0.0

print('Finished Training')

epoch: 1 loss: 0.587
epoch: 2 loss: 0.0061
epoch: 3 loss: 0.005
epoch: 4 loss: 0.0055
epoch: 5 loss: 0.0055
epoch: 6 loss: 0.0052
epoch: 7 loss: 0.0051
epoch: 8 loss: 0.0055
epoch: 9 loss: 0.0046
epoch: 10 loss: 0.0051
Finished Training
