<a href="https://colab.research.google.com/github/smuzka/SSN-project/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Zespół 4:
- Jakub Smuga
- Konrad Korus
- Maksym Kazhaiev

### Podział danych

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.utils import shuffle

In [None]:
random_shuffle_state = 2024

def train_valid_test_split(features, targets, valid_p = 0.1, test_p = 0.3):
  # shuffle the features and targets in the same way
  features = shuffle(features, random_state = random_shuffle_state)
  targets = shuffle(targets, random_state = random_shuffle_state)
  train_size = int(len(features) * (1 - (test_p + valid_p)))
  valid_size = int(len(features) * valid_p)

  X_train, X_valid, X_test = features[:train_size], features[train_size:train_size + valid_size], features[train_size + valid_size:]
  y_train, y_valid, y_test = targets[:train_size], targets[train_size:train_size + valid_size], targets[train_size + valid_size:]
  return (X_train, y_train, X_valid, y_valid, X_test, y_test)

### Model

In [None]:
import torch.nn as nn
import torch.optim as optim

In [None]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

### Funkcja ucząca model

In [None]:
def train(model, X_train_tensor, y_train_tensor, X_valid_tensor, y_valid_tensor):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    epochs = 10
    batch_size = 32

    for epoch in range(epochs):
        for i in range(0, len(X_train_tensor), batch_size):
            inputs = X_train_tensor[i:i+batch_size]
            labels = y_train_tensor[i:i+batch_size]

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

        with torch.no_grad():
            outputs = model(X_train_tensor)
            _, predicted = torch.max(outputs, 1)
            train_accuracy = (predicted == y_train_tensor).sum().item() / len(y_train_tensor)

            outputs = model(X_valid_tensor)
            _, predicted = torch.max(outputs, 1)
            valid_accuracy = (predicted == y_valid_tensor).sum().item() / len(y_valid_tensor)

            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Training Accuracy: {train_accuracy:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

## Funkcja sprawdzająca model

In [132]:
def evaluate(model, X_test_tensor, y_test_tensor):
    model.eval()

    with torch.no_grad():
        outputs = model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)

    accuracy = (predicted == y_test_tensor).sum().item() / len(y_test_tensor)

    return accuracy

## Pobieranie danych

### Internet firewall data

In [None]:
import os
if not os.path.exists('firewall_data.zip'):
    # If the file doesn't exist, download it
  !pip install wget
  !wget https://archive.ics.uci.edu/static/public/542/internet+firewall+data.zip -O firewall_data.zip
  !unzip firewall_data.zip

import pandas as pd

data = pd.read_csv('log2.csv')

data.dropna(inplace=True)

label_encoder = LabelEncoder()
data['Action'] = label_encoder.fit_transform(data['Action'])

X = data.drop('Action', axis=1)
y = data['Action']

In [None]:
X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(X, y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

y_train = y_train.to_numpy()
y_valid = y_valid.to_numpy()
y_test = y_test.to_numpy()

input_dim = X_train.shape[1]
hidden_dim = 4
output_dim = len(label_encoder.classes_)

model = MLP(input_dim, hidden_dim, output_dim)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [133]:
train(model, X_train_tensor, y_train_tensor, X_valid_tensor, y_valid_tensor)

Epoch 1/10, Loss: 0.0078, Training Accuracy: 0.9889, Valid Accuracy: 0.9890
Epoch 2/10, Loss: 0.0077, Training Accuracy: 0.9890, Valid Accuracy: 0.9890
Epoch 3/10, Loss: 0.0075, Training Accuracy: 0.9894, Valid Accuracy: 0.9894
Epoch 4/10, Loss: 0.0074, Training Accuracy: 0.9895, Valid Accuracy: 0.9894
Epoch 5/10, Loss: 0.0073, Training Accuracy: 0.9897, Valid Accuracy: 0.9895
Epoch 6/10, Loss: 0.0073, Training Accuracy: 0.9899, Valid Accuracy: 0.9897
Epoch 7/10, Loss: 0.0073, Training Accuracy: 0.9900, Valid Accuracy: 0.9898
Epoch 8/10, Loss: 0.0074, Training Accuracy: 0.9901, Valid Accuracy: 0.9900
Epoch 9/10, Loss: 0.0075, Training Accuracy: 0.9902, Valid Accuracy: 0.9902
Epoch 10/10, Loss: 0.0075, Training Accuracy: 0.9902, Valid Accuracy: 0.9902


In [134]:
evaluate(model, X_test_tensor, y_test_tensor)

0.9908445868619822

## BANANA

In [136]:
data = pd.read_csv('banana_quality.csv')

X = data.drop('Quality', axis=1)
y = data['Quality']

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(X, y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

input_dim = X_train.shape[1]
hidden_dim = 4
output_dim = len(label_encoder.classes_)


model = MLP(input_dim, hidden_dim, output_dim)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train(model, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor)

Epoch 1/10, Loss: 0.6801, Training Accuracy: 0.4979, Valid Accuracy: 0.5025
Epoch 2/10, Loss: 0.4507, Training Accuracy: 0.8900, Valid Accuracy: 0.8719
Epoch 3/10, Loss: 0.2976, Training Accuracy: 0.9050, Valid Accuracy: 0.8881
Epoch 4/10, Loss: 0.2461, Training Accuracy: 0.9227, Valid Accuracy: 0.9087
Epoch 5/10, Loss: 0.2169, Training Accuracy: 0.9302, Valid Accuracy: 0.9175
Epoch 6/10, Loss: 0.1917, Training Accuracy: 0.9394, Valid Accuracy: 0.9250
Epoch 7/10, Loss: 0.1715, Training Accuracy: 0.9458, Valid Accuracy: 0.9319
Epoch 8/10, Loss: 0.1562, Training Accuracy: 0.9502, Valid Accuracy: 0.9369
Epoch 9/10, Loss: 0.1450, Training Accuracy: 0.9527, Valid Accuracy: 0.9387
Epoch 10/10, Loss: 0.1365, Training Accuracy: 0.9556, Valid Accuracy: 0.9437


In [137]:
evaluate(model, X_test_tensor, y_test_tensor)

0.94375