In [2]:
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
%matplotlib inline

import matplotlib.pyplot as plt

import torch
from torch import nn

In [3]:
class Model(nn.Module):
    """Fully connected autoencoder over 8-feature inputs.

    The encoder narrows 8 -> 6 -> 4 -> 2 and the decoder mirrors it back
    2 -> 4 -> 6 -> 8. A ReLU sits between consecutive linear layers; the last
    linear layer of each half has no activation after it.
    """

    # Layer widths from the input down to the bottleneck.
    _WIDTHS = (8, 6, 4, 2)

    def __init__(self):
        super().__init__()
        # Encoder first, decoder second: keeps parameter creation order stable.
        self.encoder = self._mlp(self._WIDTHS)
        self.decoder = self._mlp(self._WIDTHS[::-1])

    @staticmethod
    def _mlp(widths):
        """Chain Linear layers over consecutive widths with ReLU in between."""
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            if layers:
                layers.append(nn.ReLU())
            layers.append(nn.Linear(fan_in, fan_out))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Encode ``x`` to the 2-d bottleneck and return its reconstruction."""
        return self.decoder(self.encoder(x))

### Создадим DataLoader и приведём датасет к тензорам (torch.Tensor)

In [None]:
# Read both CSV splits (train first, then test). index_col=False tells pandas
# not to use the first column as the index.
train_dataset, test_dataset = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in ('train.csv', 'test.csv')
)

# Summary statistics of each split, train first, one table per print line group.
print(train_dataset.describe(), test_dataset.describe(), sep='\n')

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Share of train.csv rows used for fitting; the remainder is the validation split.
TRAIN_FRACTION = 0.85
# Mini-batch size shared by all three loaders.
BATCH_SIZE = 128
# Fixed seed so the train/validation partition is identical on every run.
SPLIT_SEED = 42


def _to_feature_target_tensors(frame):
    """Split a DataFrame into float32 tensors.

    Returns ``(features, target)``: every column but the last as features, and
    the last column reshaped to ``(n_rows, 1)`` as the target.
    """
    values = frame.values
    features = torch.tensor(values[:, :-1], dtype=torch.float32)
    target = torch.tensor(values[:, -1], dtype=torch.float32).reshape(-1, 1)
    return features, target


train_data, train_target = _to_feature_target_tensors(train_dataset)
test_data, test_target = _to_feature_target_tensors(test_dataset)

train_tensorset = TensorDataset(train_data, train_target)
test_tensorset = TensorDataset(test_data, test_target)

train_size = int(len(train_tensorset) * TRAIN_FRACTION)
val_size = len(train_tensorset) - train_size

# The splits are deliberately NOT bound to the name `train`: this notebook also
# defines a function called `train`, and re-running this cell would otherwise
# replace that function with a Subset object.
train_subset, val_subset = torch.utils.data.random_split(
    train_tensorset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation/test keep a fixed order.
train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_tensorset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
from sklearn.metrics import accuracy_score

def evaluate(model, dataloader, loss_fn):
    """Compute accuracy and mean batch loss of ``model`` over ``dataloader``.

    Predictions are the model outputs rounded to the nearest integer; accuracy
    is ``sklearn.metrics.accuracy_score`` of those predictions against the
    targets yielded by the loader. Gradients are disabled for the whole pass.

    This function does not change the model's train/eval mode; call
    ``model.eval()`` beforehand if the model has mode-dependent layers.

    Args:
        model: callable invoked as ``model(X_batch)``.
        dataloader: iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        loss_fn: callable ``loss_fn(outputs, y_batch)`` returning a scalar tensor.

    Returns:
        ``(accuracy, mean_loss)``, where ``mean_loss`` is the unweighted mean
        of the per-batch loss values (a smaller final batch counts as much as
        a full one).

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    pred_batches = []
    true_batches = []
    losses = []

    with torch.no_grad():
        for X_batch, y_batch in tqdm(dataloader):
            outputs = model(X_batch)
            losses.append(loss_fn(outputs, y_batch).item())

            # Collect both sides as numpy arrays (moved to CPU first) so that
            # accuracy_score receives two arrays of the same kind.
            pred_batches.append(torch.round(outputs).cpu().numpy())
            true_batches.append(y_batch.cpu().numpy())

    if not losses:
        raise ValueError('dataloader yielded no batches')

    y_true = np.concatenate(true_batches)
    y_pred = np.concatenate(pred_batches)

    # accuracy_score's signature is (y_true, y_pred).
    accuracy = accuracy_score(y_true, y_pred)

    return accuracy, np.mean(losses)


In [None]:
def train(model, loss_fn, optimizer, n_epoch=6):
    """Train ``model`` and record train/validation metrics after every epoch.

    Batches come from the module-level ``train_loader``; after each epoch the
    model is evaluated with ``evaluate`` on ``train_loader`` and on the
    module-level ``val_loader``, and the results are printed and stored.

    Args:
        model: ``nn.Module`` to optimise (updated in place).
        loss_fn: callable ``loss_fn(outputs, y_batch)`` returning a scalar tensor.
        optimizer: optimizer already bound to ``model``'s parameters.
        n_epoch: number of passes over ``train_loader``.

    Returns:
        ``(model, history)`` where ``history`` is a dict with the keys
        ``'acc_train'``, ``'loss_train'``, ``'acc_val'`` and ``'loss_val'``,
        each a list with one entry per epoch. After at least one epoch the
        model is left in eval mode.
    """
    history = {
        'acc_train': [],
        'loss_train': [],
        'acc_val': [],
        'loss_val': []
    }

    for epoch in tqdm(range(n_epoch)):

        # Re-enter training mode at the start of EVERY epoch: the evaluation
        # step below switches the model to eval mode, and without this reset
        # all epochs after the first would train in eval mode.
        model.train()

        for X_batch, y_batch in tqdm(train_loader):

            logits = model(X_batch)
            loss = loss_fn(logits, y_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.eval()

        print('On epoch end', epoch)

        acc_train_epoch, loss_train_epoch = evaluate(model, train_loader, loss_fn)
        print('Train acc:', acc_train_epoch, 'Train loss:', loss_train_epoch)

        acc_val_epoch, loss_val_epoch = evaluate(model, val_loader, loss_fn)
        print('Val acc:', acc_val_epoch, 'Val loss:', loss_val_epoch)

        history['acc_train'].append(acc_train_epoch)
        history['loss_train'].append(loss_train_epoch)
        history['acc_val'].append(acc_val_epoch)
        history['loss_val'].append(loss_val_epoch)

    return model, history


In [None]:
# learning_rates = list(map(float,np.arange(1e-5, 0.1, .007)))
# acc_list = []
# loss_list = []

# for lr in learning_rates:
#     model = Model()

#     loss_function = nn.BCELoss()

#     optimizer = torch.optim.Adam(model.parameters(), lr=lr)

#     model, data = train(model, loss_function, optimizer, n_epoch=5)

#     test_acc, test_loss = evaluate(model, test_loader, loss_function)
#     acc_list.append(test_acc)
#     loss_list.append(test_loss)


In [None]:
# plt.figure(figsize=(16,10))
# plt.xticks(np.arange(min(learning_rates), max(learning_rates), .007))
# plt.plot(learning_rates, loss_list, 'o', alpha=0.6)
# plt.plot(learning_rates, loss_list)

In [None]:
# plt.figure(figsize=(16,10))
# plt.xticks(np.arange(min(learning_rates), max(learning_rates), .007))
# plt.plot(learning_rates, acc_list, 'o', alpha=0.6)
# plt.plot(learning_rates, acc_list)