In [43]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import torch
from torch import optim
from torch import nn
from torch.utils.data import TensorDataset, DataLoader, random_split
from tqdm import tqdm
import torch.optim.lr_scheduler as lr_scheduler

# !pip install torchvision
import torchvision

import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# !pip install torchmetrics
import torchmetrics

from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

In [44]:
df = pd.read_csv("/kaggle/input/data-train-csv/data_train.csv")
features, labels = df.iloc[:, :2], df.iloc[:, 4]

# 轉成tensor
features = torch.tensor(features.values, dtype = torch.float32)
labels = torch.tensor(labels.values, dtype = torch.float32)

dataset = TensorDataset(features, labels)

train_size = int(len(dataset) * 0.8)
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size = 64, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size = 64)
print(train_size)

20000


In [45]:
class network(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(2, 256)
        self.linear2 = nn.Linear(256, 256)
        self.linear3 = nn.Linear(256, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x
device = torch.device("cpu")
model = network().to(device)
print(model)

network(
  (linear1): Linear(in_features=2, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=256, bias=True)
  (linear3): Linear(in_features=256, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [51]:
loss_function = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)

scheduler = lr_scheduler.ReduceLROnPlateau( #自動調學習率
    optimizer, 
    mode = 'min', # 監控驗證損失
    factor =0.1,  # 學習率衰減係數
    patience = 5, # 容忍n個epoch無改善
    verbose = True, # 打印調整日誌
    min_lr = 1e-3 # 最小學習率
)
for i in range(5):
    model.train()
    for batch_i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()
        x = x.to(device)
        y = y.to(device)
        y = y.view(-1, 1)
        outputs = model(x)
        loss = loss_function(outputs, y)
        loss.backward()
        optimizer.step()
        pred = outputs.argmax(dim = 1) #dim = 1代表批次中的每個樣本中的10個類別選擇最高分的索引 (橫看)
        #dim = 0是縱看
        train_loss = float(loss) #將loss由tensor變回float方便進行運算
        train_acc = accuracy_score(y.numpy(), pred.numpy()) #必須使用cpu和numpy陣列 / list
    print(f"Epoch {i + 1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc * 100:.2f}%")
    model.eval()
    total_loss = 0
    sample = 0
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            y = y.view(-1, 1)
            outputs = model(x)
            total_loss += float(loss_function(outputs, y)) * x.size(0) # x.size(0)是當前batch的size
            sample += x.size(0)
    scheduler.step(float(total_loss / sample))
        

Epoch 1, Train Loss: 0.6208, Train Accuracy: 53.12%
Epoch 2, Train Loss: 0.6706, Train Accuracy: 59.38%
Epoch 3, Train Loss: 0.6809, Train Accuracy: 56.25%
Epoch 4, Train Loss: 0.6235, Train Accuracy: 53.12%
Epoch 5, Train Loss: 0.5926, Train Accuracy: 71.88%


In [49]:
torch.save(model.state_dict(), 'submission_dic.pth')