## 1. Dataset Class 선언
- Data: Spambase — 57개의 feature를 이용해 이메일이 spam인지 아닌지를 분류하는 데이터셋 (GitHub 참고)

In [69]:
# For Google Colab only: uncomment the two lines below to mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive')

In [70]:
import pandas as pd
import numpy as np
# torch dataloader
from torch.utils.data import DataLoader, Dataset
import torch
# NOTE: train_test_split is imported locally inside Spam.create_data
class SpamDataloader(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Despite its name this class is a ``Dataset``, not a ``DataLoader``; it
    only provides length and index access over the two containers given to
    the constructor.

    Args:
        X: Indexable container of features.
        Y: Indexable container of labels, indexed with the same positions
            as ``X``.
    """

    def __init__(self, X, Y):
        # Both containers are stored as given, without copying.
        self.X, self.Y = X, Y

    def __len__(self):
        """Return the number of samples, measured on the feature container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        features = self.X[idx]
        label = self.Y[idx]
        return features, label



class Spam:
    """Loader that reads the Spambase CSV and builds train/test datasets.

    Args:
        train_dir: Path of the CSV file to read. Defaults to the
            repository-relative location that was previously hard-coded.
    """

    def __init__(self, train_dir='src/dataset/spambase/spambase.data'):
        self.train_dir = train_dir

    def create_data(self, test_size=0.2, random_state=42):
        """Read the CSV and split it into a train and a test dataset.

        The file is read without a header row. The last column is used as
        the label and every preceding column as a feature.

        Args:
            test_size: Forwarded to ``train_test_split`` (default 0.2).
            random_state: Forwarded to ``train_test_split`` (default 42).

        Returns:
            A ``(Spam_train, Spam_test)`` tuple of ``SpamDataloader``
            instances whose features and labels are float32 tensors.
        """
        # Kept as a local import, as in the original, so the module can be
        # imported without scikit-learn being loaded.
        from sklearn.model_selection import train_test_split

        table = pd.read_csv(self.train_dir, sep=',', header=None).values
        features = table[:, :-1]
        # Labels are cast to int first and converted to float tensors below,
        # which is the dtype BCELoss is fed with elsewhere in this file.
        labels = table[:, -1].astype(int)

        X_train, X_test, Y_train, Y_test = train_test_split(
            features, labels, test_size=test_size, random_state=random_state
        )

        Spam_train = SpamDataloader(
            torch.tensor(X_train).float(), torch.tensor(Y_train).float()
        )
        Spam_test = SpamDataloader(
            torch.tensor(X_test).float(), torch.tensor(Y_test).float()
        )
        return Spam_train, Spam_test


## Simple Neural Network 선언

In [71]:
import torch.nn as nn
class SimpleNN(nn.Module):
    """Two stacked linear layers followed by a sigmoid.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Output size of the first linear layer.
        output_dim: Output size of the second linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # The layers are registered in the same order as before so that
        # parameter initialisation draws from the RNG identically.
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply linear1, linear2 and the sigmoid to ``x``.

        Args:
            x: Tensor whose last dimension is ``input_dim``.

        Returns:
            Tensor whose last dimension is ``output_dim``, after the sigmoid.
        """
        # NOTE: no activation is applied between the two linear layers.
        hidden = self.linear1(x)        # (..., hidden_dim)
        logits = self.linear2(hidden)   # (..., output_dim)
        return self.sigmoid(logits)
    

## Trainer Class 선언: Training과 Testing 둘다 진행가능

In [72]:

class Trainer():
    """Runs a training loop and a loss-only evaluation loop for a model.

    Args:
        model: The module to optimise. It is not moved by this class; the
            caller is expected to have placed it on ``device`` already.
        dataloader: Iterable yielding ``(x, y)`` batches used by ``train``.
        optimizer: Optimizer stepping the model's parameters.
        loss_fn: Callable invoked as ``loss_fn(y_pred, y)``.
        device: Device every batch is moved to before the forward pass.
    """

    def __init__(self, model, dataloader, optimizer, loss_fn, device):
        self.model = model
        self.dataloader = dataloader
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.device = device

    def train(self, num_epochs):
        """Train for ``num_epochs`` passes over ``self.dataloader``.

        After each epoch the loss of the last batch of that epoch is
        printed (``nan`` when the dataloader produced no batch).
        """
        # test() leaves the model in eval mode; switch back explicitly so a
        # train() call after test() behaves like the first one.
        self.model.train()
        for epoch in range(num_epochs):
            loss = None
            for (x, y) in self.dataloader:
                x = x.to(self.device)
                y = y.to(self.device)
                # Match the target's shape instead of a bare squeeze(): a
                # bare squeeze() turns a batch of one into a 0-dim tensor,
                # which no longer matches y.
                y_pred = self.model(x).reshape(y.shape)
                loss = self.loss_fn(y_pred, y)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            last_loss = float('nan') if loss is None else loss.item()
            print("Epoch: {}, Loss: {}".format(epoch, last_loss))

    def test(self, test_dataloader):
        """Print the loss of every batch in ``test_dataloader``.

        Runs under ``torch.no_grad()`` with the model in eval mode. The
        model is left in eval mode afterwards.
        """
        with torch.no_grad():
            self.model.eval()
            for (x, y) in test_dataloader:
                x = x.to(self.device)
                y = y.to(self.device)
                # Same shape alignment as in train().
                y_pred = self.model(x).reshape(y.shape)
                loss = self.loss_fn(y_pred, y)
                print("Test Loss: {}".format(loss.item()))

## 실제 실행 

In [73]:
# Load the data; create_data() returns the train dataset and the test dataset.
spam = Spam()
Spam_train, Spam_test = spam.create_data()

# Wrap both datasets in dataloaders. The test loader uses batch_size=3000,
# which the original author chose so that the whole test set is evaluated in
# a single batch.
train_dataloader = DataLoader(Spam_train, batch_size=32, shuffle=True)
test_dataloader = DataLoader(Spam_test, batch_size=3000, shuffle=True)

# Build the model and pick the GPU when one is available, else the CPU.
model = SimpleNN(input_dim=57, hidden_dim=10, output_dim=1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Optimizer and loss function.
import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr=0.0001)
loss_fn = torch.nn.BCELoss()

# The trainer moves batches, not the model, so move the model here.
model = model.to(device)

# Train on the train loader, then report the loss on the test loader.
trainer = Trainer(model, train_dataloader, optimizer, loss_fn, device)
trainer.train(num_epochs=50)
trainer.test(test_dataloader)


Epoch: 0, Loss: 2.9076948165893555
Epoch: 1, Loss: 0.6952906250953674
Epoch: 2, Loss: 0.4539429247379303
Epoch: 3, Loss: 0.9177824258804321
Epoch: 4, Loss: 0.5577143430709839
Epoch: 5, Loss: 0.49743854999542236
Epoch: 6, Loss: 0.502269983291626
Epoch: 7, Loss: 0.5577268600463867
Epoch: 8, Loss: 0.4853273034095764
Epoch: 9, Loss: 0.5292458534240723
Epoch: 10, Loss: 0.43183383345603943
Epoch: 11, Loss: 0.4347705543041229
Epoch: 12, Loss: 0.4310908615589142
Epoch: 13, Loss: 0.3465205430984497
Epoch: 14, Loss: 0.3950994610786438
Epoch: 15, Loss: 0.45020347833633423
Epoch: 16, Loss: 0.35658490657806396
Epoch: 17, Loss: 0.4022830128669739
Epoch: 18, Loss: 0.30515217781066895
Epoch: 19, Loss: 0.43259096145629883
Epoch: 20, Loss: 0.35360249876976013
Epoch: 21, Loss: 0.35897523164749146
Epoch: 22, Loss: 0.38569721579551697
Epoch: 23, Loss: 0.36042067408561707
Epoch: 24, Loss: 0.43549373745918274
Epoch: 25, Loss: 0.2971787452697754
Epoch: 26, Loss: 0.2725640833377838
Epoch: 27, Loss: 0.285248637

## 추가로

In [74]:
import argparse
import torch.optim as optim
import torch

# Hyper-parameters are gathered in a single argparse namespace.
parser = argparse.ArgumentParser(description='Spam Classifier')
parser.add_argument(
    '--num_hidden', type=int, default=10,
    help='number of hidden units',
)
parser.add_argument(
    '--num_epochs', type=int, default=50,
    help='number of epochs',
)
parser.add_argument(
    '--lr', type=float, default=0.0021,
    help='learning rate',
)
# An explicit empty argument list is parsed, so sys.argv is ignored and every
# option takes its default value.
args = parser.parse_args([])

import torch.nn as nn
class SimpleNN2(nn.Module):
    """Two stacked linear layers and a sigmoid, sized from ``args``.

    The input width is fixed at 57 and the output width at 1; only the
    hidden width is configurable.

    Args:
        args: Object exposing ``num_hidden``, the output size of the first
            linear layer.
    """

    def __init__(self, args):
        super().__init__()
        hidden_units = args.num_hidden
        # Same registration order as before, so parameter initialisation
        # draws from the RNG identically.
        self.linear1 = nn.Linear(57, hidden_units)
        self.linear2 = nn.Linear(hidden_units, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply linear1, linear2 and the sigmoid to ``x``.

        Args:
            x: Tensor whose last dimension is 57.

        Returns:
            Tensor whose last dimension is 1, after the sigmoid.
        """
        # NOTE: no activation is applied between the two linear layers.
        hidden = self.linear1(x)        # (..., num_hidden)
        logits = self.linear2(hidden)   # (..., 1)
        return self.sigmoid(logits)
    

# Use the GPU when one is available, otherwise the CPU, then build the model
# from args and move it to that device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleNN2(args).to(device)

# Trainer that creates its own optimizer and loss function from args
class Trainer2():
    """Trainer that builds its own Adam optimizer and BCE loss from ``args``.

    Args:
        model: The module to optimise. It is not moved by this class; the
            caller is expected to have placed it on ``device`` already.
        dataloader: Iterable yielding ``(x, y)`` batches used by ``train``.
        args: Object exposing ``lr``, the learning rate passed to Adam.
        device: Device every batch is moved to before the forward pass.
    """

    def __init__(self, model, dataloader, args, device):
        self.model = model
        self.dataloader = dataloader
        self.optimizer = optim.Adam(model.parameters(), lr=args.lr)
        self.loss_fn = torch.nn.BCELoss()
        self.device = device

    def train(self, num_epochs):
        """Train for ``num_epochs`` passes over ``self.dataloader``.

        After each epoch the loss of the last batch of that epoch is
        printed (``nan`` when the dataloader produced no batch).
        """
        # test() leaves the model in eval mode; switch back explicitly so a
        # train() call after test() behaves like the first one.
        self.model.train()
        for epoch in range(num_epochs):
            loss = None
            for (x, y) in self.dataloader:
                x = x.to(self.device)
                y = y.to(self.device)
                # Match the target's shape instead of a bare squeeze(): a
                # bare squeeze() turns a batch of one into a 0-dim tensor,
                # which no longer matches y.
                y_pred = self.model(x).reshape(y.shape)
                loss = self.loss_fn(y_pred, y)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            last_loss = float('nan') if loss is None else loss.item()
            print("Epoch: {}, Loss: {}".format(epoch, last_loss))

    def test(self, test_dataloader):
        """Print the loss of every batch in ``test_dataloader``.

        Runs under ``torch.no_grad()`` with the model in eval mode. The
        model is left in eval mode afterwards.
        """
        with torch.no_grad():
            self.model.eval()
            for (x, y) in test_dataloader:
                x = x.to(self.device)
                y = y.to(self.device)
                # Same shape alignment as in train().
                y_pred = self.model(x).reshape(y.shape)
                loss = self.loss_fn(y_pred, y)
                print("Test Loss: {}".format(loss.item()))
    


# Passing the shared `args` namespace lets every hyper-parameter be supplied
# in one place.
trainer = Trainer2(model, train_dataloader, args, device)

# Train the model. The epoch count is read from args (default 50) rather than
# repeated as a literal, so changing --num_epochs actually takes effect.
trainer.train(num_epochs=args.num_epochs)

# Report the loss on the test dataloader.
trainer.test(test_dataloader)

Epoch: 0, Loss: 0.4391304552555084
Epoch: 1, Loss: 0.19496846199035645
Epoch: 2, Loss: 1.0545012950897217
Epoch: 3, Loss: 0.27353841066360474
Epoch: 4, Loss: 0.1685471534729004
Epoch: 5, Loss: 0.3707175850868225
Epoch: 6, Loss: 0.23777194321155548
Epoch: 7, Loss: 0.19678068161010742
Epoch: 8, Loss: 0.253887802362442
Epoch: 9, Loss: 0.21503892540931702
Epoch: 10, Loss: 0.20548155903816223
Epoch: 11, Loss: 0.1486048400402069
Epoch: 12, Loss: 0.6156580448150635
Epoch: 13, Loss: 0.14855632185935974
Epoch: 14, Loss: 0.13399551808834076
Epoch: 15, Loss: 0.13901634514331818
Epoch: 16, Loss: 0.389761745929718
Epoch: 17, Loss: 0.2179463505744934
Epoch: 18, Loss: 0.15224000811576843
Epoch: 19, Loss: 0.21589359641075134
Epoch: 20, Loss: 0.23532156646251678
Epoch: 21, Loss: 0.33786511421203613
Epoch: 22, Loss: 0.19920089840888977
Epoch: 23, Loss: 3.5299856662750244
Epoch: 24, Loss: 0.1244935467839241
Epoch: 25, Loss: 0.15238313376903534
Epoch: 26, Loss: 0.16849815845489502
Epoch: 27, Loss: 0.18987