In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

### Make_ classification 함수 사용하여 데이터 셋 만들기

In [2]:
X, y = make_classification(n_samples=1000,
                          n_features=5, 
                          n_informative=2,
                          n_redundant=0,
                          n_clusters_per_class= 1,
                          random_state =42)
print(X)
print(y)

[[ 0.36754094  0.84477438 -0.00556029 -2.3919562   1.73937892]
 [ 1.64097346 -0.37231905  0.43731319 -1.88315021 -0.56791512]
 [ 0.70233177 -1.81797923  0.7597123   0.14358794 -1.16508298]
 ...
 [ 1.19518427  0.58529944  0.5148836   0.14583647  1.78516406]
 [ 1.76203985 -0.39201264  1.05993638  0.69347891 -0.64572681]
 [ 2.19524995  0.63732468  1.61630004  1.38902436 -0.36926219]]
[1 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 1 1 0 0 1 1 0 0 0 0 0
 1 0 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 1
 1 1 0 1 1 1 1 0 1 0 1 1 1 0 1 0 0 0 0 1 1 1 0 1 0 0 1 0 0 0 1 0 0 1 1 1 1
 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 1 1 1 1 1 1
 0 1 1 0 0 0 0 0 1 1 0 1 0 1 1 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 0
 0 1 1 1 0 0 1 0 0 0 0 1 1 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1
 1 1 0 0 1 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 1 0 0 0 1 1 1 0 0 1 0 0 0 0
 0 1 1 0 1 0 1 0 0 1 1 1 0 1 1 0 0 0 1 1 0 1 0 1 0 0 1 0 0 0 1 1 0 1 0 1 0
 1 1 0 1 1 1 1 1

### 데이터셋을 train set 과 test set 으로 나누기

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

print("X_train 데이터 개수 : ", len(X_train))
print("X_test 데이터 개수 : ", len(X_test))
print("y_train 데이터 개수 : ", len(y_train))
print("y_test 데이터 개수 : ", len(y_test))

X_train 데이터 개수 :  700
X_test 데이터 개수 :  300
y_train 데이터 개수 :  700
y_test 데이터 개수 :  300


### 데이터셋 로딩

In [4]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle = False)

### 모델 정의 
* 5개의 특성을 가진 데이터셋 가정, 1개의 뉴런을 가진 출력층 만들기

In [5]:
class LogisticRegression(nn.Module):
    def __init__(self, input_dim):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
        
    def forward(self, x):
        out = self.linear(x)
        out = torch.sigmoid(out)
        return out
    
model = LogisticRegression(input_dim=5)

### 모델을 학습시키기 전에, 학습에 필요한 Loss function, optimizer 선언 

In [7]:
from adamp import SGDP

"""
# define your params
optimizer = SGDP(params, lr=0.1, weigth_decay=le-5, momentum=0.9 , neaterov=True)
"""

criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01)
print(optimizer)

# 왜 BCELoss() 사용했을까요??  => 0과 1 분류 이진 분류라서! 

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)


### Train loop 구현

In [8]:
num_epochs = 100

for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))
        loss.backward()
        optimizer.step()
        
    if epoch % 10 == 0: 
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss:{loss.item():.4f}')
        
        
        """
        SGDP 
        Epoch [1/100], Loss:0.8144
        Epoch [11/100], Loss:0.4019
        Epoch [21/100], Loss:0.3072
        Epoch [31/100], Loss:0.2677
        Epoch [41/100], Loss:0.2835
        Epoch [51/100], Loss:0.1820
        Epoch [61/100], Loss:0.2643
        Epoch [71/100], Loss:0.2723
        Epoch [81/100], Loss:0.2875
        Epoch [91/100], Loss:0.3213
        """

Epoch [1/100], Loss:0.8144
Epoch [11/100], Loss:0.4019
Epoch [21/100], Loss:0.3072
Epoch [31/100], Loss:0.2677
Epoch [41/100], Loss:0.2835
Epoch [51/100], Loss:0.1820
Epoch [61/100], Loss:0.2643
Epoch [71/100], Loss:0.2723
Epoch [81/100], Loss:0.2875
Epoch [91/100], Loss:0.3213


### 평가 코드 작성

In [12]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device: ", device)

model.eval()
with torch.no_grad():
    correct = 0
    total = 0 
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
        
    print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

Using device:  cpu
Accuracy of the network on the test images: 50 %
