In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset # Using TensorDataset for simple examples

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Pick the compute device once: use CUDA when it is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

# Simulate Neural Network

In [None]:
# Two-feature input vector, allocated directly on the selected device.
inp = torch.tensor([0.5, -1.0], device=device)
# Copy back to host memory so the values can be shown as a NumPy array.
inp.cpu().numpy()

In [None]:
# A single neuron: one linear unit mapping 2 input features to 1 output.
neural1 = nn.Linear(2, 1).to(device)
# Show the randomly initialised weight row and the bias.
print(neural1.weight.detach().cpu().numpy())
print(neural1.bias.detach().cpu().numpy())

In [None]:
# weighted sum: the linear layer computes w·x + b for the input vector
wx_b = neural1(inp)
# the layer has a single output, so the result can be read as a Python float
wx_b.item()

In [None]:
# Activation: squash the weighted sum through a sigmoid.
out = wx_b.sigmoid()
out.item()

# Multi-Layer Neural Network

In [None]:
# First layer of the two-layer example: 2 input features -> 3 hidden units.
# (This rebinds `neural1`, replacing the earlier single-neuron layer.)
neural1 = nn.Linear(2, 3).to(device)
print(neural1.weight.detach().cpu().numpy())
print(neural1.bias.detach().cpu().numpy())

In [None]:
# Second layer: 3 hidden units -> 1 output.
neural2 = nn.Linear(3, 1).to(device)
print(neural2.weight.detach().cpu().numpy())
print(neural2.bias.detach().cpu().numpy())

In [None]:
# Weighted sums of both layers. No activation is applied between them here,
# so the composition of the two layers is still a single affine map.
wx_b1 = neural1(inp)
wx_b2 = neural2(wx_b1)
print(wx_b1.detach().cpu().numpy())
print(wx_b2.item())

In [None]:
# Activation: sigmoid applied to the second layer's weighted sum.
out = wx_b2.sigmoid()
out.item()

# Activation Functions
- 비선형이 없으면 아무리 깊어도 전체가 하나의 선형변환과 같음.
- 비선형이 들어가야 XOR 같은 비선형 패턴·복잡한 결정 경계를 학습할 수 있음

In [None]:
x_vals = torch.linspace(-6, 6, 100) # 100 evenly spaced inputs on [-6, 6], reused by the activation examples and plots

In [None]:
# Inspect the grid: values rounded to two decimals, plus the number of points.
x_vals.numpy().round(2), len(x_vals)

In [None]:
# Sigmoid: maps each input element into the open interval (0, 1)
sigmoid_fn = nn.Sigmoid()
y_sigmoid = sigmoid_fn(x_vals)
y_sigmoid.numpy()

In [None]:
# Tanh: maps each input element into the open interval (-1, 1)
tanh_fn = nn.Tanh()
y_tanh = tanh_fn(x_vals)
y_tanh

In [None]:
# ReLU: zero for negative inputs, identity for non-negative inputs
relu_fn = nn.ReLU()
y_relu = relu_fn(x_vals)
y_relu

In [None]:
# Leaky ReLU: like ReLU, but negative inputs are scaled by negative_slope (0.1 here) instead of being zeroed
leaky_relu_fn = nn.LeakyReLU(negative_slope=0.1)
y_leaky_relu = leaky_relu_fn(x_vals)
y_leaky_relu

In [None]:
# Softmax over the class dimension (dim=1) of a small batch of logits.
softmax_fn = nn.Softmax(dim=1)
# Batch of 2 samples, 3 classes each.
sample_logits = torch.tensor([
    [1.0, -0.5, 2.0],
    [0.1, 0.5, 0.2],
])
y_softmax = softmax_fn(sample_logits)
print(y_softmax)
# Each row is a probability distribution, so each row sums to 1.
print(y_softmax[0].sum())
print(y_softmax[1].sum())

In [None]:
# Plot the four element-wise activations in a 2x2 grid.
plt.figure(figsize=(10, 6))

# (curve, legend label, panel title), listed in subplot order 1..4.
activation_panels = [
    (y_sigmoid, 'Sigmoid', 'Sigmoid: 1 / (1 + exp(-x))'),
    (y_tanh, 'Tanh', 'Tanh: (exp(x) - exp(-x)) / (exp(x) + exp(-x))'),
    (y_relu, 'ReLU', 'ReLU: max(0, x)'),
    (y_leaky_relu, 'Leaky ReLU (slope=0.1)', 'Leaky ReLU: max(0.1*x, x)'),
]

for panel_no, (curve, legend_label, panel_title) in enumerate(activation_panels, start=1):
    plt.subplot(2, 2, panel_no)
    plt.plot(x_vals.numpy(), curve.numpy(), label=legend_label)
    plt.title(panel_title)
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.grid(True)
    plt.legend()

plt.tight_layout()
plt.show()
plt.close()

# Multi-Layer Perceptrons (MLP) - Deep Neural Network

In [None]:
class SimpleMLP(nn.Module):
    """Fully connected classifier with two hidden Linear+ReLU stages.

    Layout: input_size -> hidden_size -> hidden_size -> num_classes.
    The network returns raw class scores (logits); no softmax is applied,
    which is the form nn.CrossEntropyLoss expects.

    Args:
        input_size: number of input features per sample (e.g. 784 for a
            flattened 28x28 image).
        hidden_size: width of both hidden layers.
        num_classes: number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()

        # Hidden stage 1: project the input features to the hidden width,
        # followed by a ReLU (negatives become 0, positives pass through).
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()

        # Hidden stage 2: keeps the hidden width unchanged.
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()

        # Output head: one logit per class.
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch [batch_size, input_size] to logits [batch_size, num_classes]."""
        hidden = self.relu1(self.fc1(x))       # [batch_size, hidden_size]
        hidden = self.relu2(self.fc2(hidden))  # [batch_size, hidden_size]
        return self.fc3(hidden)                # raw logits, no softmax


In [None]:
input_dim = 100  # number of input features per sample
hidden_dim = 64  # width of the hidden layers
output_dim = 5   # number of classes to predict

In [None]:
# Build the MLP and move its parameters to the selected device; the bare
# `model` expression displays the layer structure.
model = SimpleMLP(input_dim, hidden_dim, output_dim).to(device)
model

In [None]:
# Total number of trainable scalars: element counts of all parameters with requires_grad=True.
sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
batch_size = 4
dummy_input = torch.randn(batch_size, input_dim).to(device) # random batch of shape [batch_size, input_dim]
dummy_input.shape

In [None]:
with torch.no_grad(): # gradients are not needed when only predicting
    predictions = model(dummy_input)

In [None]:
# Logits for the whole batch ([batch_size, output_dim]) and the raw class scores of the first sample.
print(predictions.shape)
print(predictions[0].cpu().numpy())

# Loss Functions
- 학습을 하기 위해 모델의 error를 계산

## Mean Squared Error : 회귀

In [None]:
# Example: regression problem y = 2x + 1
x = torch.tensor([[0.0],[1.0],[2.0],[3.0]])
y_true = 2*x + 1  # targets: [[1],[3],[5],[7]]

In [None]:
# Hand-written predictions standing in for a model's output (regression: no logits or probabilities involved)
y_pred = torch.tensor([[0.9],[2.7],[5.4],[6.2]])

In [None]:
# MSELoss with its default settings: mean of the squared differences over all elements.
mse = nn.MSELoss()
loss_mse = mse(y_pred, y_true)

In [None]:
# Show targets, predictions and the resulting loss
# (squared errors 0.01, 0.09, 0.16, 0.64 -> mean of about 0.225).
print("y_true:\n", y_true)
print("y_pred:\n", y_pred)
print("MSELoss:", loss_mse.item(), "\n")

## Cross-Entropy : 다중 클래스, 단일 라벨 분류

In [None]:
# Batch size N=2, number of classes C=3
# The input must be raw logits, not probabilities
logits = torch.tensor([
    [2.0, 0.5, -1.0],  # per-class logits for sample 1
    [0.1, 1.2, 2.3],   # sample 2
])

In [None]:
# Targets are the index of the correct class for each sample
targets = torch.tensor([0, 2])  # ground truth: sample 1 -> class 0, sample 2 -> class 2

In [None]:
# CrossEntropyLoss takes raw logits [N, C] and class indices [N]; it applies log-softmax internally.
cel = nn.CrossEntropyLoss()
loss_ce = cel(logits, targets)

In [None]:
probs = logits.softmax(dim=1)  # computed for display only (not needed for the loss)
print("logits:\n", logits)
print("softmax(probs):\n", probs)
print("targets (class indices):", targets.tolist())
print("CrossEntropyLoss:", loss_ce.item())

# Note on one-hot targets: class indices (as used here) are the usual target form.
# PyTorch 1.10+ also accepts class-probability targets, i.e. a FLOAT tensor of
# shape [N, C]; a float one-hot tensor is a valid instance of that form and yields
# the same loss. An integer (long) one-hot tensor of shape [N, C] is rejected.
# onehot = F.one_hot(targets, num_classes=3).float()
# cel(logits, onehot)  # works on PyTorch 1.10+; without .float() it raises an error

## Binary Cross-Entropy: 이진, 멀티 라벨 분류

In [None]:
# Multi-label example: 2 samples, 4 independent labels, one logit per label.
logits_ml = torch.tensor([
    [ 1.2, -0.3,  0.8, -2.0],   # per-label logits for sample 1
    [-1.0,  2.0, -0.5,  0.3],   # sample 2
])

In [None]:
# Targets are 0/1 for every label (float dtype)
targets_ml = torch.tensor([
    [1., 0., 1., 0.],  # true labels for sample 1
    [0., 1., 0., 0.],  # sample 2
])

In [None]:
# BCEWithLogitsLoss combines the sigmoid and binary cross-entropy, so it is fed raw logits.
bcewl = nn.BCEWithLogitsLoss()
loss_bcewl = bcewl(logits_ml, targets_ml)

In [None]:
probs_ml = torch.sigmoid(logits_ml)  # computed for display only (not needed for the loss)
print("logits (multi-label):\n", logits_ml)
print("sigmoid(probs):\n", probs_ml)
print("targets (multi-hot):\n", targets_ml)
print("BCEWithLogitsLoss:", loss_bcewl.item())

In [None]:
# Class imbalance: use pos_weight when the positive (1) targets of a label should count more
pos_weight = torch.tensor([1.0, 3.0, 1.0, 1.0])  # weight the second label's positives 3x (e.g. to emphasise a rare label)
bcewl_weighted = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
loss_bcewl_w = bcewl_weighted(logits_ml, targets_ml)
print("BCEWithLogitsLoss (pos_weight=[1,3,1,1]):", loss_bcewl_w.item())

# Optimizers
- 학습하는 방법 : 모델 파라미터(W:weight, B:bias)를 학습. loss function 값이 최소화되도록 만듦.

## Loading Data

In [None]:
# Create dummy input and target: 5 samples with 10 features each, and 5 real-valued targets with 2 outputs each
# (note: this rebinds `dummy_input`, replacing the batch used in the MLP example)
dummy_input = torch.randn(5, 10).to(device)
dummy_target = torch.randn(5, 2).to(device)

In [None]:
# Confirm the shapes: inputs [5, 10], targets [5, 2].
dummy_input.shape, dummy_target.shape

## Create Model

In [None]:
# Create a dummy model for the optimizer demonstration; its sizes match dummy_input (10 features) and dummy_target (2 outputs)
dummy_model = nn.Linear(10, 2).to(device) # 10 input features, 2 output features

## Define Loss Function

In [None]:
# Mean squared error, since dummy_target holds real-valued targets.
loss_fn = nn.MSELoss()

## Optimizer

In [None]:
# Adam optimizer over all parameters of dummy_model (weight and bias), learning rate 0.001
optimizer = optim.Adam(dummy_model.parameters(), lr=0.001)

In [None]:
# Weights before the optimization step (compare with the values shown after it).
dummy_model.weight

In [None]:
# One optimization step with the Adam optimizer
optimizer.zero_grad()                     # Clear previous gradients
outputs = dummy_model(dummy_input)        # Forward pass
loss = loss_fn(outputs, dummy_target) # Calculate loss
loss.backward()                           # Backward pass (compute gradients)
optimizer.step()                          # Update weights

In [None]:
# Weights after one optimizer step; they should differ slightly from the values shown before it.
dummy_model.weight

# Neural Network For XOR Problem

## Loading Data

In [None]:
# The four XOR input pairs and their 0/1 labels, created directly on `device`.
X_xor = torch.tensor([[0., 0.], [0., 1.], [1., 0.], [1., 1.]], device=device)
y_xor = torch.tensor([[0.], [1.], [1.], [0.]], device=device)
X_xor.shape, y_xor.shape, X_xor.dtype

In [None]:
# Create a simple Dataset that pairs each input row with its label
xor_dataset = TensorDataset(X_xor, y_xor)

In [None]:
# Create a simple DataLoader. batch_size=4 equals the dataset size, so every
# epoch is a single full batch; shuffle only changes the row order within it.
xor_dataloader = DataLoader(xor_dataset, batch_size=4, shuffle=True)

## Define The Model

In [None]:
class XORNet(nn.Module):
    """Small MLP for XOR: 2 inputs -> 8 hidden units (ReLU) -> 1 output logit.

    The forward pass returns a raw logit. No sigmoid is applied inside the
    network: pair it with nn.BCEWithLogitsLoss for training, or apply
    torch.sigmoid explicitly when probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 8)   # input features -> hidden layer
        self.relu = nn.ReLU()        # the non-linearity that makes XOR learnable
        self.fc2 = nn.Linear(8, 1)   # hidden layer -> single logit

    def forward(self, x):
        """Return raw logits of shape [batch_size, 1] for inputs [batch_size, 2]."""
        return self.fc2(self.relu(self.fc1(x)))

In [None]:
# Instantiate the network on the same device as the XOR tensors; the bare expression shows its layers.
xor_model = XORNet().to(device)
xor_model

## Define Loss Function and Optimizer

In [None]:
# Note: this rebinds `loss_fn` and `optimizer`, which earlier pointed at the dummy-model demo objects.
loss_fn = nn.BCEWithLogitsLoss() # Handles sigmoid internally, more stable
optimizer = optim.Adam(xor_model.parameters(), lr=0.05) # Adam with a slightly higher LR for faster convergence on XOR

## Learning

In [None]:
# Number of full passes over the XOR dataset.
num_epochs = 1000

In [None]:
# Train the XOR network and record one loss value per epoch.
losses_history = []
xor_model.train()  # ensure training mode, e.g. when this cell is re-run after xor_model.eval()
for epoch in range(num_epochs):
    # Accumulate a sample-weighted sum so the logged value is the mean loss over
    # the whole epoch, not just the loss of whichever mini-batch came last.
    epoch_loss_sum = 0.0
    epoch_samples = 0

    for inputs, labels in xor_dataloader:  # DataLoader handles batching
        # Inputs and labels are already on `device` because X_xor / y_xor were created there.

        optimizer.zero_grad()               # clear gradients from the previous step
        outputs = xor_model(inputs)         # forward pass: raw logits
        loss = loss_fn(outputs, labels)     # mean loss over the batch
        loss.backward()                     # backward pass
        optimizer.step()                    # parameter update

        batch_count = inputs.size(0)
        epoch_loss_sum += loss.item() * batch_count
        epoch_samples += batch_count

    # max(..., 1) keeps this well-defined even if the loader yields no batches.
    epoch_loss = epoch_loss_sum / max(epoch_samples, 1)
    losses_history.append(epoch_loss)
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

In [None]:
# Plot training loss: one recorded value per epoch, so the x-axis is the epoch index
plt.figure(figsize=(10, 5))
plt.plot(losses_history)
plt.title('Training Loss for XOR Problem')
plt.xlabel('Epoch')
plt.ylabel('BCEWithLogitsLoss')
plt.grid(True)
plt.show()
plt.close()

# Evaluate
- logits는 모델이 계산한 정규화 전 점수(원시 점수)

In [None]:
# XORNet has no dropout or batchnorm layers, so this is a good habit rather than a requirement here.
xor_model.eval() # Set model to evaluation mode (important for layers like dropout, batchnorm)

In [None]:
def predictXOR(data=(1, 0), model=None):
    """Predict the XOR output (0 or 1) for one pair of inputs.

    Args:
        data: sequence of two numbers, e.g. ``[1, 0]``. The default is an
            immutable tuple rather than a list, so it is not a shared
            mutable default argument.
        model: network that maps a ``[1, 2]`` float tensor to a single logit.
            Defaults to the notebook-level ``xor_model``.

    Returns:
        int: 1 if the predicted probability ``sigmoid(logit)`` is >= 0.5, else 0.
    """
    net = xor_model if model is None else model

    # Build the input on the device the model lives on, so the two can never
    # disagree; fall back to the notebook-level `device` for parameter-free models.
    first_param = next(net.parameters(), None)
    target_device = device if first_param is None else first_param.device
    tensor_data = torch.tensor([data], dtype=torch.float32, device=target_device)

    with torch.no_grad():  # Disable gradient calculations for inference
        test_predictions_logits = net(tensor_data)
        test_predictions_probs = torch.sigmoid(test_predictions_logits)

    return int((test_predictions_probs >= 0.5).item())

In [None]:
# XOR(1, 0) is 1, so this should display 1 if training converged.
predictXOR([1, 0])