# **Basic**

In [1]:
import torch

In [7]:
# Build a tensor directly from a Python list.
x = torch.tensor([1.0, 2.0, 3.0])

# Allocate a 2x3 tensor WITHOUT initializing it: the values are whatever was
# already in memory, so they look arbitrary but are not drawn from any RNG.
x_empty = torch.empty((2, 3))

# 2x2 tensor filled with zeros.
x_zeros = torch.zeros((2, 2))

# 2x2 tensor filled with ones.
x_ones = torch.ones((2, 2))

# 3x3 tensor sampled from the standard normal distribution.
x_rand = torch.randn((3, 3))

# Show every tensor under its label.
print(f'Tensor dari list :\n{x}\n')
print(f'Tensor kosong :\n{x_empty}\n')
print(f'Tensor dengan nol :\n{x_zeros}\n')
print(f'Tensor dengan satu :\n{x_ones}\n')
print(f'Tensor dengan distribusi normal :\n{x_rand}\n')

Tensor dari list :
tensor([1., 2., 3.])

Tensor kosong :
tensor([[-7.7219e+02,  1.5442e-42,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])

Tensor dengan nol :
tensor([[0., 0.],
        [0., 0.]])

Tensor dengan satu :
tensor([[1., 1.],
        [1., 1.]])

Tensor dengan distribusi normal :
tensor([[ 1.9021, -0.7163,  0.6311],
        [-0.8309,  0.3441, -0.0722],
        [-2.0432, -0.8713,  0.0981]])



**Operasi Tensor**

In [9]:
a = torch.tensor([1, 2])
b = torch.tensor([3, 4])

# Element-wise sum.
print(torch.add(a, b))
# Element-wise (Hadamard) product.
print(torch.mul(a, b))
# Dot product, computed on float copies of the two integer tensors.
print(a.float().dot(b.float()))

tensor([4, 6])
tensor([3, 8])
tensor(11.)


**Autograd (Automatic Differentiation)**

In [10]:
# Scalar leaf tensor whose gradient autograd should track.
x = torch.tensor(2.0, requires_grad=True)
# y = x^2 + 3x + 1
y = x.pow(2) + 3 * x + 1
# Backpropagate from y so that x.grad receives dy/dx.
y.backward()
print(x.grad)  # dy/dx = 2x + 3, which is 7 at x = 2

tensor(7.)


# **Atribut**

**Layer**
| No | Layer                                       | Kategori        | Fungsi Umum                                                                 | Contoh Penggunaan                               |
| -- | ------------------------------------------- | --------------- | --------------------------------------------------------------------------- | ----------------------------------------------- |
| 1  | `nn.Linear`                                 | Fully Connected | Layer terhubung penuh. Digunakan hampir di semua model neural network.      | Output layer klasifikasi atau regresi           |
| 2  | `nn.ReLU`, `nn.Sigmoid`, `nn.Tanh`          | Aktivasi        | Menambahkan fungsi aktivasi secara eksplisit.                               | Aktivasi non-linear (ReLU, sigmoid, dll)        |
| 3  | `nn.Dropout`                                | Regularization  | Mencegah overfitting dengan menonaktifkan neuron secara acak saat training. | Model training agar tidak overfit               |
| 4  | `nn.Flatten`                                | Reshaping       | Meratakan semua dimensi selain batch menjadi satu (hasil: batch × fitur).   | Dari output CNN ke Dense layer                  |
| 5  | `torch.nn.Identity()` (sebagai dummy input) | Input Layer     | Tidak wajib eksplisit, biasanya didefinisikan dalam `forward`.              | Input data ke model                             |
| 6  | `nn.Conv2d`                                 | Convolution     | Ekstraksi fitur dari gambar.                                                | Model CNN untuk pengenalan citra                |
| 7  | `nn.MaxPool2d`                              | Pooling         | Mengurangi ukuran fitur map, mempertahankan fitur penting.                  | Setelah Conv2d untuk downsampling               |
| 8  | `nn.BatchNorm1d` / `nn.BatchNorm2d`         | Normalization   | Menstabilkan dan mempercepat pelatihan.                                     | Setelah Conv2d atau Linear                      |
| 9  | `nn.LSTM`                                   | Recurrent       | Mengingat urutan data dalam jangka panjang.                                 | Time series, NLP, prediksi urutan               |
| 10 | `nn.Embedding`                              | NLP             | Mengubah kata (indeks) jadi vektor makna.                                   | Model teks seperti sentiment analysis           |
| 11 | `torch.cat()`                               | Merging         | Menggabungkan beberapa tensor (fitur).                                      | Model dengan input atau output lebih dari satu  |
| 12 | `torch.reshape()` / `.view()`               | Reshaping       | Mengubah bentuk tensor tanpa mengubah datanya.                              | Membentuk ulang input ke format yang diinginkan |


**Fungsi Aktivasi**
| Nama Aktivasi  | Cara Memanggil (OOP)   | Cara Memanggil (Functional) | Kegunaan                                         |
| -------------- | ---------------------- | --------------------------- | ------------------------------------------------ |
| ReLU           | `nn.ReLU()`            | `F.relu(x)`                 | Default non-linear, cepat dan umum               |
| Leaky ReLU     | `nn.LeakyReLU(0.01)`   | `F.leaky_relu(x, 0.01)`     | ReLU dengan grad negatif kecil (anti-dying ReLU) |
| Sigmoid        | `nn.Sigmoid()`         | `torch.sigmoid(x)`          | Output 0–1, cocok untuk klasifikasi biner        |
| Tanh           | `nn.Tanh()`            | `torch.tanh(x)`             | Output -1 sampai 1, sering dipakai di RNN        |
| Softmax        | `nn.Softmax(dim=1)`    | `F.softmax(x, dim=1)`       | Klasifikasi multi-kelas (dipakai di output)      |
| LogSoftmax     | `nn.LogSoftmax(dim=1)` | `F.log_softmax(x, dim=1)`   | Digunakan sebelum `NLLLoss()`                    |
| GELU           | `nn.GELU()`            | `F.gelu(x)`                 | Smooth ReLU, dipakai di Transformer (BERT, GPT)  |
| ELU            | `nn.ELU()`             | `F.elu(x)`                  | ReLU alternatif dengan output negatif smooth     |
| Swish / SiLU   | `nn.SiLU()`            | `F.silu(x)`                 | Aktivasi self-gated (`x * torch.sigmoid(x)`), mirip GELU |

**Fungsi Loss**
| Nama Loss               | Cara Memanggil                       | Kegunaan                                                            |
| ----------------------- | ------------------------------------ | ------------------------------------------------------------------- |
| Mean Squared Error      | `nn.MSELoss()`                       | Regresi, menghitung rata-rata kuadrat error                         |
| Mean Absolute Error     | `nn.L1Loss()`                        | Regresi, lebih tahan outlier dibanding MSE                          |
| Binary Cross Entropy    | `nn.BCELoss()`                       | Klasifikasi biner (output pakai `sigmoid`)                          |
| Binary CE with Logits   | `nn.BCEWithLogitsLoss()`             | Gabung `sigmoid + BCE` (lebih stabil)                               |
| Cross Entropy           | `nn.CrossEntropyLoss()`              | Klasifikasi multi-kelas (output **tanpa softmax**, langsung logits) |
| Negative Log Likelihood | `nn.NLLLoss()`                       | Digunakan bersama `log_softmax`                                     |
| KL Divergence           | `nn.KLDivLoss()`                     | Distribusi probabilitas (output log dan target probabilitas)        |
| Hinge Loss (manual)     | Custom `F.relu(1 - y_true * y_pred)` | SVM-style loss                                                      |

**Optimizer**
| Nama Optimizer | Cara Memanggil                                         | Kegunaan                                                 |
| -------------- | ------------------------------------------------------ | -------------------------------------------------------- |
| Stochastic GD  | `optim.SGD(model.parameters(), lr=0.01)`               | Dasar, bisa dengan momentum                              |
| SGD + Momentum | `optim.SGD(model.parameters(), lr=0.01, momentum=0.9)` | Lebih cepat konvergen                                    |
| Adam           | `optim.Adam(model.parameters(), lr=0.001)`             | Umum & powerful, adaptif learning rate                   |
| AdamW          | `optim.AdamW(model.parameters(), lr=0.001)`            | Adam dengan weight decay terpisah (untuk Transformer)    |
| RMSprop        | `optim.RMSprop(model.parameters(), lr=0.001)`          | Baik untuk data sekuensial / non-stasioner               |
| Adagrad        | `optim.Adagrad(model.parameters(), lr=0.01)`           | Learning rate per parameter                              |
| Adadelta       | `optim.Adadelta(model.parameters())`                   | Tidak perlu learning rate awal                           |
| LBFGS          | `optim.LBFGS(model.parameters())`                      | Untuk optimisasi convex (jarang digunakan di deep model) |


**Metrik Regresi**
| Metrik                         | Kelas / Fungsi                                                   | Penjelasan                                             |
| --------------------------------- | ---------------------------------------------------------------- | ------------------------------------------------------ |
| **MSE (Mean Squared Error)**      | `torchmetrics.regression.MeanSquaredError()`                     | Rata-rata kuadrat error antara prediksi dan target     |
| **MAE (Mean Absolute Error)**     | `torchmetrics.regression.MeanAbsoluteError()`                    | Rata-rata nilai absolut selisih                        |
| **MAPE**                          | `torchmetrics.regression.MeanAbsolutePercentageError()`          | Persentase kesalahan absolut rata-rata                 |
| **SMAPE**                         | `torchmetrics.regression.SymmetricMeanAbsolutePercentageError()` | Versi simetris dari MAPE                               |
| **MSLE (Mean Squared Log Error)** | `torchmetrics.regression.MeanSquaredLogError()`                  | Untuk data dengan skala log atau distribusi log-normal |
| **Explained Variance**            | `torchmetrics.regression.ExplainedVariance()`                    | Variansi yang dijelaskan oleh model                    |
| **R2 Score (R-squared)**          | `torchmetrics.regression.R2Score()`                              | Koefisien determinasi, 1 artinya model sempurna        |

**Metrik Klasifikasi Binary**
| Metrik            | Import                                             | Memanggil          | `y_pred` Format    |  Penjelasan                              |
| -------------------- | ---------------------------------------------------------------- | -------------------------- | --------------------- | ------------------------------------------ |
| Accuracy             | `from torchmetrics.classification import BinaryAccuracy`         | `BinaryAccuracy()`         | Probabilitas (0–1)    | Persentase prediksi benar                  |
| Precision            | `from torchmetrics.classification import BinaryPrecision`        | `BinaryPrecision()`        | Probabilitas          | Ketepatan: TP / (TP + FP)                  |
| Recall               | `from torchmetrics.classification import BinaryRecall`           | `BinaryRecall()`           | Probabilitas          | Sensitivitas: TP / (TP + FN)               |
| F1 Score             | `from torchmetrics.classification import BinaryF1Score`          | `BinaryF1Score()`          | Probabilitas          | Harmonik dari precision dan recall         |
| AUROC                | `from torchmetrics.classification import BinaryAUROC`            | `BinaryAUROC()`            | Probabilitas          | Area under ROC curve                       |
| AUPRC (AvgPrecision) | `from torchmetrics.classification import BinaryAveragePrecision` | `BinaryAveragePrecision()` | Probabilitas          | Area under Precision-Recall Curve          |
| Specificity          | `from torchmetrics.classification import BinarySpecificity`      | `BinarySpecificity()`      | Probabilitas          | True Negative Rate: TN / (TN + FP)         |
| Matthews CorrCoef    | `from torchmetrics.classification import BinaryMatthewsCorrCoef` | `BinaryMatthewsCorrCoef()` | Probabilitas          | Metrik korelasi antara prediksi dan target |
| Confusion Matrix     | `from torchmetrics.classification import BinaryConfusionMatrix`  | `BinaryConfusionMatrix()`  | Probabilitas / Logits | Matriks: TP, TN, FP, FN                    |


**Metrik Klasifikasi Multiclass**
| Metrik         | Import                                                 | Memanggil                           | `y_pred` Format     | Penjelasan                                           |
| ----------------- | -------------------------------------------------------------------- | ------------------------------------------- | ---------------------- | ------------------------------------------------------- |
| Accuracy          | `from torchmetrics.classification import MulticlassAccuracy`         | `MulticlassAccuracy(num_classes=N)`         | Logits (tanpa softmax) | Akurasi total: jumlah benar dibagi total                |
| Precision         | `from torchmetrics.classification import MulticlassPrecision`        | `MulticlassPrecision(num_classes=N)`        | Logits / Probabilitas  | Ketepatan prediksi setiap kelas                         |
| Recall            | `from torchmetrics.classification import MulticlassRecall`           | `MulticlassRecall(num_classes=N)`           | Logits / Probabilitas  | Sensitivitas: menangkap kelas benar                     |
| F1 Score          | `from torchmetrics.classification import MulticlassF1Score`          | `MulticlassF1Score(num_classes=N)`          | Logits / Probabilitas  | Harmonik precision & recall; bisa `average='macro'` dll |
| AUROC             | `from torchmetrics.classification import MulticlassAUROC`            | `MulticlassAUROC(num_classes=N)`            | Probabilitas (0–1)     | Area ROC untuk multi-kelas                              |
| AUPRC             | `from torchmetrics.classification import MulticlassAveragePrecision` | `MulticlassAveragePrecision(num_classes=N)` | Probabilitas           | Area precision-recall per kelas                         |
| Cohen’s Kappa     | `from torchmetrics.classification import MulticlassCohenKappa`       | `MulticlassCohenKappa(num_classes=N)`       | Logits / Probabilitas  | Evaluasi kesepakatan antar prediksi dan target          |
| Matthews CorrCoef | `from torchmetrics.classification import MulticlassMatthewsCorrCoef` | `MulticlassMatthewsCorrCoef(num_classes=N)` | Logits / Probabilitas  | Metrik korelasi antara prediksi dan target kelas        |
| Confusion Matrix  | `from torchmetrics.classification import MulticlassConfusionMatrix`  | `MulticlassConfusionMatrix(num_classes=N)`  | Logits / Probabilitas  | Matriks NxN: label vs prediksi                          |


___

```python
import torch.nn.functional as F
```

| Fungsi                  | Keterangan                     |
| ----------------------- | ------------------------------ |
| `F.relu(x)`             | ReLU activation                |
| `F.leaky_relu(x)`       | Leaky ReLU                     |
| `F.sigmoid(x)`          | Sigmoid                        |
| `F.tanh(x)`             | Tanh                           |
| `F.elu(x)`              | Exponential Linear Unit        |
| `F.gelu(x)`             | Gaussian Error Linear Unit     |
| `F.selu(x)`             | Scaled Exponential Linear Unit |
| `F.softmax(x, dim)`     | Softmax over dim               |
| `F.log_softmax(x, dim)` | Log-Softmax                    |


| Fungsi                             | Keterangan                                   |
| ---------------------------------- | -------------------------------------------- |
| `F.cross_entropy(output, target)`  | Cross-entropy loss (klasifikasi multi kelas) |
| `F.binary_cross_entropy(out, tgt)` | Binary cross entropy loss                    |
| `F.mse_loss(out, tgt)`             | Mean squared error loss                      |
| `F.nll_loss(out, tgt)`             | Negative log-likelihood loss                 |
| `F.l1_loss(out, tgt)`              | Mean absolute error                          |
| `F.smooth_l1_loss(out, tgt)`       | Huber loss                                   |
| `F.kl_div(out, tgt)`               | Kullback-Leibler divergence                  |


| Fungsi                         | Keterangan               |
| ------------------------------ | ------------------------ |
| `F.conv1d`, `conv2d`, `conv3d` | Operasi konvolusi manual |
| `F.max_pool1d`, `max_pool2d`   | Max pooling              |
| `F.avg_pool1d`, `avg_pool2d`   | Average pooling          |
| `F.adaptive_avg_pool2d`        | Adaptive pooling         |
| `F.interpolate(x)`             | Upsampling/downsampling  |


| Fungsi                  | Keterangan                   |
| ----------------------- | ---------------------------- |
| `F.batch_norm`          | Batch normalization          |
| `F.layer_norm`          | Layer normalization          |
| `F.instance_norm`       | Instance normalization       |
| `F.group_norm`          | Group normalization          |
| `F.local_response_norm` | Local response normalization |


| Fungsi            | Keterangan                     |
| ----------------- | ------------------------------ |
| `F.dropout`       | Dropout biasa                  |
| `F.dropout2d`     | Dropout untuk input 4D (CNN)   |
| `F.dropout3d`     | Dropout untuk input 5D         |
| `F.alpha_dropout` | Dropout untuk SELU aktivasinya |


| Fungsi                      | Keterangan             |
| --------------------------- | ---------------------- |
| `F.linear(x, w, b)`         | Operasi linear         |
| `F.embedding(x, weight)`    | Lookup tabel embedding |
| `F.one_hot(x, num_classes)` | Konversi ke one-hot    |
| `F.normalize(x)`            | Normalisasi vektor     |


| Fungsi                        | Keterangan        |
| ----------------------------- | ----------------- |
| `F.pairwise_distance(x1, x2)` | Jarak Euclidean   |
| `F.cosine_similarity(x1, x2)` | Kemiripan kosinus |


| Fungsi                                               | Keterangan                       |
| ---------------------------------------------------- | -------------------------------- |
| `F.pad(input, pad)`                                  | Padding manual                   |
| `F.unfold(input, kernel_size)`                       | Ekstrak patch                    |
| `F.fold(input, output_size)`                         | Balik dari unfold                |
| `F.grid_sample(input, grid)`                         | Sampling berbasis koordinat grid |
| `F.affine_grid(theta, size)`                         | Grid transformasi afine          |
| `F.binary_cross_entropy_with_logits(logits, target)` | Binary cross entropy dari logits |


# **Struktur Model**

`nn.<Layer>(Fitur, Neuron)`

- **Fitur di PyTorch harus didefinisikan berulang-ulang, berbeda dengan TensorFlow yang cukup didefinisikan di awal**
- **Konsepnya seperti silang: fitur yang kita isi di layer selanjutnya adalah jumlah neuron pada layer sebelumnya**

**Untuk menerima Fitur = (A, B) di pytorch berbeda dengan tensorflow**

```python
class CustomModel(nn.Module):
    """Three-layer MLP classifier for 2-D per-sample features, flattened with ``view``.

    Args:
        input_shape: Per-sample feature shape; its first two entries are
            multiplied to size the first linear layer.
        output_dim: Number of output units (classes).

    NOTE: ``forward`` already applies softmax. ``nn.CrossEntropyLoss`` expects
    raw logits, so pair this model with a loss that accepts probabilities.
    """

    def __init__(self, input_shape, output_dim):
        super().__init__()

        # Number of features in one sample once its (A, B) grid is flattened.
        self.input_dim = input_shape[0] * input_shape[1]

        # Each layer's in_features equals the previous layer's out_features.
        self.fc1 = nn.Linear(self.input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_dim)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        batch_size = x.size(0)
        flat = x.view(batch_size, -1)  # manual flatten: keep batch axis, merge the rest
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return F.softmax(logits, dim=1)


model = CustomModel(input_shape=(100, 40), output_dim=10)

```

```python
class CustomModel(nn.Module):
    """Three-layer MLP classifier for 2-D per-sample features, flattened by ``nn.Flatten``.

    Args:
        input_shape: Per-sample feature shape; its first two entries are
            multiplied to size the first linear layer.
        output_dim: Number of output units (classes).
    """

    def __init__(self, input_shape, output_dim):
        super().__init__()

        # Module that flattens every dimension after the batch axis.
        self.flatten = nn.Flatten()

        # Number of features in one sample after flattening.
        self.input_dim = input_shape[0] * input_shape[1]

        # Fully connected stack: input_dim -> 64 -> 32 -> output_dim.
        self.fc1 = nn.Linear(self.input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_dim)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        flat = self.flatten(x)  # delegate flattening to nn.Flatten
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        scores = self.fc3(hidden)
        return F.softmax(scores, dim=1)


model = CustomModel(input_shape=(100, 40), output_dim=10)

```

1. **Sequential**

In [None]:
import torch.nn as nn

# nn.Sequential runs the modules in the order they are listed.
model = nn.Sequential(*[
    # Hidden block 1: 100 input features -> 64 units.
    nn.Linear(in_features=100, out_features=64),
    nn.ReLU(),
    # Hidden block 2: 64 -> 32 units.
    nn.Linear(in_features=64, out_features=32),
    nn.ReLU(),
    # Output block: 32 -> 10 classes, turned into probabilities over dim 1.
    nn.Linear(in_features=32, out_features=10),
    nn.Softmax(dim=1),
])

2. **Object-Oriented Programming (OOP)**

**Murni**

In [None]:
class CNNModel(nn.Module):
    """Minimal CNN: conv -> ReLU -> 2x2 max-pool, then one linear layer with 10 outputs.

    The linear layer takes 16 * 14 * 14 features, which is what a
    single-channel 28 x 28 input produces after the size-preserving
    convolution and the halving pool.
    """

    def __init__(self):
        super().__init__()
        # Definition phase: every layer, activation included, is an attribute.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(16 * 14 * 14, 10)

    def forward(self, x):
        """Return raw class scores for a batch of images ``x``."""
        # Execution phase: same shape as TensorFlow's functional API,
        # i.e. out = layer(out) applied step by step.
        out = self.conv1(x)
        out = self.relu(out)
        out = self.pool(out)
        out = out.view(out.size(0), -1)  # flatten everything but the batch axis
        out = self.fc(out)
        return out


**Versi dengan F** : Fungsi aktivasi didefinisikan dengan F

In [None]:
import torch.nn.functional as F

class MyMLP(nn.Module):
    """100 -> 64 -> 32 -> 10 MLP whose activations come from ``torch.nn.functional``."""

    def __init__(self):
        super().__init__()
        # Definition phase: only layers that own parameters are attributes.
        self.fc1 = nn.Linear(100, 64)  # first hidden layer
        self.fc2 = nn.Linear(64, 32)   # second hidden layer
        self.fc3 = nn.Linear(32, 10)   # output layer

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        # Execution phase. Because the activations are called through F,
        # they need no attribute in __init__.
        hidden = self.fc1(x)
        hidden = F.relu(hidden)
        hidden = self.fc2(hidden)
        hidden = F.relu(hidden)
        scores = self.fc3(hidden)
        probs = F.softmax(scores, dim=1)  # output activation for multi-class classification
        return probs

- **Two-Branch Network**

`torch.cat` : Menggabungkan dua tensor dari dua "cabang" berbeda menjadi satu tensor sepanjang dimensi tertentu (biasanya dim=1 / axis=1) untuk bisa diproses bersama.

**Versi gabungan `Sequential`**

In [None]:
class TwoBranchNet(nn.Module):
    """Two-input network: a dense branch and a Conv1d branch merged by ``torch.cat``.

    ``forward`` takes ``x1`` with 10 features per sample and ``x2`` as a
    (batch, features) tensor; it returns one sigmoid output per sample.
    """

    def __init__(self):
        super().__init__()
        # Branch for input 1: dense projection to 64 features.
        self.branch1 = nn.Sequential(
            nn.Linear(in_features=10, out_features=64),
            nn.ReLU(),
            nn.BatchNorm1d(num_features=64),
        )

        # Branch for input 2: 1-D convolution pooled down to 32 features.
        self.branch2 = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(output_size=1),
            nn.Flatten(),
        )

        # Head applied to the concatenated 64 + 32 branch features.
        self.fc = nn.Sequential(
            nn.Linear(in_features=64 + 32, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=1),
            nn.Sigmoid(),
        )

    def forward(self, x1, x2):
        """Run both branches, concatenate along dim 1 and apply the head."""
        dense_features = self.branch1(x1)
        # Conv1d wants (batch, channels, length): add a singleton channel axis.
        conv_features = self.branch2(x2.unsqueeze(1))
        merged = torch.cat([dense_features, conv_features], dim=1)
        return self.fc(merged)
        

**Murni OOP**

In [None]:
class TwoBranchNetOOP(nn.Module):
    """Two-branch network with every layer declared as its own attribute.

    ``forward`` takes ``x1`` with 10 features per sample and ``x2`` as a
    (batch, features) tensor; it returns one sigmoid output per sample.
    """

    def __init__(self):
        super().__init__()

        # Branch 1: numeric feature input.
        self.linear1 = nn.Linear(in_features=10, out_features=64)
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(num_features=64)

        # Branch 2: 1-D sensor/signal input.
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool = nn.AdaptiveAvgPool1d(output_size=1)
        self.flatten = nn.Flatten()

        # Fully connected head applied after the branches are merged.
        self.fc1 = nn.Linear(in_features=64 + 32, out_features=64)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=64, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x1, x2):
        """Run both branches, concatenate along dim 1 and apply the head."""
        # Branch 1: linear -> ReLU -> batch norm.
        numeric = self.linear1(x1)
        numeric = self.relu1(numeric)
        numeric = self.bn1(numeric)

        # Branch 2: add a channel axis, then conv -> ReLU -> pool -> flatten.
        signal = x2.unsqueeze(1)  # (batch, 1, features)
        signal = self.conv1(signal)
        signal = self.relu2(signal)
        signal = self.pool(signal)
        signal = self.flatten(signal)

        # Join the two feature sets side by side.
        merged = torch.cat([numeric, signal], dim=1)

        # Final fully connected head.
        head = self.fc1(merged)
        head = self.relu3(head)
        head = self.fc2(head)
        return self.sigmoid(head)

- **Residual Connection**

In [None]:
class ResidualModel(nn.Module):
    """MLP with one skip connection around its 64 -> 64 hidden layer.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc_out = nn.Linear(64, output_dim)

    def forward(self, x):
        """Return ``fc_out(relu(fc2(h)) + h)`` where ``h = relu(fc1(x))``."""
        # Activated first-layer output, kept so it can be added back later.
        skip = F.relu(self.fc1(x))
        transformed = F.relu(self.fc2(skip))
        # Residual connection: shapes match because fc2 maps 64 -> 64.
        return self.fc_out(transformed + skip)

- **Multi-Head Network**

In [None]:
class MultiOutputModel(nn.Module):
    """Shared trunk feeding two heads: a 1-unit regression head and a 3-unit classification head.

    Args:
        input_dim: Number of input features.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Trunk whose output both heads consume.
        self.shared = nn.Sequential(
            nn.Linear(in_features=input_dim, out_features=64),
            nn.ReLU(),
        )
        self.regression_head = nn.Linear(64, 1)      # regression output
        self.classification_head = nn.Linear(64, 3)  # classification output (e.g. 3 classes)

    def forward(self, x):
        """Return the tuple ``(regression_output, classification_output)``."""
        trunk = self.shared(x)
        # Two values are returned, one per head.
        return self.regression_head(trunk), self.classification_head(trunk)

___

**Style Menulis Code di Forward :**

- Langsung digabung

In [None]:
def __init__(self):
    # Declares the four sub-modules that the accompanying forward() chains:
    # Linear(20 -> 128), BatchNorm1d(128), ReLU and Dropout(p=0.3).
    # NOTE(review): shown without its enclosing class; presumably meant to sit
    # inside an nn.Module subclass, where super().__init__() has to run before
    # sub-modules are assigned — confirm before reusing this snippet.
    self.fc1 = nn.Linear(20, 128)
    self.bn1 = nn.BatchNorm1d(128)
    self.relu = nn.ReLU()
    self.drop = nn.Dropout(0.3)

def forward(self, x):
    # "Combined" style: the whole layer stack is one nested expression, so the
    # calls execute from the inside out: fc1 -> bn1 -> relu -> drop.
    x = self.drop(self.relu(self.bn1(self.fc1(x)))) # the innermost call runs first
    return x

- Didefinisikan dibawahnya

In [None]:
class ModelTerbuka(nn.Module):
    """One Linear -> BatchNorm -> ReLU -> Dropout stage, written one step per line."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=20, out_features=128)
        self.bn1 = nn.BatchNorm1d(num_features=128)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(p=0.3)

    def forward(self, x):
        """Apply the four steps in order and return the result."""
        h = self.fc1(x)    # 1) linear transform, 20 -> 128 features
        h = self.bn1(h)    # 2) batch normalization
        h = self.relu(h)   # 3) activation
        h = self.drop(h)   # 4) dropout
        return h

___

**Style Arsitektur Model pada Input, Output, Hidden:**


- Didefinisikan Langsung pada Layer

In [None]:
class DeepModelFixed5_OOP(nn.Module):
    """Five-layer MLP (20 -> 128 -> 64 -> 32 -> 16 -> 1) with hard-coded sizes.

    Each of the four hidden stages is Linear -> BatchNorm1d -> ReLU ->
    Dropout(0.3); the final layer is a plain Linear with no activation.
    """

    def __init__(self):
        super().__init__()

        # All dimensions are written directly into the layer definitions.
        # Stage 1: 20 -> 128
        self.fc1 = nn.Linear(in_features=20, out_features=128)
        self.bn1 = nn.BatchNorm1d(num_features=128)
        self.drop1 = nn.Dropout(p=0.3)

        # Stage 2: 128 -> 64
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.bn2 = nn.BatchNorm1d(num_features=64)
        self.drop2 = nn.Dropout(p=0.3)

        # Stage 3: 64 -> 32
        self.fc3 = nn.Linear(in_features=64, out_features=32)
        self.bn3 = nn.BatchNorm1d(num_features=32)
        self.drop3 = nn.Dropout(p=0.3)

        # Stage 4: 32 -> 16
        self.fc4 = nn.Linear(in_features=32, out_features=16)
        self.bn4 = nn.BatchNorm1d(num_features=16)
        self.drop4 = nn.Dropout(p=0.3)

        # Output layer: 16 -> 1
        self.fc5 = nn.Linear(in_features=16, out_features=1)

        # One stateless ReLU module shared by every hidden stage.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the four hidden stages in order, then the output layer."""
        hidden_stages = (
            (self.fc1, self.bn1, self.drop1),
            (self.fc2, self.bn2, self.drop2),
            (self.fc3, self.bn3, self.drop3),
            (self.fc4, self.bn4, self.drop4),
        )
        for linear, norm, dropout in hidden_stages:
            x = dropout(self.relu(norm(linear(x))))
        return self.fc5(x)


model = DeepModelFixed5_OOP()

- Dibuat Menjadi Parameter

Tanpa sequential

In [None]:
class DeepModelParam5_OOP(nn.Module):
    """Five-layer MLP whose sizes come from constructor arguments (no nn.Sequential).

    Args:
        input_dim: Number of input features.
        hidden_dims: Sizes of the four hidden layers, in order.
        output_dim: Number of output units.
        dropout_rate: Dropout probability used after every hidden layer.

    Raises:
        AssertionError: If ``hidden_dims`` does not hold exactly four sizes.
    """

    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.3):
        super().__init__()

        # 5 layers in total: 4 hidden layers plus 1 output layer.
        assert len(hidden_dims) == 4 , "Model harus punya 4 hidden layer (total 5 layer termasuk output)"

        # Hidden layer 1: input_dim -> hidden_dims[0]
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dims[0])
        self.bn1 = nn.BatchNorm1d(num_features=hidden_dims[0])
        self.drop1 = nn.Dropout(p=dropout_rate)

        # Hidden layer 2: hidden_dims[0] -> hidden_dims[1]
        self.fc2 = nn.Linear(in_features=hidden_dims[0], out_features=hidden_dims[1])
        self.bn2 = nn.BatchNorm1d(num_features=hidden_dims[1])
        self.drop2 = nn.Dropout(p=dropout_rate)

        # Hidden layer 3: hidden_dims[1] -> hidden_dims[2]
        self.fc3 = nn.Linear(in_features=hidden_dims[1], out_features=hidden_dims[2])
        self.bn3 = nn.BatchNorm1d(num_features=hidden_dims[2])
        self.drop3 = nn.Dropout(p=dropout_rate)

        # Hidden layer 4: hidden_dims[2] -> hidden_dims[3]
        self.fc4 = nn.Linear(in_features=hidden_dims[2], out_features=hidden_dims[3])
        self.bn4 = nn.BatchNorm1d(num_features=hidden_dims[3])
        self.drop4 = nn.Dropout(p=dropout_rate)

        # Output layer (the 5th layer): hidden_dims[3] -> output_dim
        self.fc5 = nn.Linear(in_features=hidden_dims[3], out_features=output_dim)

        # A single ReLU object, reused by every hidden layer.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply Linear -> BatchNorm -> ReLU -> Dropout four times, then the output layer."""
        x = self.drop1(self.relu(self.bn1(self.fc1(x))))  # hidden layer 1
        x = self.drop2(self.relu(self.bn2(self.fc2(x))))  # hidden layer 2
        x = self.drop3(self.relu(self.bn3(self.fc3(x))))  # hidden layer 3
        x = self.drop4(self.relu(self.bn4(self.fc4(x))))  # hidden layer 4
        return self.fc5(x)                                # output layer, no activation


model = DeepModelParam5_OOP(
    input_dim=20,
    hidden_dims=[128, 64, 32, 16],
    output_dim=1,
    dropout_rate=0.3,
)

Dengan sequential

In [None]:
class DeepModelParam5(nn.Module):
    """Five-layer MLP built from constructor arguments and wrapped in nn.Sequential.

    Every hidden size in ``hidden_dims`` contributes one
    Linear -> BatchNorm1d -> ReLU -> Dropout group; a final Linear maps the
    last hidden size to ``output_dim``.

    Args:
        input_dim: Number of input features.
        hidden_dims: Sizes of the four hidden layers, in order.
        output_dim: Number of output units.
        dropout_rate: Dropout probability used after every hidden layer.

    Raises:
        AssertionError: If ``hidden_dims`` does not hold exactly four sizes.
    """

    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.3):
        super().__init__()

        # 5 layers in total: 4 hidden layers plus 1 output layer.
        assert len(hidden_dims) == 4 , "Model harus punya 4 hidden layer (total 5 layer termasuk output)"

        layers = []
        in_features = input_dim
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(in_features, hidden_dim))
            layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            # The next Linear consumes what this group produced.
            in_features = hidden_dim

        # Output layer, with no activation after it.
        layers.append(nn.Linear(in_features, output_dim))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the whole sequential stack."""
        return self.model(x)


# Instantiate the Sequential-based class defined in this cell
# (the original cell built DeepModelParam5_OOP here by mistake).
model = DeepModelParam5(
    input_dim=20,
    hidden_dims=[128, 64, 32, 16],
    output_dim=1,
    dropout_rate=0.3
)

# **Tingkatan**

**Sederhana**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Dataset sederhana
x = torch.tensor([1.0, 2.0, 3.0, 4.0]).unsqueeze(1)
y = torch.tensor([2.0, 4.0, 6.0, 8.0]).unsqueeze(1)
# Toy dataset above: targets follow y = 2x, one feature per sample (shape 4 x 1).

# Single-input, single-output linear model.
model = nn.Linear(in_features=1, out_features=1)

# Mean-squared-error loss, minimized with plain SGD.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop: 100 full-batch gradient steps.
for epoch in range(100):
    # Drop the gradients left over from the previous step.
    optimizer.zero_grad()

    y_pred = model(x)
    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch.
    if not epoch % 10:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# Prediction for an unseen input.
print(model(torch.tensor([[5.0]])))

Epoch 0, Loss: 16.5367
Epoch 10, Loss: 0.4294
Epoch 20, Loss: 0.0126
Epoch 30, Loss: 0.0017
Epoch 40, Loss: 0.0014
Epoch 50, Loss: 0.0013
Epoch 60, Loss: 0.0012
Epoch 70, Loss: 0.0011
Epoch 80, Loss: 0.0011
Epoch 90, Loss: 0.0010
tensor([[9.9473]], grad_fn=<AddmmBackward0>)


**Menengah**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# CNN Model
class CNN(nn.Module):
    """Small image classifier: one conv/ReLU/max-pool stage followed by a linear head.

    The head expects ``32 * 16 * 16`` flattened features, which corresponds to
    3-channel 32x32 inputs (the padded 3x3 convolution keeps the spatial size
    and the single 2x max-pool halves it). Produces 10 logits per sample.
    """

    def __init__(self):
        super().__init__()
        feature_stage = [
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        head_stage = [
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 10),
        ]
        self.conv = nn.Sequential(*feature_stage)
        self.fc = nn.Sequential(*head_stage)

    def forward(self, x):
        """Return class logits for a batch of images."""
        return self.fc(self.conv(x))

# Training setup: use the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
# NOTE(review): `trainloader` is not defined in this cell; it must be created
# beforehand and yield (images, labels) batches.
for epoch in range(5):
    # Sum of the per-batch losses for this epoch (not averaged)
    total_loss = 0
    for images, labels in trainloader:
        # Move the batch to the same device as the model
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

**Advanced**

In [None]:
import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class TransformerClassifier(nn.Module):
    """Sequence classifier: embedding + learned positions -> Transformer encoder -> mean pool -> linear head.

    Args:
        vocab_size: Number of entries in the token embedding table.
        embed_dim: Embedding / model dimension.
        nhead: Number of attention heads per encoder layer.
        hidden_dim: Width of the feed-forward sub-layer in each encoder layer.
        num_layers: Number of stacked encoder layers.
        num_classes: Number of output logits.

    ``forward`` takes token ids of shape ``(batch, seq_len)`` with
    ``seq_len <= 5000`` (the size of the positional table) and returns logits
    of shape ``(batch, num_classes)``.
    """

    def __init__(self, vocab_size, embed_dim, nhead, hidden_dim, num_layers, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # Learned positional table, one row per position (up to 5000 positions).
        self.pos_encoder = nn.Parameter(torch.rand(5000, embed_dim))
        # batch_first=True is required: forward() slices positions with x.size(1)
        # and pools over dim=1, i.e. it treats inputs as (batch, seq, embed).
        # With the default (batch_first=False) the encoder would read dim 0 as the
        # sequence axis and attend across different samples of the batch.
        encoder_layers = TransformerEncoderLayer(embed_dim, nhead, hidden_dim, batch_first=True)
        self.transformer = TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        # (batch, seq) -> (batch, seq, embed); positions broadcast over the batch
        x = self.embedding(x) + self.pos_encoder[:x.size(1)]
        x = self.transformer(x)
        x = x.mean(dim=1)  # global average pooling over the sequence axis
        return self.fc(x)

In [None]:
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import train_test_split

class ComplexModel(nn.Module):
    """Two-input binary classifier with a dense branch, a Conv1d branch and a parallel dense branch.

    ``forward(input1, input2)`` consumes ``input1`` with 3 features and
    ``input2`` with 4 features per sample, concatenates the three branch
    outputs (32 + 64 + 48 features), passes them through an attention module
    and a residual dense stage, and returns a sigmoid output of shape
    ``(batch, 1)``.

    NOTE(review): ``MultiHeadSelfAttention`` and ``F`` are not defined or imported
    in this cell; they must already be in scope. The attention module is
    assumed to return a tensor with the same shape as its input — confirm.
    """

    def __init__(self):
        super().__init__()

        ## Branch 1
        self.dense1_branch1 = nn.Linear(3, 64)
        self.bn1_branch1 = nn.BatchNorm1d(64)
        self.dropout1_branch1 = nn.Dropout(0.3)
        self.dense_residual_branch1 = nn.Linear(64, 64)
        self.dense2_branch1 = nn.Linear(64, 32)

        ## Branch 2 - Conv1D
        self.conv1_branch2 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=2, padding=1)
        self.bn1_branch2 = nn.BatchNorm1d(32)
        self.maxpool_branch2 = nn.MaxPool1d(kernel_size=2)
        self.dense1_branch2 = nn.Linear(32 * 2, 64)  # flattened pooled output: 32 channels * 2 positions
        self.dropout1_branch2 = nn.Dropout(0.3)

        ## Branch 2 - Dense Parallel
        self.dense_parallel_branch2 = nn.Linear(4, 48)
        self.bn_parallel_branch2 = nn.BatchNorm1d(48)

        ## Attention
        self.attention = MultiHeadSelfAttention(embed_dim=32 + 64 + 48, num_heads=4)

        ## After Attention
        self.dense_post_attention = nn.Linear(32 + 64 + 48, 128)
        self.bn_post_attention = nn.BatchNorm1d(128)
        self.dropout_post_attention = nn.Dropout(0.4)
        self.dense_residual_post = nn.Linear(128, 128)

        ## Output
        self.output = nn.Linear(128, 1)

    def forward(self, input1, input2):
        ### Branch 1
        x1 = F.relu(self.dense1_branch1(input1))
        x1 = self.bn1_branch1(x1)
        x1 = self.dropout1_branch1(x1)
        x1_res = F.relu(self.dense_residual_branch1(x1))
        x1 = x1 + x1_res  # Residual connection
        x1 = F.relu(self.dense2_branch1(x1))

        ### Branch 2 - Conv1D
        x2 = input2.unsqueeze(1)  # shape: (batch, 1, 4)
        x2 = F.relu(self.conv1_branch2(x2))  # (batch, 32, 5): kernel_size=2 with padding=1 adds one position
        x2 = self.bn1_branch2(x2)
        x2 = self.maxpool_branch2(x2)  # (batch, 32, 2)
        x2 = x2.view(x2.size(0), -1)  # flatten to (batch, 64)
        x2 = F.relu(self.dense1_branch2(x2))
        x2 = self.dropout1_branch2(x2)

        ### Branch 2 - Parallel Dense
        x2_parallel = F.relu(self.dense_parallel_branch2(input2))
        x2_parallel = self.bn_parallel_branch2(x2_parallel)

        ### Merge the two branch-2 paths
        x2_merged = torch.cat([x2, x2_parallel], dim=1)  # (batch, 64+48)

        ### Merge all branches
        merged = torch.cat([x1, x2_merged], dim=1)  # (batch, 32+64+48)

        ### Attention
        # The merged vector is treated as a sequence of length 1.
        merged_attn_input = merged.unsqueeze(1)  # (batch, 1, features)
        attn_output = self.attention(merged_attn_input).squeeze(1)  # (batch, features)

        ### Post-attention processing
        x = F.relu(self.dense_post_attention(attn_output))
        x = self.bn_post_attention(x)
        x = self.dropout_post_attention(x)
        x_res = F.relu(self.dense_residual_post(x))
        x = x + x_res  # residual connection

        ### Output
        output = torch.sigmoid(self.output(x))  # (batch, 1), values in (0, 1)
        return output


# Dummy dataset: random features and random binary labels of shape (1000,)
input1 = torch.randn(1000, 3)
input2 = torch.randn(1000, 4)
labels = torch.randint(0, 2, (1000,)).float()

# Split into train and validation indices (80/20)
train_idx, val_idx = train_test_split(range(1000), test_size=0.2, random_state=42)
train_dataset = TensorDataset(input1[train_idx], input2[train_idx], labels[train_idx])
val_dataset = TensorDataset(input1[val_idx], input2[val_idx], labels[val_idx])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# Initialisation
model = ComplexModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCELoss()  # Binary Cross Entropy on probabilities
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
for epoch in range(1, 11):
    model.train()
    total_loss = 0
    for input1_batch, input2_batch, y_batch in train_loader:
        input1_batch, input2_batch, y_batch = input1_batch.to(device), input2_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        # The model returns (batch, 1) while the labels are (batch,). BCELoss
        # requires identical shapes, so drop the trailing singleton dimension.
        preds = model(input1_batch, input2_batch).squeeze(1)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"[Epoch {epoch}] Train Loss: {total_loss / len(train_loader):.4f}")

    # Validation
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for input1_batch, input2_batch, y_batch in val_loader:
            input1_batch, input2_batch, y_batch = input1_batch.to(device), input2_batch.to(device), y_batch.to(device)
            # Same squeeze as in training; without it the equality check below
            # would broadcast (batch, 1) against (batch,) into a (batch, batch)
            # matrix and overcount the correct predictions.
            preds = model(input1_batch, input2_batch).squeeze(1)
            loss = criterion(preds, y_batch)
            val_loss += loss.item()
            preds_binary = (preds > 0.5).float()
            correct += (preds_binary == y_batch).sum().item()
            total += y_batch.size(0)

    acc = correct / total
    print(f"          Val Loss: {val_loss / len(val_loader):.4f}, Accuracy: {acc:.4f}")


# **PyTorch Lightning**

PyTorch Lightning adalah high-level wrapper untuk PyTorch yang membantu kamu membuat model deep learning yang lebih bersih, terstruktur, dan scalable, tanpa mengubah core PyTorch.

| Fitur                          | PyTorch Biasa                              | PyTorch Lightning           |
| ------------------------------ | ------------------------------------------ | --------------------------- |
| Looping Training               | Ditulis manual (`for epoch in range(...)`) | Otomatis oleh `Trainer`     |
| Kode Training dan Model Campur | Ya                                         | Tidak, dipisah dengan jelas |
| Logging Manual                 | Ya                                         | Otomatis via `self.log()`   |
| Multi-GPU, TPU                 | Manual dan kompleks                        | Sangat mudah dengan 1 baris |
| Checkpointing                  | Manual                                     | Otomatis                    |
| Monitoring Loss, Metric        | Harus ditulis sendiri                      | Otomatis                    |
| Readability                    | Bisa berantakan                            | Bersih dan modular          |


- Versi lama : `import pytorch_lightning as pl`
- Versi baru : `import lightning as L`

In [None]:
### Pytorch Biasa
class Net(nn.Module):
    """Minimal regressor: a single linear layer mapping 10 features to 1 output."""

    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x``."""
        prediction = self.layer(x)
        return prediction

# Plain-PyTorch training: model, optimizer and loss are created by hand
model = Net()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()

# NOTE(review): `dataloader` is not defined in this cell; it must yield (x, y) batches.
for epoch in range(10):
    for batch in dataloader:
        x, y = batch
        y_pred = model(x)
        loss = loss_fn(y_pred, y)


        # Manual optimisation step: reset gradients, backpropagate, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

### Pytorch Lightning
class LitModel(pl.LightningModule):
    """Lightning counterpart of the plain ``Net`` example: a 10 -> 1 linear regressor trained with MSE."""

    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(10, 1)
        self.loss_fn = nn.MSELoss()

    def forward(self, x):
        """Apply the linear layer to ``x``."""
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the MSE loss for one batch."""
        inputs, targets = batch
        outputs = self(inputs)
        step_loss = self.loss_fn(outputs, targets)
        self.log("train_loss", step_loss)
        return step_loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with default settings."""
        adam = torch.optim.Adam(self.parameters())
        return adam

# The Trainer owns the epoch/batch loop that the plain-PyTorch version writes by hand.
# NOTE(review): `dataloader` is not defined in this cell; it must already exist.
trainer = pl.Trainer(max_epochs=10)
model = LitModel()
trainer.fit(model, dataloader)

**style penggunaan lightning bisa digabung menjadi 1 class bersamaan dengan model deep learning atau dipisah**

> **Versi Gabung**
```python 
class LitModelOOP(L.LightningModule):
    """LightningModule that keeps the network layers and the training logic in one class.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output features.
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, lr=1e-3):
        super().__init__()
        self.lr = lr
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Linear -> ReLU -> Linear."""
        return self.fc2(self.relu(self.fc1(x)))

    def _mse_on_batch(self, batch):
        # Shared by the training and validation steps: MSE between prediction and target.
        features, target = batch
        return F.mse_loss(self.forward(features), target)

    def training_step(self, batch, batch_idx):
        """Log the batch MSE as ``train_loss`` and return it."""
        loss = self._mse_on_batch(batch)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the batch MSE as ``val_loss``."""
        self.log("val_loss", self._mse_on_batch(batch))

    def configure_optimizers(self):
        """Return an Adam optimizer using the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

# Build the combined model/training module
model = LitModelOOP(input_dim=20, hidden_dim=64, output_dim=1, lr=1e-3)

trainer = L.Trainer(max_epochs=10)

# NOTE(review): `train_loader` and `val_loader` are not defined in this snippet; they must already exist.
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Run one more validation pass on the trained model
trainer.validate(model, dataloaders=val_loader)

> **Versi Pisah**
```python
class MyOOPModel(nn.Module):
    """Plain PyTorch two-layer MLP (Linear -> ReLU -> Linear) with no training logic.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output features.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the network output for ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

class LitModelSeparate(L.LightningModule):
    """LightningModule that wraps an externally built network and adds only the training logic.

    Args:
        model: The network to train; called as ``model(x)``.
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, model, lr=1e-3):
        super().__init__()
        self.model = model
        self.lr = lr

    def forward(self, x):
        """Delegate to the wrapped network."""
        return self.model(x)

    def _mse_on_batch(self, batch):
        # Shared by the training and validation steps: MSE between prediction and target.
        features, target = batch
        return F.mse_loss(self.forward(features), target)

    def training_step(self, batch, batch_idx):
        """Log the batch MSE as ``train_loss`` and return it."""
        loss = self._mse_on_batch(batch)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the batch MSE as ``val_loss``."""
        self.log("val_loss", self._mse_on_batch(batch))

    def configure_optimizers(self):
        """Return an Adam optimizer using the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

# Build the plain network first, then wrap it in the Lightning training module
model = MyOOPModel(input_dim=20, hidden_dim=64, output_dim=1)
lit_model = LitModelSeparate(model=model, lr=1e-3)

trainer = L.Trainer(max_epochs=10)

# NOTE(review): `train_loader` and `val_loader` are not defined in this snippet; they must already exist.
trainer.fit(lit_model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Validate the LightningModule wrapper: `model` is a bare nn.Module without a
# validation_step, so the Trainer must be given `lit_model`.
trainer.validate(lit_model, dataloaders=val_loader)

# **Arsitektur Model**

**Model**
| No | Model                           | Modul yang Diimpor                         | Cara Mengimpor Modelnya Lengkap                                                      |
| -- | ------------------------------- | ------------------------------------------ | ------------------------------------------------------------------------------------ |
| 1  | Multilayer Perceptron (MLP)     | `import torch.nn as nn`                    | **Custom**          |
| 2  | Convolutional Neural Net (CNN)  | `import torch.nn as nn`                    | **Custom** |
| 3  | Recurrent Neural Net (RNN)      | `from torch.nn import RNN`                 | `rnn = RNN(input_size=10, hidden_size=20, num_layers=2, batch_first=True)`           |
| 4  | Long Short-Term Memory (LSTM)   | `from torch.nn import LSTM`                | `lstm = LSTM(input_size=10, hidden_size=50, num_layers=2, batch_first=True)`         |
| 5  | Gated Recurrent Unit (GRU)      | `from torch.nn import GRU`                 | `gru = GRU(input_size=10, hidden_size=50, num_layers=2, batch_first=True)`           |
| 6  | Transformer                     | `from torch.nn import Transformer`         | `transformer = Transformer(d_model=512, nhead=8, num_encoder_layers=6)`              |
| 7 | ResNet-18 (Pretrained)          | `from torchvision import models`           | `resnet = models.resnet18(pretrained=True)`                                          |
| 8 | VGG-16 (Pretrained)             | `from torchvision import models`           | `vgg = models.vgg16(pretrained=True)`                                                |
| 9 | MobileNet (Pretrained)          | `from torchvision import models`           | `mobilenet = models.mobilenet_v2(pretrained=True)`                                   |
| 10 | EfficientNet (Pretrained)       | `from torchvision import models`           | `efficientnet = models.efficientnet_b0(pretrained=True)`                             |
| 11 | DenseNet (Pretrained)           | `from torchvision import models`           | `densenet = models.densenet121(pretrained=True)`                                     |
| 12 | Vision Transformer (ViT)        | `from torchvision.models import vit_b_16`  | `model = vit_b_16(pretrained=True)`                                                  |
| 13 | Swin Transformer (ViT)          | `from torchvision.models import swin_v2_b` | `model = swin_v2_b(pretrained=True)`                                                 |

| No | Model                           | Modul yang Diimpor                         | Cara Mengimpor Modelnya Lengkap                                                      |
| -- | ------------------------------- | ------------------------------------------ | ------------------------------------------------------------------------------------ |
| 1 | BERT (NLP)                      | `from transformers import BertModel`       | `model = BertModel.from_pretrained('bert-base-uncased')`                             |
| 2 | GPT-2 (NLP)                     | `from transformers import GPT2Model`       | `model = GPT2Model.from_pretrained('gpt2')`                                          |
| 3 | T5 (NLP)                        | `from transformers import T5Model`         | `model = T5Model.from_pretrained('t5-small')`                                        |

**MLP (Multi-Layer Perceptron)**

In [None]:
class MLP(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Dropout(0.2) -> Linear.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output features.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        hidden_layer = nn.Linear(input_dim, hidden_dim)
        activation = nn.ReLU()
        regulariser = nn.Dropout(0.2)
        output_layer = nn.Linear(hidden_dim, output_dim)
        self.layers = nn.Sequential(hidden_layer, activation, regulariser, output_layer)

    def forward(self, x):
        """Return the network output for ``x``."""
        return self.layers(x)

**CNN (Convolutional Neural Network)**

In [None]:
class SimpleCNN(nn.Module):
    """Two-stage CNN classifier for 3-channel 32x32 images.

    Each stage is a padded 3x3 convolution, ReLU and 2x2 max-pool; the pooled
    ``64 x 8 x 8`` feature map is flattened and fed to two linear layers.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)   # (3,32,32) -> (32,32,32)
        self.pool = nn.MaxPool2d(2, 2)                            # halves height and width
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # (32,16,16) -> (64,16,16)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # Stage outputs: (32,16,16) after conv1, then (64,8,8) after conv2.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        batch = x.size(0)
        flat = x.view(batch, -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

**AutoEncoder**

Fungsi : Mengkompres data dan rekonstruksi ulang. Berguna untuk noise removal, dimensionality reduction, anomaly detection.

In [None]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder for 28x28 inputs (784 -> 256 -> 64 -> 256 -> 784).

    ``forward`` flattens each sample, encodes it to a 64-dimensional code,
    decodes it, and returns a sigmoid reconstruction of shape
    ``(batch, 1, 28, 28)``.
    """

    def __init__(self):
        super().__init__()
        self.encoder_fc1 = nn.Linear(28*28, 256)
        self.encoder_fc2 = nn.Linear(256, 64)

        self.decoder_fc1 = nn.Linear(64, 256)
        self.decoder_fc2 = nn.Linear(256, 28*28)

    def forward(self, x):
        """Return the reconstruction of ``x``."""
        flat = x.view(x.size(0), -1)

        # Encoder: 784 -> 256 -> 64
        latent = F.relu(self.encoder_fc2(F.relu(self.encoder_fc1(flat))))

        # Decoder: 64 -> 256 -> 784, squashed to (0, 1) by the sigmoid
        hidden = F.relu(self.decoder_fc1(latent))
        reconstruction = torch.sigmoid(self.decoder_fc2(hidden))
        return reconstruction.view(-1, 1, 28, 28)

In [None]:
class LitAutoEncoder(L.LightningModule):
    """Lightning training wrapper around ``AutoEncoder`` using an MSE reconstruction loss.

    Args:
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, lr=1e-3):
        super().__init__()
        self.model = AutoEncoder()
        self.lr = lr

    def forward(self, x):
        """Return the reconstruction produced by the wrapped autoencoder."""
        return self.model(x)

    def _reconstruction_loss(self, batch):
        # The second element of each batch is ignored; the input is its own target.
        inputs, _ = batch
        return F.mse_loss(self(inputs), inputs)

    def training_step(self, batch, batch_idx):
        """Log the reconstruction MSE as ``train_loss`` and return it."""
        loss = self._reconstruction_loss(batch)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the reconstruction MSE as ``val_loss``."""
        self.log("val_loss", self._reconstruction_loss(batch))

    def configure_optimizers(self):
        """Return an Adam optimizer using the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [None]:
def train_autoencoder():
    """Train ``LitAutoEncoder`` for 5 epochs on CPU with a 55000/5000 train/validation split.

    NOTE(review): ``___`` is a fill-in placeholder for the dataset constructor;
    the function cannot run until it is replaced. The split sizes require a
    dataset of exactly 60000 samples.
    """
    # NOTE(review): `transform` is never used below; presumably it is meant to
    # be passed to the dataset constructor — confirm.
    transform = transforms.ToTensor()
    dataset = ___()
    train_set, val_set = random_split(dataset, [55000, 5000])
    train_loader = DataLoader(train_set, batch_size=64)
    val_loader = DataLoader(val_set, batch_size=64)

    model = LitAutoEncoder()
    trainer = L.Trainer(max_epochs=5, accelerator="cpu")
    trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Run the training only when executed as a script (or notebook cell)
if __name__ == '__main__':
    train_autoencoder()

**U-Net (Semantic Segmentation)**

Fungsi : Digunakan untuk segmentasi gambar pixel-wise 

Arsitektur :
- Encoder (CNN biasa, downsampling)
- Bottleneck
- Decoder (upsampling + skip connection dari encoder)

In [None]:
class UNetBlock(nn.Module):
    """Two padded 3x3 convolutions, each followed by ReLU; spatial size is preserved.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)

    def forward(self, x):
        """Return the block output for ``x``."""
        first = F.relu(self.conv1(x))
        return F.relu(self.conv2(first))

class UNetOOP(nn.Module):
    """Two-level U-Net for single-channel images, built from ``UNetBlock``.

    Encoder: 1 -> 64 -> 128 channels with 2x max-pooling after each level;
    bottleneck: 256 channels; decoder: transposed-conv upsampling plus skip
    connections from the encoder. Returns a one-channel sigmoid map.

    NOTE(review): assumes input height and width are divisible by 4 so the
    upsampled maps line up with the skip connections — confirm with the data.
    """

    def __init__(self):
        super().__init__()
        # Every UNetBlock below is the double-convolution block defined above.
        self.enc1 = UNetBlock(1, 64)
        self.pool1 = nn.MaxPool2d(2)
        self.enc2 = UNetBlock(64, 128)
        self.pool2 = nn.MaxPool2d(2)
        self.bottleneck = UNetBlock(128, 256)
        self.up2 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.dec2 = UNetBlock(256, 128)  # 256 = 128 upsampled + 128 skip channels
        self.up1 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.dec1 = UNetBlock(128, 64)   # 128 = 64 upsampled + 64 skip channels
        self.final = nn.Conv2d(64, 1, 1)

    def forward(self, x):
        """Return the per-pixel sigmoid output for ``x``."""
        # Contracting path
        skip1 = self.enc1(x)
        skip2 = self.enc2(self.pool1(skip1))
        bottom = self.bottleneck(self.pool2(skip2))

        # Expanding path: upsample, concatenate the matching skip, refine
        up_level2 = torch.cat([self.up2(bottom), skip2], dim=1)
        dec_level2 = self.dec2(up_level2)
        up_level1 = torch.cat([self.up1(dec_level2), skip1], dim=1)
        dec_level1 = self.dec1(up_level1)

        return torch.sigmoid(self.final(dec_level1))
        

In [None]:
class LitUNet(L.LightningModule):
    """Lightning training wrapper around ``UNetOOP`` using binary cross-entropy.

    Args:
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, lr=1e-3):
        super().__init__()
        self.model = UNetOOP()  # the main segmentation network
        self.lr = lr

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self.model(x)

    def _bce_on_batch(self, batch):
        # Shared by the training and validation steps: BCE between output and target.
        images, masks = batch
        return F.binary_cross_entropy(self(images), masks)

    def training_step(self, batch, batch_idx):
        """Log the batch BCE as ``train_loss`` and return it."""
        loss = self._bce_on_batch(batch)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the batch BCE as ``val_loss``."""
        self.log("val_loss", self._bce_on_batch(batch))

    def configure_optimizers(self):
        """Return an Adam optimizer using the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [None]:
def train_unet():
    """Train ``LitUNet`` for 5 epochs on CPU with a 160/40 train/validation split.

    NOTE(review): ``___`` is a fill-in placeholder for the dataset; the function
    cannot run until it is replaced. The split sizes require exactly 200 samples.
    """
    dataset = ___
    train_set, val_set = random_split(dataset, [160, 40])
    train_loader = DataLoader(train_set, batch_size=8)
    val_loader = DataLoader(val_set, batch_size=8)

    model = LitUNet()
    trainer = L.Trainer(max_epochs=5, accelerator="cpu")
    trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Run the training only when executed as a script (or notebook cell)
if __name__ == '__main__':
    train_unet()

**Siamese Network (Similarity Learning)**

Fungsi : Digunakan untuk membandingkan dua input

Arsitektur :
- Dua cabang dengan parameter sharing
- Loss: contrastive loss atau triplet loss

In [None]:
class SiameseBackbone(nn.Module):
    """Embedding network: unpadded 3x3 conv -> ReLU -> 2x max-pool -> linear -> ReLU.

    The linear layer expects ``16 * 13 * 13`` features, which corresponds to
    single-channel 28x28 inputs (28 -> 26 after the convolution, 13 after
    pooling). Produces a 128-dimensional embedding per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3)
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(16*13*13, 128)

    def forward(self, x):
        """Return the embedding of ``x``."""
        features = self.pool(F.relu(self.conv1(x)))
        flat = features.view(features.size(0), -1)
        return F.relu(self.fc1(flat))

class SiameseNet(nn.Module):
    """Siamese pair encoder: both inputs go through the same ``SiameseBackbone`` (shared weights)."""

    def __init__(self):
        super().__init__()
        self.backbone = SiameseBackbone()

    def forward(self, x1, x2):
        """Return the embeddings of ``x1`` and ``x2`` as a tuple."""
        embedding1 = self.backbone(x1)
        embedding2 = self.backbone(x2)
        return embedding1, embedding2

def contrastive_loss(out1, out2, label, margin=1.0):
    """Return the mean contrastive loss over a batch of embedding pairs.

    Pairs with ``label == 1`` are penalised by their squared distance; pairs
    with ``label == 0`` are penalised by the squared amount by which their
    distance falls short of ``margin``.

    Args:
        out1: Embeddings of the first inputs.
        out2: Embeddings of the second inputs.
        label: Per-pair weights (1 for similar pairs, 0 for dissimilar pairs).
        margin: Minimum distance required of dissimilar pairs.
    """
    dist = F.pairwise_distance(out1, out2)
    pull_term = label * dist.pow(2)
    push_term = (1 - label) * F.relu(margin - dist).pow(2)
    per_pair = pull_term + push_term
    return per_pair.mean()

In [None]:
class LitSiamese(L.LightningModule):
    """Lightning training wrapper around ``SiameseNet`` using ``contrastive_loss``.

    Args:
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, lr=1e-3):
        super().__init__()
        self.model = SiameseNet()
        self.lr = lr

    def forward(self, x1, x2):
        """Return the pair of embeddings produced by the wrapped network."""
        return self.model(x1, x2)

    def training_step(self, batch, batch_idx):
        """Log the contrastive loss of one ``(x1, x2, label)`` batch as ``train_loss`` and return it."""
        first, second, pair_label = batch
        emb_first, emb_second = self(first, second)
        step_loss = contrastive_loss(emb_first, emb_second, pair_label)
        self.log("train_loss", step_loss)
        return step_loss

    def configure_optimizers(self):
        """Return an Adam optimizer using the configured learning rate."""
        adam = torch.optim.Adam(self.parameters(), lr=self.lr)
        return adam

In [None]:
def train_siamese():
    """Train ``LitSiamese`` for 5 epochs on CPU.

    NOTE(review): ``DummySiameseDataset`` is not defined in this cell; it must
    already exist and yield ``(x1, x2, label)`` samples.
    """
    dataset = DummySiameseDataset()
    train_loader = DataLoader(dataset, batch_size=16)

    model = LitSiamese()
    trainer = L.Trainer(max_epochs=5, accelerator="cpu")
    trainer.fit(model, train_loader)

# Run only the Siamese training here. The previous guard also called
# train_unet(), which retrained the U-Net from the earlier cell before this
# example (a copy-paste leftover).
if __name__ == '__main__':
    train_siamese()

**GAN (Generative Adversarial Network)**

Fungsi : Model generatif yang mampu membuat gambar, audio, atau data sintetik lainnya.

Arsitektur :
- Generator: membuat gambar palsu
- Discriminator: membedakan gambar asli dan palsu
- Training: Adversarial (minimax game)

In [None]:
class Generator(nn.Module):
    """MLP generator mapping a noise vector to a 28x28 image with values in [-1, 1].

    Layer widths: ``noise_dim -> 256 -> 512 -> 1024 -> 784``, ReLU between the
    hidden layers and ``tanh`` on the output.

    Args:
        noise_dim: Size of the input noise vector.
    """

    def __init__(self, noise_dim=100):
        super().__init__()
        self.fc1 = nn.Linear(noise_dim, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 1024)
        self.fc4 = nn.Linear(1024, 28 * 28)

    def forward(self, z):
        """Return generated images of shape ``(batch, 1, 28, 28)``."""
        hidden = z
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        pixels = torch.tanh(self.fc4(hidden))
        return pixels.view(-1, 1, 28, 28)

class Discriminator(nn.Module):
    """MLP discriminator mapping a 28x28 image to a probability in (0, 1).

    Layer widths: ``784 -> 1024 -> 512 -> 256 -> 1``, LeakyReLU (slope 0.2)
    between the hidden layers and a sigmoid on the output.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 1)

    def forward(self, x):
        """Return one score per sample, shape ``(batch, 1)``."""
        hidden = x.view(x.size(0), -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.leaky_relu(layer(hidden), 0.2)
        return torch.sigmoid(self.fc4(hidden))

In [None]:
class LitGAN(L.LightningModule):
    """Lightning wrapper that trains ``Generator`` and ``Discriminator`` adversarially.

    Uses manual optimization. Lightning 2.x no longer supports several
    optimizers (or the ``optimizer_idx`` argument of ``training_step``) under
    automatic optimization, so both updates are performed explicitly inside
    ``training_step``: first the generator, then the discriminator.

    Args:
        noise_dim: Size of the generator's input noise vector.
        lr: Learning rate shared by both Adam optimizers.
    """

    def __init__(self, noise_dim=100, lr=2e-4):
        super().__init__()
        # Two optimizers are returned by configure_optimizers, so stepping is done by hand.
        self.automatic_optimization = False
        self.generator = Generator(noise_dim)
        self.discriminator = Discriminator()
        self.noise_dim = noise_dim
        self.lr = lr

    def forward(self, z):
        """Generate images from the noise batch ``z``."""
        return self.generator(z)

    def adversarial_loss(self, y_hat, y):
        """Binary cross-entropy between discriminator outputs and target labels."""
        return F.binary_cross_entropy(y_hat, y)

    def training_step(self, batch, batch_idx):
        """Run one generator update followed by one discriminator update; logs ``g_loss`` and ``d_loss``."""
        real_imgs, _ = batch
        batch_size = real_imgs.size(0)
        valid = torch.ones(batch_size, 1, device=self.device)
        fake = torch.zeros(batch_size, 1, device=self.device)

        # Same order as returned by configure_optimizers: generator, discriminator.
        opt_g, opt_d = self.optimizers()

        # Train generator: it succeeds when the discriminator labels its images as real.
        z = torch.randn(batch_size, self.noise_dim, device=self.device)
        gen_imgs = self(z)
        pred_fake = self.discriminator(gen_imgs)
        g_loss = self.adversarial_loss(pred_fake, valid)
        opt_g.zero_grad()
        self.manual_backward(g_loss)
        opt_g.step()
        self.log("g_loss", g_loss)

        # Train discriminator on real images and on detached generated images.
        pred_real = self.discriminator(real_imgs)
        real_loss = self.adversarial_loss(pred_real, valid)

        z = torch.randn(batch_size, self.noise_dim, device=self.device)
        gen_imgs = self(z).detach()  # detach: no gradient into the generator here
        pred_fake = self.discriminator(gen_imgs)
        fake_loss = self.adversarial_loss(pred_fake, fake)

        d_loss = (real_loss + fake_loss) / 2
        # zero_grad also discards the discriminator gradients left over from the generator step.
        opt_d.zero_grad()
        self.manual_backward(d_loss)
        opt_d.step()
        self.log("d_loss", d_loss)

    def configure_optimizers(self):
        """Return the generator and discriminator Adam optimizers (in that order) and no schedulers."""
        opt_g = torch.optim.Adam(self.generator.parameters(), lr=self.lr, betas=(0.5, 0.999))
        opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=self.lr, betas=(0.5, 0.999))
        return [opt_g, opt_d], []

In [None]:
def train_gan():
    """Download MNIST to ``./data`` and train ``LitGAN`` on it for 10 epochs on CPU.

    Images are converted to tensors and normalised with mean 0.5 and std 0.5,
    which matches the generator's ``tanh`` output range.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])
    mnist_train = datasets.MNIST("./data", train=True, download=True, transform=preprocessing)
    loader = DataLoader(mnist_train, batch_size=64, shuffle=True)

    gan = LitGAN()
    gan_trainer = L.Trainer(max_epochs=10, accelerator="cpu")
    gan_trainer.fit(gan, loader)

# Run the training only when executed as a script (or notebook cell)
if __name__ == '__main__':
    train_gan()

**N-BEATS**

Fungsi : membangun model yang mampu melakukan forecasting deret waktu dengan akurasi tinggi dan interpretable, tanpa perlu informasi domain khusus seperti tanggal, musim, dsb (purely data-driven)

Mekanisme :
1. Stacked Residual Blocks : Beberapa blok (residual blocks) yang disusun secara berurutan, Setiap blok menerima input x lalu menghasilkan 
- Backcast → perkiraan bagian dari input yang bisa dijelaskan (sisa input setelah dipelajari)
- Forecast → prediksi untuk masa depan 

Forecast dari semua blok dijumlahkan → total prediksi akhir.

2. Dekomposisi Backcast-Forecast : Setiap blok mempelajari dua hal yakni 
- Backcast → bagian input yang bisa dijelaskan (mirip autoencoder)
- Forecast → bagian prediksi masa depan (apa yang kita inginkan)

Model mencoba mempelajari residu (sisa input) di setiap blok, seperti mekanisme residual learning

3. Basis Expansion : N-BEATS menggunakan ide basis functions (seperti polinomial atau Fourier)
- Pada tiap blok, hasil theta akan digunakan untuk merekonstruksi forecast menggunakan basis
- Dua jenis basis bawaan → Trend basis (polinomial: 1, t, t², …) → cocok untuk tren naik/turun & Seasonal basis (Fourier/sinus) → cocok untuk pola berulang

```
Input Series:
     │
     │
     ├── Backcast Block1: ──[FC]──┐
     │                            ▼
     │                     Residual: x - backcast
     │                           │
     ├── Backcast Block2: ──[FC]─┐
     │                           ...
Forecast: Sum dari semua blok
```


In [None]:
# Basic N-BEATS block
class NBeatsBlock(nn.Module):
    """Fully connected stack that maps an input window to a coefficient vector ``theta``.

    Args:
        input_size: Length of the input window.
        theta_size: Number of coefficients produced.
        hidden_size: Width of every hidden layer.
        n_hidden: Number of hidden (Linear + ReLU) layers.
    """

    def __init__(self, input_size, theta_size, hidden_size=128, n_hidden=4):
        super().__init__()
        # The first layer consumes the input window; the others map hidden -> hidden.
        self.fc = nn.ModuleList([nn.Linear(input_size if i == 0 else hidden_size, hidden_size) for i in range(n_hidden)])
        self.relu = nn.ReLU()
        self.theta = nn.Linear(hidden_size, theta_size)

    def forward(self, x):
        """Return ``theta`` of shape ``(batch, theta_size)`` for input ``(batch, input_size)``."""
        for layer in self.fc:
            x = self.relu(layer(x))
        theta = self.theta(x)
        return theta


# Trend-basis block
class TrendBlock(NBeatsBlock):
    """N-BEATS block whose outputs are polynomials in time with shared coefficients.

    ``theta`` holds ``forecast_size`` polynomial coefficients. The forecast
    evaluates the polynomial on ``forecast_size`` points in [-1, 1]; the
    backcast evaluates the same polynomial on ``input_size`` points in
    [-1, 1] and is subtracted from the input to give the residual.

    Previously the residual was computed as ``x - forecast``, which fails with
    a shape error whenever ``input_size != forecast_size``. When both sizes are
    equal the two grids coincide, so the result is unchanged.

    Args:
        input_size: Length of the input (backcast) window.
        forecast_size: Forecast horizon and number of polynomial terms.
        hidden_size: Width of every hidden layer.
        n_hidden: Number of hidden layers.
    """

    def __init__(self, input_size, forecast_size, hidden_size=128, n_hidden=4):
        super().__init__(input_size, theta_size=forecast_size, hidden_size=hidden_size, n_hidden=n_hidden)
        self.backcast_size = input_size
        self.forecast_size = forecast_size

        self.register_buffer("t", torch.linspace(-1, 1, steps=forecast_size).unsqueeze(0))
        # Bases are registered as non-persistent buffers: they follow .to(device)
        # with the module but leave the state_dict keys unchanged.
        self.register_buffer(
            "basis",
            self._poly_basis(self.t, forecast_size),  # (1, forecast_size, forecast_size)
            persistent=False,
        )
        t_backcast = torch.linspace(-1, 1, steps=input_size).unsqueeze(0)
        self.register_buffer(
            "backcast_basis",
            self._poly_basis(t_backcast, forecast_size),  # (1, input_size, forecast_size)
            persistent=False,
        )

    @staticmethod
    def _poly_basis(t, n_terms):
        """Stack the powers ``t**0 .. t**(n_terms - 1)`` along a new last dimension."""
        return torch.stack([t ** i for i in range(n_terms)], dim=-1)

    def forward(self, x):
        """Return ``(residual, forecast)`` with shapes ``(B, input_size)`` and ``(B, forecast_size)``."""
        theta = super().forward(x)
        coeffs = theta.unsqueeze(-1)  # (B, forecast_size, 1)
        forecast = torch.matmul(self.basis, coeffs).squeeze(-1)  # (B, forecast_size)
        backcast_estimate = torch.matmul(self.backcast_basis, coeffs).squeeze(-1)  # (B, input_size)
        backcast = x - backcast_estimate  # residual passed on to the next block
        return backcast, forecast

In [None]:
class NBeatsModel(L.LightningModule):
    """Stack of ``TrendBlock`` modules trained with MSE.

    Each block receives the residual left by the previous block, and the
    block forecasts are summed to form the model output.

    Args:
        input_size: Length of the input window.
        forecast_size: Forecast horizon.
        hidden_size: Hidden width of every block.
        n_blocks: Number of stacked blocks.
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, input_size, forecast_size, hidden_size=128, n_blocks=3, lr=1e-3):
        super().__init__()
        self.save_hyperparameters()
        block_list = []
        for _ in range(n_blocks):
            block_list.append(TrendBlock(input_size, forecast_size, hidden_size=hidden_size))
        self.blocks = nn.ModuleList(block_list)
        self.lr = lr

    def forward(self, x):
        """Return the sum of all block forecasts for the input window ``x``."""
        total = 0
        residual = x
        for block in self.blocks:
            # Each block hands its residual to the next one and contributes a forecast.
            residual, partial_forecast = block(residual)
            total += partial_forecast
        return total

    def _mse_on_batch(self, batch):
        # Shared by the training and validation steps: MSE between forecast and target.
        window, target = batch
        return nn.MSELoss()(self(window), target)

    def training_step(self, batch, batch_idx):
        """Log the batch MSE as ``train_loss`` and return it."""
        loss = self._mse_on_batch(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the batch MSE as ``val_loss``."""
        self.log('val_loss', self._mse_on_batch(batch))

    def configure_optimizers(self):
        """Return an Adam optimizer using the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [None]:
# 30-step input window, 10-step forecast horizon, 4 stacked blocks
model = NBeatsModel(input_size=30, forecast_size=10, hidden_size=128, n_blocks=4, lr=1e-3)

# Stop early when `val_loss` has not improved for 5 consecutive validation checks.
# NOTE(review): `Trainer` and `EarlyStopping` are not imported in this cell; they must already be in scope.
trainer = Trainer(
    max_epochs=50,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5, mode='min')],
    accelerator='auto',
)

# NOTE(review): `train_loader` and `val_loader` are not defined in this cell; they must already exist.
trainer.fit(model, train_loader, val_loader)