<a href="https://colab.research.google.com/github/maruwrks/Deep-Learning-Task/blob/main/DeepLearning_Task__Week_1_Regression_Infrared.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning training model

In [None]:
# Optional setup: install PyTorch if the environment does not already provide it.
# pip install torch torchvision

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping

## Data Exploration

In [None]:
# Location of the raw Infrared.csv file on GitHub.
url = "https://raw.githubusercontent.com/farrelrassya/teachingMLDL/refs/heads/main/02.%20Deep%20Learning/Dataset/Infrared.csv"

# Read the CSV straight from the URL and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

Unnamed: 0,Gender,Age,Ethnicity,T_atm,Humidity,Distance,T_offset1,Max1R13_1,Max1L13_1,aveAllR13_1,...,T_FHRC1,T_FHLC1,T_FHBC1,T_FHTC1,T_FH_Max1,T_FHC_Max1,T_Max1,T_OR1,T_OR_Max1,aveOralM
0,Male,41-50,White,24.0,28.0,0.8,0.7025,35.03,35.3775,34.4,...,33.4775,33.3725,33.4925,33.0025,34.53,34.0075,35.6925,35.635,35.6525,36.59
1,Female,31-40,Black or African-American,24.0,26.0,0.8,0.78,34.55,34.52,33.93,...,34.055,33.6775,33.97,34.0025,34.6825,34.66,35.175,35.0925,35.1075,37.19
2,Female,21-30,White,24.0,26.0,0.8,0.8625,35.6525,35.5175,34.2775,...,34.8275,34.6475,34.82,34.67,35.345,35.2225,35.9125,35.86,35.885,37.34
3,Female,21-30,Black or African-American,24.0,27.0,0.8,0.93,35.2225,35.6125,34.385,...,34.4225,34.655,34.3025,34.9175,35.6025,35.315,35.72,34.965,34.9825,37.09
4,Male,18-20,White,24.0,27.0,0.8,0.895,35.545,35.665,34.91,...,35.16,34.3975,34.67,33.8275,35.4175,35.3725,35.895,35.5875,35.6175,37.04


In [None]:
# Encode the categorical Gender column as floats so it can be used for modelling.
label_encoder = LabelEncoder()
gender_codes = label_encoder.fit_transform(df['Gender']).astype(float)

# Swap in the encoded column, then keep only the numeric columns
# (any column that is still non-numeric is dropped at this point).
df = df.assign(Gender=gender_codes).select_dtypes(include=[np.number])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1020 entries, 0 to 1019
Data columns (total 32 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Gender       1020 non-null   float64
 1   T_atm        1020 non-null   float64
 2   Humidity     1020 non-null   float64
 3   Distance     1018 non-null   float64
 4   T_offset1    1020 non-null   float64
 5   Max1R13_1    1020 non-null   float64
 6   Max1L13_1    1020 non-null   float64
 7   aveAllR13_1  1020 non-null   float64
 8   aveAllL13_1  1020 non-null   float64
 9   T_RC1        1020 non-null   float64
 10  T_RC_Dry1    1020 non-null   float64
 11  T_RC_Wet1    1020 non-null   float64
 12  T_RC_Max1    1020 non-null   float64
 13  T_LC1        1020 non-null   float64
 14  T_LC_Dry1    1020 non-null   float64
 15  T_LC_Wet1    1020 non-null   float64
 16  T_LC_Max1    1020 non-null   float64
 17  RCC1         1020 non-null   float64
 18  LCC1         1020 non-null   float64
 19  canthi

In [None]:
# Discard every row that contains a missing value so NaNs cannot leak into the loss.
df = df.dropna(axis=0, how='any')

# The target is the aveOralM column; every remaining column is a feature.
y = df['aveOralM']
x = df.drop(columns=['aveOralM'])

## Data Preparation

In [None]:
# Hold out 20% of the rows for testing (80% train); the fixed random_state keeps
# the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features to make training more stable. The scaler is fitted on
# the training split only and then reused, unchanged, for the test split.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)
x_train.shape[1]  # number of input features

31

In [None]:
# Optional step: the target does not have to be scaled.
# The scaler is fitted on the training targets only. Note that this cell rebinds
# y_train / y_test from pandas Series to 2-D NumPy column arrays.
y_scaler = StandardScaler()
y_train_column = y_train.to_numpy().reshape(-1, 1)
y_test_column = y_test.to_numpy().reshape(-1, 1)
y_train = y_scaler.fit_transform(y_train_column)
y_test = y_scaler.transform(y_test_column)

# PyTorch

In [None]:
# Convert the train/test arrays to float32 tensors for PyTorch.
x_train_torch, x_test_torch = (
    torch.tensor(features, dtype=torch.float32) for features in (x_train, x_test)
)
# Targets are reshaped to a single column so they line up with the model output.
y_train_torch, y_test_torch = (
    torch.tensor(targets, dtype=torch.float32).reshape(-1, 1)
    for targets in (y_train, y_test)
)

## Modeling Neural Networks

In [None]:
class Modeling(nn.Module):
  """Fully connected regressor: input -> 64 -> 32 -> 1, with ReLU after the first two layers."""

  def __init__(self, input):
    """Create the three linear layers; `input` is the number of input features."""
    super().__init__()
    # Layers are created in forward order (fc1, fc2, fc3).
    self.fc1 = nn.Linear(input, 64)
    self.fc2 = nn.Linear(64, 32)
    self.fc3 = nn.Linear(32, 1)

  def forward(self, x):
    """Run the forward pass and return the raw (un-activated) output of fc3."""
    hidden = x
    # ReLU is applied after each of the two hidden layers.
    for layer in (self.fc1, self.fc2):
      hidden = torch.relu(layer(hidden))
    # The output layer is linear, as needed for regression.
    return self.fc3(hidden)

In [None]:
# Seed PyTorch before building the network so the random weight initialisation
# (and therefore the training run) is repeatable across kernel restarts.
# 42 matches the random_state used for the train/test split.
torch.manual_seed(42)

# Network sized to the number of feature columns.
model = Modeling(x_train.shape[1])
# Mean squared error loss, because the target is a continuous value rather than a binary label.
criterion = nn.MSELoss()
# Adam optimiser over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Training Data

In [None]:
# Train the PyTorch model: each epoch is a single optimiser step on the whole training set.
for epoch in range(100):
  # Optimisation step on the training data.
  model.train()
  optimizer.zero_grad()
  outputs = model(x_train_torch)
  loss = criterion(outputs, y_train_torch)
  loss.backward()
  optimizer.step()

  # Loss on the held-out (test) tensors, computed in eval mode without gradient tracking.
  model.eval()
  with torch.no_grad():
    val_out = model(x_test_torch)
    val_loss = criterion(val_out, y_test_torch)

  # Only report every tenth epoch.
  if epoch % 10 != 0:
    continue
  print(f'Epoch {epoch}, Train Loss: {loss.item()}, Val Loss: {val_loss.item()}')

Epoch 0, Train Loss: 1.1006532907485962, Val Loss: 0.7597088813781738
Epoch 10, Train Loss: 0.6649003624916077, Val Loss: 0.44561851024627686
Epoch 20, Train Loss: 0.4185132682323456, Val Loss: 0.3038439154624939
Epoch 30, Train Loss: 0.32319608330726624, Val Loss: 0.2864750027656555
Epoch 40, Train Loss: 0.2799713909626007, Val Loss: 0.24376963078975677
Epoch 50, Train Loss: 0.24606622755527496, Val Loss: 0.2444562017917633
Epoch 60, Train Loss: 0.21894173324108124, Val Loss: 0.23313428461551666
Epoch 70, Train Loss: 0.2027149200439453, Val Loss: 0.23105129599571228
Epoch 80, Train Loss: 0.1938198357820511, Val Loss: 0.22762472927570343
Epoch 90, Train Loss: 0.18893489241600037, Val Loss: 0.228718563914299


In [None]:
# Predict on the test split to evaluate the trained model. Evaluation mode and
# no_grad are set explicitly so this cell does not rely on whatever state the
# training loop left the model in, and no autograd graph is built.
model.eval()
with torch.no_grad():
  y_pred_torch = model(x_test_torch).numpy()

# MSE is the mean of the squared errors and RMSE is its square root; for both,
# smaller means a better model.
# If the optional target-scaling cell was run, y_test is standardised, so these
# errors are in standardised units rather than the original target units.
mse_torch = mean_squared_error(y_test, y_pred_torch)
rmse_torch = np.sqrt(mse_torch)

# R2 is the share of the target's variance explained by the model: 1 is a perfect
# fit, 0 is equivalent to always predicting the mean, and it can be negative for
# a model that does worse than that. Higher is better.
r2_torch = r2_score(y_test, y_pred_torch)

In [None]:
# Report the PyTorch model's test-set metrics.
print(f'RMSE: {rmse_torch} \nMSE: {mse_torch}\nR2: {r2_torch}')

RMSE: 0.4702644529999838 
MSE: 0.221148655755374
R2: 0.6945600812144945


In [None]:
# Spread of the true test targets: minimum, maximum and their difference.
y_min = y_test_torch.min().item()
y_max = y_test_torch.max().item()
y_range = y_max - y_min

# Spread of the predictions. As a rough sanity check it should be close to the
# spread of y_test; a clearly narrower prediction range suggests the model is
# not reaching the extreme values.
pred_min = y_pred_torch.min()
pred_max = y_pred_torch.max()
pred_range = pred_max - pred_min

print(f"y_min:{y_min}, y_max:{y_max}, Rentang y_test: {y_range}")
print(f"pred_min :{pred_min}, pred_max:{pred_max}, Rentang y_pred: {pred_range}")


y_min:-1.5934081077575684, y_max:4.034672737121582, Rentang y_test: 5.62808084487915
pred_min :-1.2110434770584106, pred_max:3.955070972442627, Rentang y_pred: 5.166114330291748


# TensorFlow

In [None]:
# Mean-imputation safeguard: any missing feature value is replaced by the mean of
# that column in the training split. Rows containing NaN were already dropped
# before the split, so this is expected to leave the data unchanged.
imputer = SimpleImputer(strategy='mean').fit(x_train)
x_train = imputer.transform(x_train)
x_test = imputer.transform(x_test)

In [None]:
# Seed Python, NumPy and TensorFlow so the run is repeatable across kernel restarts.
tf.keras.utils.set_random_seed(42)

# Functional-API network: input -> Dense(64, ReLU) -> Dense(1, linear).
inputs = tf.keras.Input(shape=(x_train.shape[1],))

# The hidden layer is bound to `hidden` rather than `x`, so the feature
# DataFrame `x` created earlier in the notebook is not overwritten.
hidden = tf.keras.layers.Dense(64, activation='relu')(inputs)
outputs = tf.keras.layers.Dense(1, activation='linear')(hidden)

# NOTE: this rebinds `model`, replacing the PyTorch model defined earlier.
model = tf.keras.Model(inputs, outputs)

# Early stopping limits overfitting: training stops once val_loss has not improved
# for 10 consecutive epochs, and the best weights seen so far are restored.
earlystopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.compile(optimizer='adam', loss='mse')

# 15% of the training data is held out by Keras as the validation set.
history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_split=0.15, callbacks=[earlystopping])

Epoch 1/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.2528 - val_loss: 0.5132
Epoch 2/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.4256 - val_loss: 0.3494
Epoch 3/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.2918 - val_loss: 0.3239
Epoch 4/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.2790 - val_loss: 0.3028
Epoch 5/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.2455 - val_loss: 0.2926
Epoch 6/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.2141 - val_loss: 0.2810
Epoch 7/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.2332 - val_loss: 0.2899
Epoch 8/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.2391 - val_loss: 0.2906
Epoch 9/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━

In [None]:
# Predict on the test split with the Keras model.
y_pred_tf = model.predict(x_test)

# MSE is the mean of the squared errors and RMSE is its square root; for both,
# smaller means a better model.
# If the optional target-scaling cell was run, y_test is standardised, so these
# errors are in standardised units rather than the original target units.
mse_tf = mean_squared_error(y_test, y_pred_tf)
rmse_tf = np.sqrt(mse_tf)

# R2 is the share of the target's variance explained by the model: 1 is a perfect
# fit, 0 is equivalent to always predicting the mean, and it can be negative for
# a model that does worse than that. Higher is better.
r2_tf = r2_score(y_test, y_pred_tf)
print(f'RMSE: {rmse_tf}\nMSE: {mse_tf}\nR2: {r2_tf}')

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
RMSE: 0.48980202507088344
MSE: 0.23990602376353834
R2: 0.66865330397692


In [None]:
# Spread of the true test targets: minimum, maximum and their difference.
y_min_tf, y_max_tf = (
    reducer(y_test).numpy().item() for reducer in (tf.reduce_min, tf.reduce_max)
)
y_range_tf = y_max_tf - y_min_tf

# Spread of the predictions. As a rough sanity check it should be close to the
# spread of y_test; a clearly narrower prediction range suggests the model is
# not reaching the extreme values.
pred_min_tf = y_pred_tf.min()
pred_max_tf = y_pred_tf.max()
pred_range_tf = pred_max_tf - pred_min_tf

print(f"y_min:{y_min_tf}, y_max:{y_max_tf}, Rentang y_test: {y_range_tf}")
print(f"pred_min :{pred_min_tf}, pred_max:{pred_max_tf}, Rentang y_pred: {pred_range_tf}")

y_min:-1.593408074176033, y_max:4.034672838553087, Rentang y_test: 5.62808091272912
pred_min :-1.0459221601486206, pred_max:3.8609933853149414, Rentang y_pred: 4.906915664672852
