<a href="https://colab.research.google.com/github/maruwrks/Deep-Learning-Task/blob/main/DeepLearning_Task_1_Week_2_Regression_Second_Hand_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning training model

In [None]:
# pip install torch torchvision

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping

## Data Exploring

In [136]:
url = "https://raw.githubusercontent.com/farrelrassya/teachingMLDL/refs/heads/main/02.%20Deep%20Learning/Dataset/secondhanddataset.csv"
df = pd.read_csv(url)
df.head()

Unnamed: 0,v.id,on road old,on road now,years,km,rating,condition,economy,top speed,hp,torque,current price
0,1,535651,798186,3,78945,1,2,14,177,73,123,351318.0
1,2,591911,861056,6,117220,5,9,9,148,74,95,285001.5
2,3,686990,770762,2,132538,2,8,15,181,53,97,215386.0
3,4,573999,722381,4,101065,4,3,11,197,54,116,244295.5
4,5,691388,811335,6,61559,3,9,12,160,53,105,531114.5


In [137]:
for col in df.columns:
    if df[col].dtype == 'int64':
        df[col] = df[col].astype(float)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   v.id           1000 non-null   float64
 1   on road old    1000 non-null   float64
 2   on road now    1000 non-null   float64
 3   years          1000 non-null   float64
 4   km             1000 non-null   float64
 5   rating         1000 non-null   float64
 6   condition      1000 non-null   float64
 7   economy        1000 non-null   float64
 8   top speed      1000 non-null   float64
 9   hp             1000 non-null   float64
 10  torque         1000 non-null   float64
 11  current price  1000 non-null   float64
dtypes: float64(12)
memory usage: 93.9 KB


In [138]:
x = df.drop('current price', axis=1)
y = df['current price']

## Data preperation

In [139]:
#split data menjadi 80% train dan 20% test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [140]:
#gunakan scaler untuk membuat model lebih stabil lagi
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape[1]

11

In [141]:
#ini optional, output tidak harus di scaling lagi
y_scaler = StandardScaler()
y_train = y_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_test = y_scaler.transform(y_test.values.reshape(-1, 1))

# Pytorch

In [142]:
#ubah data menjadi format tensor untuk train dan test nanti
x_train_torch = torch.tensor(x_train, dtype=torch.float32)
x_test_torch = torch.tensor(x_test, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

## Modeling Neural Networks

In [143]:
#modeling menggunakan MLP(Multi-Layer Perceptron)
class Modeling(nn.Module):
  def __init__(self,input):
    super(Modeling, self).__init__()
    self.fc1 = nn.Linear(input, 128)
    self.fc2 = nn.Linear(128, 64)
    self.fc3 = nn.Linear(64, 32)
    self.fc4 = nn.Linear(32, 1)
  def forward(self, x):
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = torch.relu(self.fc3(x))
    x = self.fc4(x)
    return x

In [144]:
model = Modeling(x_train.shape[1])
criterion = nn.MSELoss()#menggunakan Mean Squared Error Loss, karena variabel yang kita gunakan bukan binary
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Training Data

In [145]:
#Training model menggunakan pytorch
for epoch in range(100):
  model.train()
  optimizer.zero_grad()
  outputs = model(x_train_torch)
  loss = criterion(outputs, y_train_torch)
  loss.backward()
  optimizer.step()

#Kode ini untuk menentukan validation dari loss yang kita hitung
  model.eval()
  with torch.no_grad():
    val_out = model(x_test_torch)
    val_loss = criterion(val_out, y_test_torch)

  if epoch % 10 == 0:
    print(f'Epoch {epoch}, Train Loss: {loss.item()}, Val Loss: {val_loss.item()}')

Epoch 0, Train Loss: 1.0303854942321777, Val Loss: 1.0747994184494019
Epoch 10, Train Loss: 0.7040830254554749, Val Loss: 0.7247704267501831
Epoch 20, Train Loss: 0.20842887461185455, Val Loss: 0.1918095201253891
Epoch 30, Train Loss: 0.07183828204870224, Val Loss: 0.06947480887174606
Epoch 40, Train Loss: 0.01885298639535904, Val Loss: 0.02484544739127159
Epoch 50, Train Loss: 0.014511693269014359, Val Loss: 0.01645059511065483
Epoch 60, Train Loss: 0.00930380541831255, Val Loss: 0.01170959323644638
Epoch 70, Train Loss: 0.0063954126089811325, Val Loss: 0.009445782750844955
Epoch 80, Train Loss: 0.0049708252772688866, Val Loss: 0.007829836569726467
Epoch 90, Train Loss: 0.004139342810958624, Val Loss: 0.007276847492903471


In [146]:
#buat variable predict untuk mengevaluasi performa model yang diuji
y_pred_torch = model(x_test_torch).detach().numpy()

#rmse adalah root mean squeared error, gunanya untuk menghitung eror dari suatu model dengan cara meng kuadratkan hasil eror MSE. semakin kecil RMSE semakin baik modelnya
rmse_torch = np.sqrt(mean_squared_error(y_test,y_pred_torch))
mse_torch = mean_squared_error(y_test, y_pred_torch)

#pada R2 gunanya untuk menjelaskan variabilitas dalam model, range nilainya antara 0 - 1, semakin besar nilai R2 maka semakin bagus training yang dilakukan model.
#R2 sendiri sudah seperti akurasi jika dijadikan persentase
r2_torch = r2_score(y_test, y_pred_torch)

In [147]:
print(f'RMSE: {rmse_torch} \nMSE: {mse_torch}\nR2: {r2_torch}')

RMSE: 0.08158743728437035 
MSE: 0.006656509922631064
R2: 0.9938060920722019


In [148]:
#untuk menghitung range antara min dan max dari model test, jika range dari test nya sekitar 4 mengikuti RMSE maka model sudah cukup bagus
y_min = torch.min(y_test_torch).item()
y_max = torch.max(y_test_torch).item()
y_range = y_max - y_min

#menghitung range prediksi, range prediksi harusnya mendekati range dari y_test, kalau range pred nya kurang dari range y_test maka training kurang optimal
pred_min = np.min(y_pred_torch)
pred_max = np.max(y_pred_torch)
pred_range = pred_max - pred_min

print(f"y_min:{y_min}, y_max:{y_max}, Rentang y_test: {y_range}")
print(f"pred_min :{pred_min}, pred_max:{pred_max}, Rentang y_pred: {pred_range}")


y_min:-1.993978500366211, y_max:2.009277820587158, Rentang y_test: 4.003256320953369
pred_min :-2.009883403778076, pred_max:1.9720513820648193, Rentang y_pred: 3.9819347858428955


# Tensorflow

In [149]:
#untuk mencari nilai yang hilang dan mengganti nilainya dengan rata rata dari impu
imputer = SimpleImputer(strategy='mean')
x_train = imputer.fit_transform(x_train)
x_test = imputer.transform(x_test)

In [150]:
inputs = tf.keras.Input(shape=(x_train.shape[1],))

x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(32, activation='relu')(x)
x = tf.keras.layers.Dense(16, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs,outputs)

#menggunakan early stopping untuk mengurangi overfitting dan underfitting
earlystopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.compile(optimizer='adam', loss='mse')

history = model.fit(x_train, y_train, epochs=100, batch_size=32, validation_split=0.15, callbacks=[earlystopping])

Epoch 1/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 1.0895 - val_loss: 0.7889
Epoch 2/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6306 - val_loss: 0.4147
Epoch 3/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.3106 - val_loss: 0.1549
Epoch 4/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0974 - val_loss: 0.0840
Epoch 5/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0610 - val_loss: 0.0569
Epoch 6/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0381 - val_loss: 0.0407
Epoch 7/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0271 - val_loss: 0.0330
Epoch 8/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0198 - val_loss: 0.0293
Epoch 9/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━

In [151]:
y_pred_tf = model.predict(x_test)

#rmse adalah root mean squeared error, gunanya untuk menghitung eror dari suatu model dengan cara meng kuadratkan hasil eror MSE. semakin kecil RMSE semakin baik modelnya
rmse_tf = np.sqrt(mean_squared_error(y_test, y_pred_tf))
mse_tf = mean_squared_error(y_test, y_pred_tf)

#pada R2 gunanya untuk menjelaskan variabilitas dalam model, range nilainya antara 0 - 1, semakin besar nilai R2 maka semakin bagus training yang dilakukan model.
#R2 sendiri sudah seperti akurasi jika dijadikan persentase
r2_tf = r2_score(y_test, y_pred_tf)
print(f'RMSE: {rmse_tf}\nMSE: {mse_tf}\nR2: {r2_tf}')

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
RMSE: 0.11082346213145264
MSE: 0.012281839758801516
R2: 0.9885717011565849


In [152]:
#untuk menghitung range antara min dan max dari model test, jika range dari test nya sekitar 5 maka model sudah cukup baguis
y_min_tf = tf.reduce_min(y_test).numpy().item()
y_max_tf = tf.reduce_max(y_test).numpy().item()
y_range_tf = y_max_tf - y_min_tf

#menghitung range prediksi, range prediksi harusnya mendekati range dari y_test, kalau range pred nya kurang dari range y_test maka training kurang optimal
pred_min_tf = np.min(y_pred_tf)
pred_max_tf = np.max(y_pred_tf)
pred_range_tf = pred_max_tf - pred_min_tf

print(f"y_min:{y_min_tf}, y_max:{y_max_tf}, Rentang y_test: {y_range_tf}")
print(f"pred_min :{pred_min_tf}, pred_max:{pred_max_tf}, Rentang y_pred: {pred_range_tf}")

y_min:-1.993978528521615, y_max:2.009277927849781, Rentang y_test: 4.003256456371396
pred_min :-1.9088668823242188, pred_max:2.1071486473083496, Rentang y_pred: 4.016015529632568


model sangat bagus sehingga menghasilkan R2 pada tensorflow dan pytorch hampir mendekati 100%