In [24]:
# Connect the Colab runtime to Google Drive by mounting it at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [25]:
# Base folder of this exercise on the mounted Drive; dataset and output paths are built from it
path = "/content/gdrive/MyDrive/praktikum_ml/praktikum04"

In [26]:
import pandas as pd

# Location of the prospective-car-buyer dataset inside the exercise folder.
dataset_file = path + '/data/calonpembelimobil.csv'

# Parse the comma-delimited file into a DataFrame.
df = pd.read_csv(dataset_file, sep=',')

# Show the first five rows as the cell output.
df.head()


Unnamed: 0,ID,Usia,Status,Kelamin,Memiliki_Mobil,Penghasilan,Beli_Mobil
0,1,32,1,0,0,240,1
1,2,49,2,1,1,100,0
2,3,52,1,0,2,250,1
3,4,26,2,1,1,130,0
4,5,45,3,0,2,237,1


In [27]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Target: the purchase label. Features: every remaining column except the
# row identifier 'ID', which is dropped together with the target.
y = df['Beli_Mobil']
X = df.drop(columns=['Beli_Mobil', 'ID'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [28]:
# Build the logistic-regression classifier (allowing up to 1000 solver
# iterations) and fit it on the training split in one step.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Evaluate on the held-out split: overall accuracy plus the per-class report.
y_pred = model.predict(X_test)
akurasi = accuracy_score(y_test, y_pred)
laporan = classification_report(y_test, y_pred)
print("Akurasi:", akurasi)
print("\nLaporan Klasifikasi:\n", laporan)

Akurasi: 0.93

Laporan Klasifikasi:
               precision    recall  f1-score   support

           0       0.92      0.86      0.89        64
           1       0.94      0.96      0.95       136

    accuracy                           0.93       200
   macro avg       0.93      0.91      0.92       200
weighted avg       0.93      0.93      0.93       200



In [29]:
# Three hand-written applicants used as new, unlabeled input
# (same feature columns as the training data, no target column).
data_baru = {
    'Usia': [30, 45, 55],
    'Status': [1, 2, 3],
    'Kelamin': [0, 1, 1],
    'Memiliki_Mobil': [0, 1, 2],
    'Penghasilan': [180, 250, 300]
}

# Write the rows to a CSV next to the original dataset, without the index column.
new_data_file = path + '/data/calonpembelimobil_baru.csv'
df_new = pd.DataFrame.from_dict(data_baru)
df_new.to_csv(new_data_file, index=False)
print("✅ File calonpembelimobil_baru.csv berhasil dibuat.")

✅ File calonpembelimobil_baru.csv berhasil dibuat.


In [30]:
# --- Score the new dataset ---
df_new = pd.read_csv(path + '/data/calonpembelimobil_baru.csv')

# Feed the model exactly the columns it was fitted on, in the fitted order.
# Passing the raw frame only works while the CSV happens to contain the same
# columns in the same order; extra or reordered columns would otherwise be
# rejected (or, on older scikit-learn, merely warned about).
# NOTE(review): feature_names_in_ is set because the model was fitted on a
# DataFrame with string column names (scikit-learn >= 1.0) - confirm version.
feature_cols = list(model.feature_names_in_)
missing_cols = [col for col in feature_cols if col not in df_new.columns]
if missing_cols:
    # Fail early with a readable message instead of an opaque KeyError.
    raise ValueError(f"Kolom fitur tidak ditemukan di data baru: {missing_cols}")

# Predict the purchase label and attach it as a new column.
prediksi = model.predict(df_new[feature_cols])
df_new['Prediksi_Beli_Mobil'] = prediksi

In [31]:
# Persist the scored rows (features plus predicted label) to a new CSV,
# leaving out the index column.
hasil_file = path + '/data/hasil_prediksi.csv'
df_new.to_csv(hasil_file, index=False)

# Report where the file went and echo the first rows.
print("\nHasil prediksi tersimpan di:", hasil_file)
print(df_new.head())


Hasil prediksi tersimpan di: /content/gdrive/MyDrive/praktikum_ml/praktikum04/data/hasil_prediksi.csv
   Usia  Status  Kelamin  Memiliki_Mobil  Penghasilan  Prediksi_Beli_Mobil
0    30       1        0               0          180                    0
1    45       2        1               1          250                    1
2    55       3        1               2          300                    1


In [32]:
import os

import joblib

# === SAVE MODEL ===
model_path = path + '/model/logreg_calonpembeli.pkl'
# joblib.dump opens the target file directly and does not create missing
# parent folders, so make sure the 'model' directory exists first.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)
print(f"\n✅ Model berhasil disimpan di: {model_path}")

# === EXAMPLE: LOAD MODEL ===
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
loaded_model = joblib.load(model_path)
print("\nModel berhasil dimuat ulang.")

# Consistency check: the reloaded model must reproduce the in-memory model's
# predictions on the whole test split, not just look plausible when printed.
assert (loaded_model.predict(X_test) == model.predict(X_test)).all(), \
    "Prediksi model yang dimuat ulang berbeda dari model asli"
print("Prediksi ulang (cek konsistensi):", loaded_model.predict(X_test[:3]))


✅ Model berhasil disimpan di: /content/gdrive/MyDrive/praktikum_ml/praktikum04/model/logreg_calonpembeli.pkl

Model berhasil dimuat ulang.
Prediksi ulang (cek konsistensi): [1 1 0]


In [33]:
# file: diagram_pipeline_logreg.py
from graphviz import Digraph

# Left-to-right flow chart of the notebook's pipeline stages.
dot = Digraph(comment='Pipeline Logistic Regression')
dot.attr(rankdir='LR', size='8,5')

# One entry per pipeline stage: (node id, label, colour).
node_specs = [
    ('A', 'Pengumpulan Data\n(calonpembelimobil.csv)', 'lightblue'),
    ('B', 'Pra-pemrosesan Data\n(cleaning, encoding, split)', 'lightgreen'),
    ('C', 'Pembangunan Model\n(Logistic Regression)', 'lightyellow'),
    ('D', 'Evaluasi Model\n(accuracy, F1, confusion matrix)', 'orange'),
    ('E', 'Simpan Model\n(logreg_calonpembeli.pkl)', 'lightgrey'),
    ('F', 'Prediksi Data Baru\n(calonpembelimobil_baru.csv)', 'lightpink'),
]
# Every stage is drawn as a rounded, filled box; only id, label and colour vary.
for node_id, label, fill in node_specs:
    dot.node(node_id, label, shape='box', style='rounded,filled', color=fill)

# Chain the stages in order: A -> B -> C -> D -> E -> F.
dot.edges(['AB', 'BC', 'CD', 'DE', 'EF'])

# Render to PNG; cleanup=True removes the intermediate DOT source file.
dot.render('/content/diagram_pipeline_logreg', format='png', cleanup=True)
print("✅ Diagram pipeline tersimpan di: /content/diagram_pipeline_logreg.png")

✅ Diagram pipeline tersimpan di: /content/diagram_pipeline_logreg.png
