In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Setup Data
np.random.seed(88)
n_cust = 2000

# Generate Data (LANGSUNG ANGKA)
data = {
    'Lama_Langganan': np.random.randint(1, 72, n_cust), # 1 s/d 72 bulan
    'Tagihan_Bulanan': np.random.randint(200, 1500, n_cust), # 200rb s/d 1.5jt
    'Jenis_Kontrak': np.random.choice([0, 1, 2], n_cust, p=[0.5, 0.3, 0.2]), # 0: Bulanan (Paling rawan kabur)
    'Punya_TechSupport': np.random.choice([0, 1], n_cust),
    'Churn': np.zeros(n_cust) # Target awal 0 semua
}

df_churn = pd.DataFrame(data)

# BIKIN POLA (Biar Model Bisa Belajar)
# Logika: Kalau Tagihan Mahal + Kontrak Bulanan + Gak ada Support = Kabur (1)
risk_score = (df_churn['Tagihan_Bulanan'] / 1000) * 2  
risk_score -= (df_churn['Lama_Langganan'] * 0.1)       
risk_score -= (df_churn['Jenis_Kontrak'] * 3)          
risk_score -= (df_churn['Punya_TechSupport'] * 2)

# Tentukan Target Akhir (0 atau 1)
threshold = risk_score.median()
df_churn['Churn'] = np.where(risk_score > threshold, 1, 0)

print("✅ Data Siap! Semua sudah dalam bentuk angka.")
print(df_churn.head())

✅ Data Siap! Semua sudah dalam bentuk angka.
   Lama_Langganan  Tagihan_Bulanan  Jenis_Kontrak  Punya_TechSupport  Churn
0              33             1370              2                  1      0
1              50              398              0                  0      1
2              21             1073              2                  0      0
3              63             1045              0                  0      1
4              70              664              0                  1      0


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [4]:
df_churn

Unnamed: 0,Lama_Langganan,Tagihan_Bulanan,Jenis_Kontrak,Punya_TechSupport,Churn
0,33,1370,2,1,0
1,50,398,0,0,1
2,21,1073,2,0,0
3,63,1045,0,0,1
4,70,664,0,1,0
...,...,...,...,...,...
1995,44,280,1,1,0
1996,42,1435,2,1,0
1997,45,1345,2,0,0
1998,54,1389,0,1,1


In [5]:
x = df_churn.drop(columns='Churn')
y = df_churn['Churn']

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y , test_size = 0.2, random_state=42)

In [7]:
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier()
rf = RandomForestClassifier()
svm = SVC(random_state=42)
nv = GaussianNB()

knn.fit(x_train, y_train)
dt.fit(x_train, y_train)
rf.fit(x_train, y_train)
svm.fit(x_train, y_train)
nv.fit(x_train, y_train)

0,1,2
,priors,
,var_smoothing,1e-09


In [8]:
def evaluate_model(model, x_test, y_test):
    y_pred = model.predict(x_test)
    return  {
        "accuracy" : accuracy_score(y_test, y_pred),
        'Precision' : precision_score(y_test, y_pred),
        'Recall' : recall_score(y_test, y_pred),
        'F1-Score' : f1_score(y_test, y_pred)
    }

result = {
    'KNN' : evaluate_model(knn, x_test, y_test),
    'Decision Tree' : evaluate_model(dt, x_test, y_test),
    'Random Forest' : evaluate_model(rf, x_test, y_test),
    'Support Vector Machine' : evaluate_model(svm, x_test, y_test),
    'Naive Bayes' : evaluate_model(nv, x_test, y_test)
}
summary = pd.DataFrame(result).T
summary

Unnamed: 0,accuracy,Precision,Recall,F1-Score
KNN,0.7,0.683168,0.71134,0.69697
Decision Tree,0.99,0.994792,0.984536,0.989637
Random Forest,0.995,0.994845,0.994845,0.994845
Support Vector Machine,0.6425,0.632124,0.628866,0.630491
Naive Bayes,0.975,0.979167,0.969072,0.974093


# **STUDI KASUS RUMAH SAKIT**

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

np.random.seed(123)
n_pasien = 1000

# Generate Data Medis
data = {
    'Umur': np.random.randint(29, 78, n_pasien),
    'Kolesterol': np.random.randint(126, 350, n_pasien),
    'Detak_Jantung_Max': np.random.randint(90, 202, n_pasien),
    'Nyeri_Dada': np.random.choice([0, 1, 2], n_pasien, p=[0.4, 0.4, 0.2]), 
    'Target': np.zeros(n_pasien)
}

df_jantung = pd.DataFrame(data)

# BIKIN POLA (Aturan Medis Sederhana)
# Logika: Tua + Kolesterol Tinggi + Jantung Lemah + Ada Nyeri Dada = Risiko Tinggi
skor_sakit = (df_jantung['Umur'] / 80) * 3
skor_sakit += (df_jantung['Kolesterol'] / 300) * 2
skor_sakit -= (df_jantung['Detak_Jantung_Max'] / 200) * 2 # Jantung kuat skornya malah turun
skor_sakit += (df_jantung['Nyeri_Dada'] * 2)

threshold = np.percentile(skor_sakit, 60) # 40% orang dianggap sakit
df_jantung['Target'] = np.where(skor_sakit > threshold, 1, 0)

print("✅ Data Pasien Siap Analisis!")
print(df_jantung.head())

✅ Data Pasien Siap Analisis!
   Umur  Kolesterol  Detak_Jantung_Max  Nyeri_Dada  Target
0    74         218                128           2       1
1    31         185                177           0       0
2    57         152                129           0       0
3    63         206                187           1       0
4    67         250                192           1       1


In [10]:
df_jantung

Unnamed: 0,Umur,Kolesterol,Detak_Jantung_Max,Nyeri_Dada,Target
0,74,218,128,2,1
1,31,185,177,0,0
2,57,152,129,0,0
3,63,206,187,1,0
4,67,250,192,1,1
...,...,...,...,...,...
995,45,331,100,0,0
996,56,182,198,0,0
997,55,284,178,0,0
998,54,245,112,1,1


In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

x = df_jantung.drop(columns='Target')
y = df_jantung['Target']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

knn = KNeighborsClassifier().fit(x_train, y_train)
dt = DecisionTreeClassifier().fit(x_train, y_train)
rf = RandomForestClassifier().fit(x_train, y_train)
svm = SVC(random_state=42).fit(x_train, y_train)
nv = GaussianNB().fit(x_train, y_train)

knn
dt
rf
svm 
nv

0,1,2
,priors,
,var_smoothing,1e-09


In [12]:
def evaluate_model2(model , x_test, y_test):
    y_pred = model.predict(x_test)
    return {
        'Accuracy' : accuracy_score(y_test, y_pred),
        'Precision' : precision_score(y_test, y_pred),
        'Recall' : recall_score(y_test, y_pred),
        'F1-Score' : f1_score(y_test, y_pred)
    }
result = {
        'Knearest Neighbors' : evaluate_model2(knn, x_test, y_test),
        'Decision Tree' : evaluate_model2(dt, x_test, y_test),
        'Random Forest' : evaluate_model2(rf, x_test, y_test),
        'Support Vector Machine' : evaluate_model2(svm, x_test, y_test),
        'Naive Bayes' : evaluate_model2(nv, x_test, y_test)
    }

summary2 = pd.DataFrame(result)
summary2.T

Unnamed: 0,Accuracy,Precision,Recall,F1-Score
Knearest Neighbors,0.65,0.551724,0.607595,0.578313
Decision Tree,0.94,0.894118,0.962025,0.926829
Random Forest,0.975,0.95122,0.987342,0.968944
Support Vector Machine,0.625,0.541667,0.329114,0.409449
Naive Bayes,0.97,0.939759,0.987342,0.962963
