# In [None]:
# ========================================
# TUGAS KLASIFIKASI - DATASET TITANIC
# ========================================

# 1. Import Library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# 2. Load dataset
# The Titanic CSV is read directly from the URL below into a DataFrame.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)
print("Data berhasil diambil!")
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; in the middle of a cell (or in a script) its result is
# discarded, so print the preview explicitly.
print(df.head())

# 3. EDA and preprocessing
print("\n--- Info Dataset ---")
# DataFrame.info() writes its report to stdout and returns None, so wrapping
# it in print() would append a stray "None" line to the output.
df.info()
print("\n--- Cek Missing Values ---")
print(df.isnull().sum())

# Drop columns that are not needed
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# Fill missing values: median for Age, most frequent value for Embarked.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on `df[col]`: that chained in-place form triggers a
# FutureWarning on pandas 2.x and does not update `df` at all once
# Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Encode categorical columns as integer labels
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])
# The same encoder is refit here, so afterwards `le` only holds the
# Embarked classes (the Sex mapping is no longer recoverable from it).
df['Embarked'] = le.fit_transform(df['Embarked'])

# 4. Split data
# Features are every remaining column except the target. PassengerId is also
# excluded: it is a per-row identifier in this dataset, so it carries no
# generalizable signal and only gives the models (the tree in particular)
# something to overfit on. errors='ignore' keeps this line working if the
# column has already been removed upstream.
X = df.drop('Survived', axis=1).drop(columns='PassengerId', errors='ignore')
y = df['Survived']
# 80/20 train/test split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Normalization
# The scaler is fit on the training split only and then applied to the test
# split, so test-set statistics do not influence the scaling parameters.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6. Model 1: logistic regression with default hyperparameters.
# fit() returns the fitted estimator, so construction and training are chained.
log_model = LogisticRegression().fit(X_train, y_train)
y_pred_log = log_model.predict(X_test)

# 7. Model 2: decision tree with a fixed random_state.
tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)

# 8. Evaluasi
def evaluasi_model(y_test, y_pred):
    """Score one set of predictions against the true labels.

    Args:
        y_test: True labels.
        y_pred: Predicted labels for the same samples.

    Returns:
        A 5-tuple ``(accuracy, precision, recall, f1, confusion_matrix)``,
        in that order.
    """
    matrix = confusion_matrix(y_test, y_pred)
    # Scalar metrics are evaluated in the order they appear in the result.
    scores = tuple(
        metric(y_test, y_pred)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    )
    return (*scores, matrix)

# Score both models; each result is the tuple
# (accuracy, precision, recall, f1, confusion matrix).
hasil_log = evaluasi_model(y_test, y_pred_log)
hasil_tree = evaluasi_model(y_test, y_pred_tree)

# Per-model report: header, confusion matrix, then the four scalar metrics.
for title, (acc, prec, rec, f1, cm) in (
    ("\n=== Logistic Regression ===", hasil_log),
    ("\n=== Decision Tree ===", hasil_tree),
):
    print(title)
    print("Confusion Matrix:\n", cm)
    print(f"Akurasi: {acc:.2f}, Presisi: {prec:.2f}, Recall: {rec:.2f}, F1: {f1:.2f}")

# 9. Model comparison
# One row per model, one column per metric. zip() stops after the four
# metric names, so the trailing confusion matrix in each tuple is skipped.
metric_names = ('Accuracy', 'Precision', 'Recall', 'F1-Score')
columns = {'Model': ['Logistic Regression', 'Decision Tree']}
for metric_name, log_value, tree_value in zip(metric_names, hasil_log, hasil_tree):
    columns[metric_name] = [log_value, tree_value]
hasil = pd.DataFrame(columns)
print("\n=== Perbandingan Model ===")
print(hasil)
