In [None]:
#import Library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Mount Google Drive at /content/gdrive so files stored there can be read by
# path. The google.colab import means this cell only runs inside Google Colab.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Load the dataset CSV from the mounted Drive folder into a DataFrame.
titanic_data = pd.read_csv("/content/gdrive/MyDrive/praktikum_ml/Pertemuan-9/Data/tested.csv")

In [None]:
# Preview the first rows of the loaded dataset.
titanic_data.head()

In [None]:
# Preview the last rows of the loaded dataset.
titanic_data.tail()

In [None]:
# Size of the dataset as (rows, columns).
titanic_data.shape

In [None]:
# Column names, dtypes and non-null counts.
titanic_data.info()

In [None]:
# Count the missing values in each column.
titanic_data.isnull().sum()

In [None]:
# Discard the identifier / free-text columns; every other column is kept.
titanic_data = titanic_data.drop(
    ['PassengerId', 'Cabin', 'Name', 'Ticket'],
    axis='columns',
)

In [None]:
# Box plot of Age to inspect its spread and outliers
# (drawn before the missing values are filled in).
titanic_data.boxplot(column=['Age']).set_title("Boxplot of Age with Outliers")
plt.show()

In [None]:
# Box plot of Fare to inspect its spread and outliers
# (drawn before the missing values are filled in).
titanic_data.boxplot(column=['Fare'])
# The title names the plotted column; it previously said "Age" on the Fare plot.
plt.title("Boxplot of Fare with Outliers")
plt.show()

In [None]:
# Fill the missing Age and Fare values with each column's median.
# The result is assigned back to the frame rather than calling
# fillna(inplace=True) on a column selection: that form is chained assignment,
# which raises a FutureWarning in pandas 2.x and does not update the DataFrame
# once Copy-on-Write is enabled.
titanic_data = titanic_data.fillna({
    'Age': titanic_data['Age'].median(),
    'Fare': titanic_data['Fare'].median(),
})

In [None]:
# Re-count missing values per column after filling Age and Fare.
titanic_data.isnull().sum()

In [None]:
# Summary statistics of the dataset's columns.
titanic_data.describe()

In [None]:
# Number of rows per Survived value (the column later used as the target).
titanic_data['Survived'].value_counts()

In [None]:
# Number of rows per Sex value.
titanic_data['Sex'].value_counts()

In [None]:
# Number of rows per Pclass value.
titanic_data['Pclass'].value_counts()

In [None]:
# List the columns that remain in the working DataFrame.
print(titanic_data.columns)

In [None]:
# Draw a 2x3 grid of count plots. The first row shows the distribution of
# Survived, Sex and Pclass; the second row splits Sex, Pclass and Embarked
# by Survived.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(20, 15))
axes = axes.flatten()

# One entry per panel, in panel order: (x column, hue column or None, title).
panel_specs = [
    ('Survived', None, 'Survived'),
    ('Sex', None, 'Sex'),
    ('Pclass', None, 'Pclass'),
    ('Sex', 'Survived', 'Survival by Sex'),
    ('Pclass', 'Survived', 'Survival by Pclass'),
    ('Embarked', 'Survived', 'Survival by Embarked'),
]

for panel_ax, (x_column, hue_column, panel_title) in zip(axes, panel_specs):
    sns.countplot(x=x_column, hue=hue_column, data=titanic_data, ax=panel_ax)
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# Encode the categorical Sex and Embarked columns as integer codes.
# Values not listed in a mapping are left unchanged by replace().
sex_codes = {'male': 0, 'female': 1}
embarked_codes = {'S': 0, 'C': 1, 'Q': 2}
replacements = {'Sex': sex_codes, 'Embarked': embarked_codes}

titanic_data.replace(replacements, inplace=True)

In [None]:
# Separate the target column (Survived) from the remaining feature columns.
Y = titanic_data['Survived']
X = titanic_data.drop('Survived', axis='columns')

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Preview the first values of the target.
Y.head()

In [None]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)

In [None]:
# Show the shapes of the full feature matrix and of the train / test splits.
print(*(frame.shape for frame in (X, X_train, X_test)))

In [None]:
# Standardize the features. The scaler is fitted on the training split only,
# and that same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train a Gaussian Naive Bayes classifier on the standardized training split.
from sklearn.naive_bayes import GaussianNB
nb_model = GaussianNB()
# Left as the cell's last expression so the fitted estimator is displayed.
nb_model.fit(X_train_scaled, Y_train)

In [None]:
# Accuracy: predict on both splits so training and testing accuracy can be compared.
train_pred_nb, test_pred_nb = (
    nb_model.predict(features) for features in (X_train_scaled, X_test_scaled)
)

In [None]:
# Report accuracy on the training split and on the held-out test split.
train_accuracy_nb = accuracy_score(Y_train, train_pred_nb)
test_accuracy_nb = accuracy_score(Y_test, test_pred_nb)
print("Training Accuracy (NB):", train_accuracy_nb)
print("Testing Accuracy (NB):", test_accuracy_nb)

In [None]:
# Confusion matrix visualization (Naive Bayes) on the test split:
# rows are the actual classes, columns are the predicted classes.
cm_nb = confusion_matrix(Y_test, test_pred_nb)
class_labels = ['Not Survived', 'Survived']

fig_cm, ax_cm = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm_nb,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax_cm,
)
ax_cm.set_title('Confusion Matrix (Naive Bayes)')
ax_cm.set_xlabel('Predicted')
ax_cm.set_ylabel('Actual')
plt.show()

In [None]:
# Print the classification report (per-class metrics) for the test-split predictions.
# The heading previously misspelled "Classification".
print("\nClassification Report (NB):")
print(classification_report(Y_test, test_pred_nb))

In [None]:
# Cross Validation
# 5-fold cross-validation using the same preprocessing as the hold-out
# evaluation. The hold-out model was trained and scored on standardized
# features, so scoring nb_model on the raw X would evaluate a differently
# preprocessed model. Wrapping the scaler and the classifier in a pipeline
# re-fits the scaler on the training part of every fold, so no statistics
# from a validation fold leak into the scaling.
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# cross_val_score clones the estimator it is given, so the already fitted
# nb_model is not modified by this evaluation.
cv_pipeline_nb = make_pipeline(StandardScaler(), nb_model)
cv_nb = cross_val_score(cv_pipeline_nb, X, Y, cv=5, scoring='accuracy')

print("\nNaive Bayes Cross Validation Accuracy (5-Fold):")
print("Scores:", cv_nb)
print("Mean Accuracy:", cv_nb.mean())
print("Standard Deviation:", cv_nb.std())
