In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# --- Load -------------------------------------------------------------------
# Keep only the modelling columns plus the target and drop incomplete rows.
# The chained .dropna().copy() (instead of dropna(inplace=True) on a column
# slice) yields an independent frame, so the column assignments below do not
# write into a slice of the frame returned by read_csv.
df = pd.read_csv("Titanic.csv")
df = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survived']].dropna().copy()

# --- Encode -----------------------------------------------------------------
# Integer-encode 'Sex' and 'Embarked'. The single encoder is refit for each
# column, so once the loop finishes `le` holds the 'Embarked' mapping only.
le = LabelEncoder()
for col in ('Sex', 'Embarked'):
    df[col] = le.fit_transform(df[col])

X = df.drop("Survived", axis=1)
y = df["Survived"]

# --- Split, then scale --------------------------------------------------------
# Split BEFORE fitting the scaler: fitting StandardScaler on the full matrix
# would let the held-out rows contribute to the mean/variance used to transform
# the training data (test-set leakage). The split parameters are unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics come from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Full feature matrix transformed with the train-fitted scaler; kept under its
# original name in case later cells reference it.
X_scaled = scaler.transform(X)

# --- Base models ------------------------------------------------------------
rf = RandomForestClassifier(random_state=42)
xgb = XGBClassifier(eval_metric='logloss', random_state=42)

rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)

# --- Hand-rolled soft vote --------------------------------------------------
# Average the two models' predicted probability for class 1 and threshold at 0.5.
rf_probs = rf.predict_proba(X_test)[:, 1]
xgb_probs = xgb.predict_proba(X_test)[:, 1]

avg_probs = (rf_probs + xgb_probs) / 2
custom_preds = (avg_probs >= 0.5).astype(int)

custom_acc = accuracy_score(y_test, custom_preds)

# --- scikit-learn soft vote -------------------------------------------------
# Same two estimators combined by VotingClassifier for comparison with the
# manual average above.
vc = VotingClassifier(estimators=[('rf', rf), ('xgb', xgb)], voting='soft')
vc.fit(X_train, y_train)
vc_preds = vc.predict(X_test)
vc_acc = accuracy_score(y_test, vc_preds)

print("Custom Ensemble Accuracy:", custom_acc)
print("VotingClassifier Accuracy:", vc_acc)

Custom Ensemble Accuracy: 0.525
VotingClassifier Accuracy: 0.525
