In [4]:
# Import Libraries
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score, explained_variance_score

# Load the Data
# 'train.csv' is resolved relative to the current working directory and must
# provide every column referenced below (Age, Embarked, Sex, Pclass, SibSp,
# Parch, Fare, Survived).
data = pd.read_csv('train.csv')

# Data Preprocessing
# Assign the filled columns back instead of calling fillna(inplace=True) on a
# column selection: the chained in-place form raises a FutureWarning in
# pandas 2.x and no longer updates `data` once Copy-on-Write is enabled.
# NOTE(review): the median/mode are computed on the full dataset, i.e. before
# the train/test split, so test rows influence the imputed values.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
# One-hot encode the categorical columns; drop_first=True drops the first
# level of each so only the remaining indicator columns are produced.
data = pd.get_dummies(data, columns=['Sex', 'Embarked'], drop_first=True)

# Select Features and Target Variable
features = data[['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex_male', 'Embarked_Q', 'Embarked_S']]
target = data['Survived']

# Split the Data
# 80/20 split with a fixed seed so the same rows land in each partition.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Feature Scaling
# Fit the scaler on the training partition only, then reuse its statistics
# for the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Logistic Regression Model
logistic_model = LogisticRegression()
# perf_counter() is the clock intended for measuring elapsed intervals; unlike
# time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()
logistic_model.fit(X_train, y_train)
y_pred_logistic = logistic_model.predict(X_test)
logistic_time = time.perf_counter() - start_time  # covers fit + predict

# Random Forest Classifier Model
# Seed the forest (same seed as the split) so its predictions and metrics are
# reproducible from run to run.
rf_model = RandomForestClassifier(random_state=42)
start_time = time.perf_counter()
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
rf_time = time.perf_counter() - start_time  # covers fit + predict

# Evaluation Metrics
def evaluate_model(y_true, y_pred):
    """Score predictions against the true labels with five sklearn metrics.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        A 5-tuple ``(accuracy, mae, mse, r2, explained_variance)``, in that
        order.

    Note:
        Apart from accuracy these are regression metrics. When both inputs
        hold only 0/1 labels, MAE and MSE each reduce to the
        misclassification rate (``1 - accuracy``).
    """
    # Order matters: callers index the returned tuple by position.
    scorers = (
        accuracy_score,
        mean_absolute_error,
        mean_squared_error,
        r2_score,
        explained_variance_score,
    )
    return tuple(scorer(y_true, y_pred) for scorer in scorers)

# Logistic Regression Metrics
# Unpack the positional tuple from evaluate_model into named scores, then emit
# the whole report with a single print (one item per output line).
logistic_metrics = evaluate_model(y_test, y_pred_logistic)
lr_accuracy, lr_mae, lr_mse, lr_r2, lr_explained_var = logistic_metrics
print(
    "Logistic Regression Metrics:",
    f"  Accuracy: {lr_accuracy:.2f}",
    f"  Mean Absolute Error (MAE): {lr_mae:.2f}",
    f"  Mean Squared Error (MSE): {lr_mse:.2f}",
    f"  R² Score: {lr_r2:.2f}",
    f"  Explained Variance: {lr_explained_var:.2f}",
    # The trailing "\n" leaves a blank line before the next report.
    f"  Training/Prediction Time: {logistic_time:.4f}s\n",
    sep="\n",
)

# Random Forest Metrics
rf_metrics = evaluate_model(y_test, y_pred_rf)
rf_accuracy, rf_mae, rf_mse, rf_r2, rf_explained_var = rf_metrics
print(
    "Random Forest Metrics:",
    f"  Accuracy: {rf_accuracy:.2f}",
    f"  Mean Absolute Error (MAE): {rf_mae:.2f}",
    f"  Mean Squared Error (MSE): {rf_mse:.2f}",
    f"  R² Score: {rf_r2:.2f}",
    f"  Explained Variance: {rf_explained_var:.2f}",
    f"  Training/Prediction Time: {rf_time:.4f}s",
    sep="\n",
)

Logistic Regression Metrics:
  Accuracy: 0.81
  Mean Absolute Error (MAE): 0.19
  Mean Squared Error (MSE): 0.19
  R² Score: 0.22
  Explained Variance: 0.22
  Training/Prediction Time: 0.0038s

Random Forest Metrics:
  Accuracy: 0.81
  Mean Absolute Error (MAE): 0.19
  Mean Squared Error (MSE): 0.19
  R² Score: 0.22
  Explained Variance: 0.22
  Training/Prediction Time: 0.2040s
