In [2]:
# Step 1: Import Necessary Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt

# Step 2: Load the Dataset
# The CSV is read from the current working directory; replace the path (or add
# an upload step) when running somewhere else.
titanic_data = pd.read_csv('Titanic-Dataset.csv')

# Step 3: Preprocessing
# Dropping columns that are not used as model inputs
titanic_data = titanic_data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Handling missing values
# The filled column is assigned back rather than calling
# fillna(..., inplace=True) on titanic_data[col]: that form is chained
# assignment, which warns on pandas 2.x and does not update the frame once
# Copy-on-Write is enabled.
titanic_data['Age'] = titanic_data['Age'].fillna(titanic_data['Age'].mean())  # Filling missing age with mean
titanic_data['Embarked'] = titanic_data['Embarked'].fillna(titanic_data['Embarked'].mode()[0])  # Filling missing embarked with the most frequent port

# Encoding categorical columns
# Series.map yields integer columns directly; DataFrame.replace with a nested
# dict depends on a silent object -> int downcast that pandas has deprecated.
titanic_data['Sex'] = titanic_data['Sex'].map({'male': 0, 'female': 1})
titanic_data['Embarked'] = titanic_data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Step 4: Feature Selection
# The target is the 'Survived' column; every remaining column is a feature.
y = titanic_data['Survived']
X = titanic_data.drop(columns=['Survived'])

# Step 5: Data Splitting
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Feature Scaling
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 7: Model Building (Logistic Regression)
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Step 8: Model Evaluation
# Predict on both splits
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Fraction of correct predictions on each split
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Report both accuracies as percentages with two decimals
print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Step 9: Prediction Function (for new passenger details)

# Integer codes for the categorical answers. They must stay identical to the
# codes used when the training data was encoded.
_SEX_CODES = {'male': 0, 'female': 1}
_EMBARKED_CODES = {'S': 0, 'C': 1, 'Q': 2}


def _encode_choice(raw, codes, label):
    """Return the integer code for *raw*, raising ValueError if it is unknown."""
    try:
        return codes[raw]
    except KeyError:
        options = ", ".join(codes)
        raise ValueError(
            f"{label} must be one of: {options} (got {raw!r})"
        ) from None


def get_passenger_details():
    """Prompt for one passenger's details and print the predicted outcome.

    Reads passenger class, sex, age, number of siblings/spouses, number of
    parents/children, fare and port of embarkation from standard input.
    Sex and port are converted to integer codes, the single-row frame is
    scaled with the module-level ``scaler`` and classified with the
    module-level ``model``. The verdict is printed, not returned.

    Returns:
        None.

    Raises:
        ValueError: If a numeric answer cannot be parsed, or if the passenger
            class, sex or port of embarkation is not one of the offered
            choices. Rejecting unknown answers avoids silently treating any
            non-"male" text as female.
    """
    pclass = int(input("Enter Passenger Class (1, 2, or 3): "))
    if pclass not in (1, 2, 3):
        raise ValueError(f"Passenger class must be 1, 2, or 3 (got {pclass})")

    # Surrounding whitespace is stripped so answers like " male " are accepted.
    sex_answer = input("Enter Sex (male or female): ").strip().lower()
    sex = _encode_choice(sex_answer, _SEX_CODES, "Sex")

    age = float(input("Enter Age: "))
    sibsp = int(input("Enter number of siblings/spouses aboard: "))
    parch = int(input("Enter number of parents/children aboard: "))
    fare = float(input("Enter Fare: "))

    port_answer = input("Enter Port of Embarkation (S, C, Q): ").strip().upper()
    embarked = _encode_choice(port_answer, _EMBARKED_CODES, "Port of embarkation")

    # Create a single-row DataFrame with the inputs
    passenger_data = pd.DataFrame({
        'Pclass': [pclass],
        'Sex': [sex],
        'Age': [age],
        'SibSp': [sibsp],
        'Parch': [parch],
        'Fare': [fare],
        'Embarked': [embarked],
    })

    # Scale the input with the already-fitted scaler
    passenger_data_scaled = scaler.transform(passenger_data)

    # Make the prediction; a predicted label of 1 is reported as "Survived"
    prediction = model.predict(passenger_data_scaled)
    result = "Survived" if prediction[0] == 1 else "Did not survive"
    print(f"The passenger would have: {result}")

# Example usage: prompts on standard input for one passenger's details and
# prints the predicted outcome.
get_passenger_details()


Training Accuracy: 80.34%
Test Accuracy: 79.89%
Enter Passenger Class (1, 2, or 3): 3
Enter Sex (male or female): male
Enter Age: 35
Enter number of siblings/spouses aboard: 0
Enter number of parents/children aboard: 0
Enter Fare: 373450
Enter Port of Embarkation (S, C, Q): S
The passenger would have: Survived
