In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# 1. Load Titanic Dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)



In [4]:
# 2. Data Preprocessing
# Select relevant features
df = df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]

# Handle missing values (Fill missing Age values with median)
df['Age'].fillna(df['Age'].median(), inplace=True)

# Encode categorical variables (Convert 'Sex' to 0/1)
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

# Split data into features (X) and target (y)
X = df.drop(columns=['Survived'])
y = df['Survived']



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Age'].fillna(df['Age'].median(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Sex'] = df['Sex'].map({'male': 0, 'fe

In [5]:
# 3. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [6]:
# 4. Feature Scaling (Important for Logistic Regression)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [7]:
# 5. Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)



In [8]:
# 6. Predictions
y_pred = model.predict(X_test_scaled)



In [9]:
# 7. Model Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))



Accuracy: 0.7988826815642458

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.86      0.83       105
           1       0.78      0.72      0.75        74

    accuracy                           0.80       179
   macro avg       0.80      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179


Confusion Matrix:
 [[90 15]
 [21 53]]


In [10]:
# 8. Predict on New Passenger Data
new_passenger = np.array([[3, 1, 25, 0, 0, 10]])  # Pclass=3, Female, Age=25, No relatives, Fare=10
new_passenger_scaled = scaler.transform(new_passenger)
prediction = model.predict(new_passenger_scaled)
print("\nSurvival Prediction for new passenger:", "Survived" if prediction[0] == 1 else "Did not survive")



Survival Prediction for new passenger: Survived


