In [1]:
# Step 1: Import required libraries
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score

# Step 2: Load the Titanic dataset
df = sns.load_dataset('titanic')

# Step 3: Drop unnecessary or mostly-empty columns
df = df.drop(columns=['deck', 'embark_town', 'alive'])

# Step 4: Handle missing values
df['age'] = df['age'].fillna(df['age'].mean())   # ✅ Fix: no inplace=True
df = df.dropna(subset=['embarked'])              # Drop rows where 'embarked' is missing
df = df.dropna()                                 # Drop any remaining missing data

# Step 5: Convert categorical columns to numeric using LabelEncoder
label_encoder = LabelEncoder()
for col in df.columns:
    if df[col].dtype == 'object' or str(df[col].dtype) == 'category':
        df[col] = label_encoder.fit_transform(df[col].astype(str))  # ✅ Fix: convert to str

# Step 6: Normalize numerical columns using MinMaxScaler
scaler = MinMaxScaler()
numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Step 7: Split the data into features (X) and target (y)
X = df.drop('survived', axis=1)  # Features
y = df['survived']               # Target

# Step 8: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 9: Train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 10: Predict and evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\n✅ Model Accuracy: {:.2f}%".format(accuracy * 100))



✅ Model Accuracy: 80.34%
