In [1]:
import pandas as pd

# Read the Titanic training CSV (resolved relative to the working directory)
# and print a plain-text preview of its first five rows.
data = pd.read_csv('train.csv')
print(data.head(n=5))


   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  


In [None]:
# Titanic Survival Prediction System - All in One Cell

# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

# Step 2: Load the Dataset
data = pd.read_csv('train.csv')  # make sure train.csv is in the same folder
print("First 5 rows of the dataset:")
print(data.head())

# Step 3a: Handle missing values
print("\nMissing values per column:")
print(data.isnull().sum())
# Assign the filled column back rather than calling fillna(inplace=True) on
# data['Age'] / data['Embarked']: the chained in-place form operates on a
# temporary selection, raises a FutureWarning in pandas 2.x, and does not
# modify `data` once Copy-on-Write is enabled.
# NOTE(review): the median/mode are computed on the full dataset, i.e. before
# the train/test split below; move this after the split if strict isolation
# of the test rows is required.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Step 3b: Feature Selection
features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked']
# Explicit copy so the column assignments in Step 3c write to an independent
# frame instead of a slice of `data` (avoids SettingWithCopyWarning).
X = data[features].copy()
y = data['Survived']

# Step 3c: Encode categorical variables
# NOTE(review): only the model and scaler are persisted in Step 6; these
# encoders would also be needed to score raw (string-valued) inputs later.
le_sex = LabelEncoder()
X['Sex'] = le_sex.fit_transform(X['Sex'])
le_embarked = LabelEncoder()
X['Embarked'] = le_embarked.fit_transform(X['Embarked'])

# Step 4: Train-Test Split
# Split the *unscaled* features first so that the scaler below never sees the
# held-out rows. Same test_size/random_state as before, so the same rows land
# in each partition.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 3d: Feature Scaling
# Fit on the training partition only; the test partition is transformed with
# the training mean/std to avoid leaking test statistics into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept for any later cell that expects the fully scaled feature matrix.
X_scaled = scaler.transform(X)

# Step 5: Train the Model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 5b: Evaluate Model
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Step 6: Save the Model & Scaler
joblib.dump(model, 'titanic_survival_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
print("\nModel and scaler saved successfully!")

# Step 7: Test Reloading the Model
loaded_model = joblib.load('titanic_survival_model.pkl')
loaded_scaler = joblib.load('scaler.pkl')
# Run the unscaled test rows through the *reloaded* scaler so that both saved
# artifacts are actually exercised by this round-trip check.
sample_input = loaded_scaler.transform(X_test_raw.iloc[:5])
sample_pred = loaded_model.predict(sample_input)
print("\nSample predictions for first 5 test rows:", sample_pred)
