In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')

# --- Load the raw Titanic passenger table --------------------------------
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
titanic_data = pd.read_csv(url)

# Quick inspection of the raw frame. A bare expression is only rendered when
# it is the last line of a cell, so move these to their own cell (or wrap them
# in display(...)) when the output is actually needed.
titanic_data.head()
titanic_data.isnull().sum()

# --- Select model inputs and target --------------------------------------
# .copy() gives X its own data, so the column assignments below modify X only
# and never act on (or warn about) a selection taken from titanic_data.
X = titanic_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
y = titanic_data['Survived']


X.head()

# --- Encode 'Sex' as integer codes ----------------------------------------
# The fitted encoder `le` is kept so the same text -> code mapping can be
# reused wherever new passengers are encoded.
le = LabelEncoder()
# Replace the whole column rather than writing through X.loc[:, 'Sex'] = ...:
#   * X.loc[:, 'Sex'] selects all rows (:) of the 'Sex' column and writes the
#     new values INTO the existing column, which tries to keep that column's
#     current (text/object) dtype;
#   * X['Sex'] = ... swaps the column out, so it takes the integer dtype of
#     the encoded values.
X['Sex'] = le.fit_transform(X['Sex'])

# Indexing refresher:
#   X.loc[:, 'Age']   -> all rows of the 'Age' column (selection by label)
#   X.iloc[0:5, 2]    -> first 5 rows of the 3rd column (selection by
#                        position; column positions count 0, 1, 2)

# --- Handle missing Age values ---------------------------------------------
# Assign the filled column back instead of calling
# X.loc[:, 'Age'].fillna(..., inplace=True): an in-place fill on a selected
# column can end up filling a temporary object and leave X unchanged
# (this is the behaviour under pandas Copy-on-Write).
# NOTE(review): the median is computed over all rows, i.e. before the
# train/test split below - confirm that is acceptable for this analysis.
X['Age'] = X['Age'].fillna(X['Age'].median())
assert X['Age'].notna().all(), "Age still contains missing values after imputation"

titanic_data.isnull().sum()

X.isnull().sum()

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build and fit the forest in one expression (fit() returns the estimator).
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Number of fitted trees - matches n_estimators (100 here).
print(len(rf_classifier.estimators_))

# Look at the first tree of the ensemble.
tree = rf_classifier.estimators_[0]
print(tree)

# The printed tree uses max_features='sqrt': each split looks at roughly
# sqrt(n_features) candidate features, e.g. 16 features --> sqrt(16) = 4.

# One importance value per input column.
print(rf_classifier.feature_importances_)

# --- Evaluate on the held-out rows -----------------------------------------
y_pred = rf_classifier.predict(X_test)

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", classification_rep)

# --- Single-passenger demo ---------------------------------------------------
# head(1) keeps the first test row as a one-row DataFrame, which is the 2-D
# shape predict() expects.
sample = X_test.head(1)
prediction = rf_classifier.predict(sample)

# Flatten that row into {column: value} for a compact printout.
sample_dict = sample.iloc[0].to_dict()
print(f"\nSample Passenger: {sample_dict}")

if prediction[0] == 1:
    outcome = 'Survived'
else:
    outcome = 'Did Not Survive'
print(f"Predicted Survival: {outcome}")

# DataFrame.to_dict() nests the values as {column: {row_label: value}}.
# This bare call is not the last line of the cell, so nothing is displayed.
sample.to_dict()

sample_dict_sam = sample.to_dict()
print(sample_dict_sam)



100
DecisionTreeClassifier(max_features='sqrt', random_state=1608637542)
[0.07783088 0.2712546  0.26002009 0.0495455  0.03679038 0.30455855]
Accuracy: 0.80

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.85      0.83       105
           1       0.77      0.73      0.75        74

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179


Sample Passenger: {'Pclass': 3, 'Sex': 1, 'Age': 28.0, 'SibSp': 1, 'Parch': 1, 'Fare': 15.2458}
Predicted Survival: Did Not Survive
{'Pclass': {709: 3}, 'Sex': {709: 1}, 'Age': {709: 28.0}, 'SibSp': {709: 1}, 'Parch': {709: 1}, 'Fare': {709: 15.2458}}


In [4]:
import joblib

# Persist the fitted forest and the fitted label encoder; app.py loads both
# files ("model.pkl", "encoder.pkl") at start-up.
joblib.dump(value=rf_classifier, filename="model.pkl")
# Kept as the cell's final expression so the notebook echoes the written path.
joblib.dump(value=le, filename="encoder.pkl")



['encoder.pkl']

In [2]:
%%writefile app.py
import streamlit as st
import numpy as np
import joblib

# Load model & encoder
model = joblib.load("model.pkl")
encoder = joblib.load("encoder.pkl")

st.set_page_config(page_title="Titanic Survival Prediction", layout="centered")

st.title("üö¢ Titanic Survival Prediction App")
st.write("Enter passenger details to predict survival")

# Inputs
pclass = st.selectbox("Passenger Class", [1, 2, 3])
sex = st.selectbox("Sex", ["male", "female"])
age = st.number_input("Age", min_value=1, max_value=100, value=30)
sibsp = st.number_input("Siblings/Spouses Aboard", min_value=0, max_value=10, value=0)
parch = st.number_input("Parents/Children Aboard", min_value=0, max_value=10, value=0)
fare = st.number_input("Fare", min_value=0.0, value=32.0)

# Encode sex
sex_encoded = encoder.transform([sex])[0]
# Predict
if st.button("Predict"):
    new_data = np.array([[pclass, sex_encoded, age, sibsp, parch, fare]])
    prediction = model.predict(new_data)[0]

    if prediction == 1:
        st.success("‚úÖ Passenger SURVIVED")
    else:
        st.error("‚ùå Passenger DID NOT SURVIVE")



Overwriting app.py
