In [1]:
# Titanic Survival Prediction – CodSoft

# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 2: Load Dataset
# If running in Colab, upload the dataset or read directly if available
# Example: Titanic dataset from seaborn or Kaggle

# For quick demonstration, load from seaborn
import seaborn as sns
df = sns.load_dataset('titanic')

# Step 3: Data Cleaning
# Drop rows with missing 'embarked'
df.dropna(subset=['embarked'], inplace=True)

# Fill missing 'age' with median
df['age'].fillna(df['age'].median(), inplace=True)

# Drop columns not needed for basic model
df = df[['survived','pclass','sex','age','sibsp','parch','fare','embarked']]

# Convert categorical variables
df = pd.get_dummies(df, columns=['sex','embarked'], drop_first=True)

# Step 4: Split Features and Target
X = df.drop('survived', axis=1)
y = df['survived']

# Step 5: Train-test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Model Training
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Step 7: Predictions
y_pred = model.predict(X_test)

# Step 8: Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Step 9: Single Passenger Prediction Example
# Sample: Pclass=3, Age=22, SibSp=1, Parch=0, Fare=7.25, sex_male=1, embarked_Q=0, embarked_S=1
sample = [[3, 22, 1, 0, 7.25, 1, 0, 1]]
predicted_survival = model.predict(sample)
print("\nPredicted Survival (1=Survived, 0=Did not survive):", predicted_survival[0])


Accuracy: 0.7696629213483146

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.78      0.81       109
           1       0.68      0.75      0.72        69

    accuracy                           0.77       178
   macro avg       0.76      0.77      0.76       178
weighted avg       0.78      0.77      0.77       178


Predicted Survival (1=Survived, 0=Did not survive): 0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)
