In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
import os

def _load_titanic_dataframe(possible_filenames):
    """Load the Titanic CSV, trying known names first and then scanning the cwd.

    Args:
        possible_filenames: Filenames to try, in order, before falling back to
            scanning the current directory for a name containing both
            "titanic" and ".csv" (case-insensitive).

    Returns:
        The loaded ``DataFrame``, or ``None`` when no candidate could be read.
    """
    for fname_attempt in possible_filenames:
        try:
            df = pd.read_csv(fname_attempt)
        except FileNotFoundError:
            continue
        except Exception as e:
            # Deliberately broad: any parse/encoding failure should just move
            # on to the next candidate instead of aborting the search.
            print(f"WARNING: Could not load '{fname_attempt}' due to format error: {e}")
            continue
        print(f"Loaded dataset using filename: '{fname_attempt}'. Shape: {df.shape}")
        return df

    print("\nCould not find the file with common names. Listing directory contents to auto-detect...")
    try:
        current_dir_files = os.listdir('.')
    except OSError as e:
        print(f"CRITICAL ERROR: Could not list directory contents: {e}")
        return None
    print(f"Files found in Colab directory: {current_dir_files}")

    for candidate in current_dir_files:
        lowered = candidate.lower()
        if "titanic" not in lowered or ".csv" not in lowered:
            continue
        print(f"Attempting to load auto-detected candidate: '{candidate}'...")
        try:
            df = pd.read_csv(candidate)
        except Exception as e:
            # Best-effort: a broken candidate must not stop the scan.
            print(f"WARNING: Failed to load '{candidate}' (possibly corrupted/bad format): {e}")
            continue
        print(f"SUCCESS: Loaded dataset using auto-detected filename: '{candidate}'. Shape: {df.shape}")
        return df

    return None


def _prepare_titanic_dataframe(df, features, target):
    """Impute, encode and validate the columns needed for modelling.

    Args:
        df: Raw dataset as loaded from CSV. It is modified in place.
        features: Names of the feature columns the model will use.
        target: Name of the label column.

    Returns:
        The cleaned ``DataFrame`` restricted to complete rows, or ``None``
        (after printing an error) when a required column is missing or too few
        complete rows remain to split into train and test sets.
    """
    # NOTE(review): the means below are computed on the full dataset, i.e.
    # before the train/test split, so test rows influence the imputed values.
    # Kept as-is so reported numbers do not change.
    if 'Age' in df.columns:
        mean_age = df['Age'].mean()
        df['Age'] = df['Age'].fillna(mean_age)
        print(f"\nMissing 'Age' values imputed with mean: {mean_age:.2f}")
    else:
        print("Warning: 'Age' column not found in dataset. Please check column names.")

    if 'Fare' in df.columns:
        if df['Fare'].isnull().any():
            mean_fare = df['Fare'].mean()
            df['Fare'] = df['Fare'].fillna(mean_fare)
            print(f"Missing 'Fare' values imputed with mean: {mean_fare:.2f}")
    else:
        print("Warning: 'Fare' column not found in dataset. Please check column names.")

    if 'Sex' in df.columns:
        # Normalise case/whitespace so values like 'Male' or ' female ' are
        # encoded instead of silently turning into NaN.
        normalised_sex = df['Sex'].astype(str).str.strip().str.lower()
        encoded_sex = normalised_sex.map({'male': 0, 'female': 1})
        # mode() is empty when nothing could be encoded; indexing it blindly
        # would raise. In that case the NaNs are left for dropna() below.
        sex_mode = encoded_sex.mode()
        if not sex_mode.empty:
            encoded_sex = encoded_sex.fillna(sex_mode.iloc[0])
        df['Sex'] = encoded_sex
        print("\n'Sex' column encoded: 'male'=0, 'female'=1.")
    else:
        print("Warning: 'Sex' column not found in dataset. Please check column names.")

    for feature in features:
        if feature not in df.columns:
            print(f"Error: Required feature '{feature}' not found in the dataset after preprocessing. Please check your data.")
            return None

    # The target must be validated too; otherwise dropna(subset=...) raises KeyError.
    if target not in df.columns:
        print(f"Error: Target column '{target}' not found in the dataset. Please check your data.")
        return None

    initial_rows = df.shape[0]
    df = df.dropna(subset=features + [target])
    rows_after_drop = df.shape[0]
    if initial_rows != rows_after_drop:
        print(f"\nDropped {initial_rows - rows_after_drop} rows with remaining missing values in selected features/target.")

    # An 80/20 split needs at least one row on each side.
    if rows_after_drop < 2:
        print("Error: Not enough complete rows remain after preprocessing to train and evaluate a model.")
        return None

    return df


def predict_titanic_survival():
    """Train and report a logistic-regression survival model on the Titanic CSV.

    Loads the dataset from the current directory, imputes 'Age'/'Fare' with
    their means, encodes 'Sex' as male=0/female=1, fits a
    ``LogisticRegression`` on an 80/20 split, and prints accuracy,
    coefficients, predictions for three example passengers, per-row test-set
    predictions and the mean predicted survival probability over all rows.

    Returns:
        None. All results are printed. The function returns early, after
        printing an error, if the dataset cannot be loaded, a required column
        is missing, or there is not enough usable data to train on.
    """
    print("--- Starting Titanic Survival Prediction (Python) ---")

    possible_filenames = [
        'Titanic-Dataset.csv - Titanic-Dataset.csv',
        'Titanic-Dataset.csv',
    ]

    df = _load_titanic_dataframe(possible_filenames)
    if df is None:
        print("\nFATAL ERROR: The Titanic CSV dataset could not be found or loaded automatically.")
        print("Please ensure the 'Titanic-Dataset.csv - Titanic-Dataset.csv' file has been uploaded to this Colab session.")
        print("You can verify by clicking the folder icon on the left sidebar and checking the file list.")
        return

    print("\n--- Initial Data Snapshot (first 5 rows) ---")
    print(df.head())
    print("\n--- Missing Values Check ---")
    print(df.isnull().sum())

    features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
    target = 'Survived'

    df = _prepare_titanic_dataframe(df, features, target)
    if df is None:
        return

    X = df[features]
    y = df[target]

    print(f"\nFeatures (X) shape: {X.shape}, Target (y) shape: {y.shape}")
    print("\n--- Prepared Features (first 5 rows) ---")
    print(X.head())

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print("\nData split into training (80%) and testing (20%):")
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    # A classifier cannot be fitted on a single outcome class.
    if y_train.nunique() < 2:
        print("Error: Training data contains fewer than two outcome classes; cannot train the model.")
        return

    model = LogisticRegression(solver='liblinear', random_state=42)
    print("\nTraining Logistic Regression model...")
    model.fit(X_train, y_train)
    print("Model training complete.")

    y_pred = model.predict(X_test)
    # Column 1 of predict_proba is the probability of class 1 (survived).
    test_probabilities = model.predict_proba(X_test)[:, 1]
    accuracy = accuracy_score(y_test, y_pred)
    print("\n--- Model Evaluation ---")
    print(f"Accuracy on the test set: {accuracy * 100:.2f}%")

    # Mean predicted survival probability over every row (train and test).
    overall_probabilities_all_data = model.predict_proba(X)[:, 1]
    overall_avg_survival_possibility = np.mean(overall_probabilities_all_data) * 100

    print("\n--- Model Coefficients (Weights) and Intercept (Bias) ---")
    for feature, weight in zip(features, model.coef_[0]):
        print(f"Weight for '{feature}': {weight:.4f}")
    print(f"Intercept (Bias): {model.intercept_[0]:.4f}")

    print("\n--- Demonstrating Predictions for Example Passengers ---")

    # Column order follows `features`: Pclass, Sex, Age, SibSp, Parch, Fare.
    example_passengers = pd.DataFrame(
        [
            [1, 1, 25, 0, 0, 100],
            [3, 0, 60, 1, 0, 15],
            [2, 0, 20, 0, 0, 25],
        ],
        columns=features,
    )

    print("\nExample Passengers to Predict:")
    print(example_passengers)

    probabilities = model.predict_proba(example_passengers)[:, 1]
    predictions = model.predict(example_passengers)

    for i, passenger_data in example_passengers.iterrows():
        survival_status = "Survived" if predictions[i] == 1 else "Did Not Survive"
        sex_label = 'Female' if passenger_data['Sex'] == 1 else 'Male'
        print(f"\nPassenger {i+1} (Pclass={int(passenger_data['Pclass'])}, Sex={sex_label}, Age={int(passenger_data['Age'])}):")
        print(f"  Predicted Outcome: {survival_status}")
        print(f"  The possibility of this passenger to survive in Titanic is: {probabilities[i]*100:.2f}%")

    print("\n--- Detailed Predictions for Test Set Passengers ---")

    # reset_index aligns y_test with the positional arrays returned by the model.
    predictions_df = pd.DataFrame({
        'Actual_Survived': y_test.reset_index(drop=True),
        'Predicted_Survived': y_pred,
        'Predicted_Prob_Survived': test_probabilities,
    })

    outcome_labels = {0: 'Did Not Survive', 1: 'Survived'}
    predictions_df['Actual_Survived_Label'] = predictions_df['Actual_Survived'].map(outcome_labels)
    predictions_df['Predicted_Survived_Label'] = predictions_df['Predicted_Survived'].map(outcome_labels)

    print("\nFirst 10 Predictions:")
    print(predictions_df.head(10).to_string())

    print("\nLast 10 Predictions:")
    print(predictions_df.tail(10).to_string())

    print("\n--- Overall Survival Possibility ---")
    print(f"The overall possibility of a passenger to survive in Titanic is: {overall_avg_survival_possibility:.2f}%")

    print("\n--- Prediction Display Complete ---")

# Run the full prediction workflow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    predict_titanic_survival()


--- Starting Titanic Survival Prediction (Python) ---
Loaded dataset using filename: 'Titanic-Dataset.csv'. Shape: (891, 12)

--- Initial Data Snapshot (first 5 rows) ---
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85  