In [1]:
#Name: Taslim Ansari
#UIN: 221P010
#Roll No: 03
#Aim: To implement logistic regression

# Import necessary libraries
import pandas as pd # For data manipulation
import matplotlib.pyplot as plt # (Imported but not used in this code)

In [2]:
# Read the Titanic CSV twice: `df` is the working frame that later cells modify,
# `data` is a second, independent read that no later cell references.
csv_path = "./titanic.csv"
df, data = pd.read_csv(csv_path), pd.read_csv(csv_path)

In [3]:
# Preview the top five rows of the working frame (rendered as the cell output)
df.head(n=5)

Unnamed: 0,passengerid,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,1,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S
1,2,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S
2,3,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S
3,4,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S
4,5,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S


In [4]:
# Count the missing entries per column
df.isna().sum()

passengerid       0
pclass            0
survived          0
name              0
sex               0
age             263
sibsp             0
parch             0
ticket            0
fare              1
cabin          1014
embarked          2
dtype: int64

In [5]:
# Handle missing values.
# Assign each filled column back to df instead of calling fillna(inplace=True)
# on df[col]: that chained form operates on an intermediate Series, emits a
# FutureWarning in pandas 2.x, and leaves df unchanged under Copy-on-Write.
df['age'] = df['age'].fillna(df['age'].mean())  # Replace missing age with the column mean
df['fare'] = df['fare'].fillna(df['fare'].mean())  # Replace missing fare with the column mean
df['embarked'] = df['embarked'].fillna(df['embarked'].mode()[0])  # Replace missing embarkation port with the most common value

In [6]:
# Remove the columns that are not passed on to the model
unused_columns = ['cabin', 'name', 'ticket']
df.drop(columns=unused_columns, inplace=True)

In [7]:
# Turn the categorical 'sex' and 'embarked' columns into indicator columns.
# drop_first=True leaves out one level per column so the indicators are not redundant.
categorical_columns = ['sex', 'embarked']
df = pd.get_dummies(data=df, columns=categorical_columns, drop_first=True)

In [8]:
# Display the frame after imputation, column removal and encoding
# (the rendered output elides the middle rows).
df

Unnamed: 0,passengerid,pclass,survived,age,sibsp,parch,fare,sex_male,embarked_Q,embarked_S
0,1,1,1,29.000000,0,0,211.3375,0,0,1
1,2,1,1,0.916700,1,2,151.5500,1,0,1
2,3,1,0,2.000000,1,2,151.5500,0,0,1
3,4,1,0,30.000000,1,2,151.5500,1,0,1
4,5,1,0,25.000000,1,2,151.5500,0,0,1
...,...,...,...,...,...,...,...,...,...,...
1304,1305,3,0,14.500000,1,0,14.4542,0,0,0
1305,1306,3,0,29.881135,1,0,14.4542,0,0,0
1306,1307,3,0,26.500000,0,0,7.2250,1,0,0
1307,1308,3,0,27.000000,0,0,7.2250,1,0,0


In [9]:
# Prepare features (X) and label (y)
from sklearn.model_selection import train_test_split

In [10]:
# Target: the 'survived' column (0 = died, 1 = survived)
y = df['survived']
# Inputs: every remaining column.
# NOTE(review): 'passengerid' is still among these columns; it looks like a row
# identifier rather than a predictor - confirm whether it should be excluded.
X = df.drop(columns=['survived'])

In [11]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [12]:
# Train a logistic regression model
from sklearn.linear_model import LogisticRegression

In [13]:
# Build the classifier with a raised iteration cap and fit it on the training split
# (fit() returns the estimator itself, so the call can be chained).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
model  # Show the fitted estimator as the cell output

LogisticRegression(max_iter=1000)

In [14]:
# Class predictions for the held-out rows
y_pred = model.predict(X=X_test)

In [15]:
# Evaluate model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [16]:
# Print overall accuracy, the confusion matrix, and the per-class report for the test split.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}") # Fraction of test rows predicted correctly
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}") # Rows = true class, columns = predicted class: [[TN, FP], [FN, TP]]
print(f"Classification Report:\n{classification_report(y_test, y_pred)}") # Precision, Recall, F1-score

Accuracy: 0.7748091603053435
Confusion Matrix:
[[127  17]
 [ 42  76]]
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.88      0.81       144
           1       0.82      0.64      0.72       118

    accuracy                           0.77       262
   macro avg       0.78      0.76      0.77       262
weighted avg       0.78      0.77      0.77       262



In [None]:
import tkinter as tk
from tkinter import messagebox
import numpy as np

# Feature names, taken from the columns of the frame the model was trained on;
# the GUI below creates one input field per name, in this order.
features = list(X.columns)
def predict_survival():
    """Read the entry widgets, run the trained model, and show the outcome in a popup.

    Uses the module-level ``entries`` (one widget per name in ``features``, in
    the same order), the fitted ``model`` and ``messagebox``. Returns nothing:
    the prediction, or any error, is reported through a dialog.
    """
    # Parse every field first so a bad value is reported as an input problem
    # and never reaches the model.
    try:
        values = [float(entry.get()) for entry in entries]
    except ValueError as e:
        messagebox.showerror("Error", f"Invalid input: {e}")
        return

    try:
        # The model was fitted on a DataFrame, so hand it a one-row DataFrame
        # with the same column names; a bare NumPy array drops the names and
        # makes scikit-learn warn that X has no valid feature names.
        input_data = pd.DataFrame([values], columns=features)
        prediction = model.predict(input_data)
    except Exception as e:
        # Keep the GUI responsive, but do not mislabel a model failure as bad input.
        messagebox.showerror("Error", f"Prediction failed: {e}")
        return

    # Show prediction in popup
    result = "Survived" if prediction[0] == 1 else "Did not Survive"
    messagebox.showinfo("Prediction Result", f"Passenger likely: {result}")
        
# Main application window
root = tk.Tk()
root.title("Titanic Survival Predictor")

# One labelled text box per feature, stacked top to bottom; the Entry widgets
# are collected in `entries` so predict_survival() can read them in feature order.
entries = []
for row_index in range(len(features)):
    caption = tk.Label(root, text=features[row_index])
    caption.grid(row=row_index, column=0, padx=10, pady=5)
    field = tk.Entry(root)
    field.grid(row=row_index, column=1, padx=10, pady=5)
    entries.append(field)

# Predict button sits on the row below the last input and spans both columns
predict_button = tk.Button(root, text="Predict Survival", command=predict_survival)
predict_button.grid(row=len(features), column=0, columnspan=2, pady=20)

# Hand control to Tk's event loop (blocks until the window is closed)
root.mainloop()