<a href="https://colab.research.google.com/github/madiha-ahmed-chowdhury/ASRRO_Shared-Task-Hunt/blob/main/Logistic_Regression__Titanic_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt

In [None]:
# Read the dataset from its hard-coded CSV location into a DataFrame.
titanic_data = pd.read_csv(
    '/content/train_and_test2.csv',
)

In [None]:
# Impute missing entries: 'Age' receives the column median and 'Embarked'
# receives its most frequent value (first entry returned by mode()).
age_fill_value = titanic_data['Age'].median()
embarked_fill_value = titanic_data['Embarked'].mode()[0]
titanic_data.fillna(
    {'Age': age_fill_value, 'Embarked': embarked_fill_value},
    inplace=True,
)

In [None]:
# Replace the categorical columns with integer codes.
# The single encoder is refit on each column in turn, so once the loop ends
# it only remembers the classes of the last column ('Embarked').
label_encoder = LabelEncoder()
for categorical_column in ('Sex', 'Embarked'):
    titanic_data[categorical_column] = label_encoder.fit_transform(
        titanic_data[categorical_column]
    )

In [None]:
# Give the two awkwardly named columns readable names (in place).
titanic_data.rename(
    columns={'2urvived': 'Survived', 'sibsp': 'SibSp'},
    inplace=True,
)

# Target vector (y) and the feature matrix (X) used by the model.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
y = titanic_data['Survived']
X = titanic_data[features]


In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features. The scaler learns its statistics from the
# training split only and the same transformation is then applied to both
# splits, so nothing from the test set influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
class LogisticRegressionGD:
    """Binary logistic regression trained with batch gradient descent.

    After ``fit`` the instance exposes:

    * ``weights``      -- 1-D array with one coefficient per feature,
    * ``bias``         -- the intercept term,
    * ``cost_history`` -- list with the cross-entropy cost of every iteration
      of the most recent ``fit`` call.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms
    # so the cost never evaluates log(0) (which would yield inf/nan).
    _EPS = 1e-15
    # float64 exp() overflows slightly above 709. Clamping logits well below
    # that keeps sigmoid warning-free; sigmoid is already saturated to
    # 0.0 / 1.0 (within float precision) long before |z| reaches this limit.
    _MAX_LOGIT = 500.0

    def __init__(self, learning_rate=0.01, iterations=1000):
        """Store the hyper-parameters.

        Args:
            learning_rate: Step size applied to each gradient update.
            iterations: Number of full-batch gradient-descent steps.
        """
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None
        self.cost_history = []

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)), element-wise.

        The input is clamped to ``[-_MAX_LOGIT, _MAX_LOGIT]`` first so that
        ``exp`` cannot overflow for large negative ``z``.
        """
        clamped = np.clip(z, -self._MAX_LOGIT, self._MAX_LOGIT)
        return 1 / (1 + np.exp(-clamped))

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like of n_samples binary targets (0 or 1).

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` disagree on the
                number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-shaped target cannot silently broadcast the
        # residual into an (n_samples, n_samples) matrix.
        y = np.asarray(y, dtype=float).reshape(-1)

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]}"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0
        # Start a fresh history so repeated fits do not concatenate runs.
        self.cost_history = []

        for _ in range(self.iterations):
            # Forward pass with the current parameters.
            y_pred = self.sigmoid(np.dot(X, self.weights) + self.bias)
            residual = y_pred - y

            # Gradients of the mean cross-entropy cost.
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            # Record the cost of the predictions made before this update.
            safe_pred = np.clip(y_pred, self._EPS, 1 - self._EPS)
            cost = -(1 / n_samples) * np.sum(
                y * np.log(safe_pred) + (1 - y) * np.log(1 - safe_pred)
            )
            self.cost_history.append(cost)

    def predict(self, X):
        """Return a list of 0/1 class labels for the rows of ``X``.

        A sample is labelled 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.
        """
        logits = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        probabilities = self.sigmoid(logits)
        return (probabilities > 0.5).astype(int).tolist()

In [None]:
# Fit the gradient-descent logistic regression on the standardized training
# split.
model = LogisticRegressionGD(
    learning_rate=0.01,
    iterations=1000,
)
model.fit(X_train_scaled, y_train)


In [None]:
# Pull the fitted parameters and the per-iteration cost trace off the model.
optimal_weights, optimal_bias, cost_history = (
    model.weights,
    model.bias,
    model.cost_history,
)

In [None]:
# Learning curve: recorded cost (y-axis) against the iteration index (x-axis).
iteration_axis = range(len(cost_history))
plt.plot(iteration_axis, cost_history)
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.title("Learning Curve: Cost vs Iterations")
plt.grid()
plt.show()

In [None]:
# Cost vs Weights (Cost Curve)
# Pairing the final weight vector with the last few entries of the iteration
# cost history does not show how the cost depends on the weights. Instead,
# each curve below sweeps ONE weight around its learned value while the other
# weights and the bias stay fixed, and evaluates the mean cross-entropy cost
# on the training split at every point of the sweep.
train_inputs = np.asarray(X_train_scaled, dtype=float)
train_targets = np.asarray(y_train, dtype=float).reshape(-1, 1)
weight_offsets = np.linspace(-2.0, 2.0, 81)
base_logits = np.dot(train_inputs, optimal_weights) + optimal_bias
log_eps = 1e-15  # keeps log() away from 0 if a probability saturates

for weight_index, feature_name in enumerate(features):
    # Changing weight j by d shifts every sample's logit by d * x_j, so the
    # whole sweep is evaluated at once as an (n_samples, n_offsets) matrix.
    swept_logits = (
        base_logits[:, None]
        + train_inputs[:, [weight_index]] * weight_offsets[None, :]
    )
    swept_probs = np.clip(model.sigmoid(swept_logits), log_eps, 1 - log_eps)
    swept_costs = -np.mean(
        train_targets * np.log(swept_probs)
        + (1 - train_targets) * np.log(1 - swept_probs),
        axis=0,
    )
    plt.plot(
        optimal_weights[weight_index] + weight_offsets,
        swept_costs,
        label=feature_name,
    )

plt.title("Cost vs Weights")
plt.xlabel("Weights")
plt.ylabel("Cost")
plt.legend()
plt.grid()
plt.show()

In [None]:
# Score the model on the held-out split: accuracy is the percentage of test
# rows whose predicted label equals the true label.
y_test_pred = model.predict(X_test_scaled)
is_correct = np.asarray(y_test_pred) == np.asarray(y_test)
test_accuracy = np.mean(is_correct) * 100

print("Optimal Weights:", optimal_weights)
print("Optimal Bias:", optimal_bias)
print("Test Set Accuracy:", test_accuracy)