In [9]:
import numpy as np
from tqdm import tqdm

class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    All methods use a column-per-sample layout: a feature matrix ``X`` has
    shape ``(n_features, m)`` and a label array ``Y`` has shape ``(1, m)``,
    where ``m`` is the number of samples. Labels are expected to be 0 or 1.
    """

    def __init__(self, num_iterations=2000, learning_rate=0.5):
        """Store the training hyper-parameters; weights are created by ``fit``.

        Args:
            num_iterations: number of gradient-descent steps run by ``optimize``.
            learning_rate: step size applied to each gradient update.
        """
        self.num_iterations = num_iterations
        self.learning_rate = learning_rate
        self.w = None  # weight column vector, shape (n_features, 1) once initialised
        self.b = None  # scalar bias once initialised

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so a
        large negative ``z`` does not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def initialize_parameters(self, dim):
        """Reset the weights to a ``(dim, 1)`` zero vector and the bias to zero."""
        self.w = np.zeros((dim, 1))
        self.b = 0.0

    def propagate(self, X, Y):
        """Compute the cross-entropy cost and its gradients for the current parameters.

        Args:
            X: feature matrix of shape ``(n_features, m)``.
            Y: labels of shape ``(1, m)``.

        Returns:
            Tuple ``(dw, db, cost)``: gradient of the cost with respect to
            ``w`` (same shape as ``w``), gradient with respect to ``b``
            (scalar) and the mean cross-entropy cost (scalar).
        """
        m = X.shape[1]

        Z = np.dot(self.w.T, X) + self.b
        A = self.sigmoid(Z)

        # Work from the logits instead of log(A) / log(1 - A):
        #   log(A)     == -logaddexp(0, -Z)
        #   log(1 - A) == -logaddexp(0,  Z)
        # This stays finite when the sigmoid saturates to exactly 0 or 1,
        # where the naive form evaluates log(0) and can yield inf or nan.
        cost = np.sum(Y * np.logaddexp(0, -Z) + (1 - Y) * np.logaddexp(0, Z)) / m

        error = A - Y
        dw = np.dot(X, error.T) / m
        db = np.sum(error) / m

        return dw, db, cost

    def optimize(self, X, Y):
        """Run ``num_iterations`` gradient-descent steps, updating ``w`` and ``b`` in place.

        Returns:
            List with the cost recorded at every 100th iteration (0, 100, 200, ...).
        """
        costs = []

        for i in tqdm(range(self.num_iterations), desc="Training"):
            dw, db, cost = self.propagate(X, Y)

            self.w -= self.learning_rate * dw
            self.b -= self.learning_rate * db

            if i % 100 == 0:
                costs.append(cost)

        return costs

    def predict(self, X):
        """Return 0/1 predictions of shape ``(1, m)`` for ``X`` of shape ``(n_features, m)``.

        A sample is labelled 1 when its predicted probability is strictly
        greater than 0.5. The parameters must have been initialised (for
        example by ``fit``) before calling this method.
        """
        A = self.sigmoid(np.dot(self.w.T, X) + self.b)
        return (A > 0.5).astype(int)

    def fit(self, X_train, Y_train):
        """Initialise the parameters from ``X_train``'s feature count and train.

        Returns:
            The list of costs recorded by ``optimize``.
        """
        self.initialize_parameters(X_train.shape[0])
        costs = self.optimize(X_train, Y_train)
        return costs

    def score(self, X, Y):
        """Return the accuracy of ``predict(X)`` against ``Y``.

        Computed as one minus the mean absolute difference between the
        predictions and ``Y``.
        """
        Y_prediction = self.predict(X)
        accuracy = 1 - np.mean(np.abs(Y_prediction - Y))
        return accuracy


In [10]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Location of the credit-card CSV. Set the CREDITCARD_CSV environment variable
# to point at another copy of the file; when it is unset, the path this
# notebook was originally written against is used.
DATA_PATH = os.environ.get(
    'CREDITCARD_CSV',
    '/Users/mohandesigan/Documents/coding/AI/DL-andrew-ng-nbs-/Course 1/Week 2/W2A2/datasets/creditcard.csv',
)

# Load the dataset
df = pd.read_csv(DATA_PATH)

# Separate features (every column except the 'Class' label) and target
X = df.drop(columns='Class')
y = df['Class']



In [11]:
# Split the data first, so that the held-out rows never influence the
# statistics the scaler learns (fitting on the full dataset leaks test-set
# information into preprocessing). With the same random_state the rows land in
# the same train/test partitions as a split performed on the scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess: fit the scaler on the training rows only, then apply it everywhere
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; kept so that any
# other cell that reads X_scaled continues to work.
X_scaled = scaler.transform(X)



In [12]:
# Create and train the model
model = LogisticRegression(num_iterations=5000, learning_rate=0.01)
costs = model.fit(X_train.T, y_train.values.reshape(1, -1))



Training: 100%|██████████| 5000/5000 [00:45<00:00, 110.02it/s]


In [13]:
# Evaluate the model
train_accuracy = model.score(X_train.T, y_train.values.reshape(1, -1))
test_accuracy = model.score(X_test.T, y_test.values.reshape(1, -1))

print(f"Credit Card Fraud Detection - Train accuracy: {train_accuracy}")
print(f"Credit Card Fraud Detection - Test accuracy: {test_accuracy}")


Credit Card Fraud Detection - Train accuracy: 0.9990300423533542
Credit Card Fraud Detection - Test accuracy: 0.9990168884519505
