In [1]:
import numpy as np
import pandas as pd

In [14]:
def sigmoid(x):
    # to limit the values between -500 and 500.
    x = np.clip(x, -500, 500) #remove while using custome dataset
    return 1/(1+np.exp(-x))

class LogisticRegression():
    def __init__(self, lr=0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
        self.losses = []
    
    def fit(self, X, y): 
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features) #initial weight is 0
        self.bias = 0 #initial bias is 0
        self.losses = []
        
        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias #wx + b
            predictions = sigmoid(linear_pred)
            
            #Calculate the gradient
            dw = (1/n_samples) * np.dot(X.T,(predictions - y))
            db = (1/n_samples) * np.sum(predictions - y)
            
            #updates the weight and biases
            self.weights = self.weights - self.lr*dw
            self.bias = self.bias - self.lr*db
            
            # Calculate and store the loss
            predictions = np.clip(predictions, 1e-10, 1 - 1e-10)

            loss = (-1/n_samples) * np.sum(y * np.log(predictions) + (1 - y) * np.log(1 - predictions))
            self.losses.append(loss)
            #The model is trained now
            
#           print("\nLosses during training:", self.losses)

            
    #Make predictions
    def predict(self, X):
        linear_pred = np.dot(X, self.weights) + self.bias #wx + b
        y_pred = sigmoid(linear_pred)
        
        #Choose the label based on the probability
        class_pred = [0 if y<=0.5 else 1 for y in y_pred]
        return class_pred

In [15]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

#data = datasets.load_breast_cancer()

data = pd.read_csv("E:\\Mayuresh\\EY_Project\Dataset\\Bank Customer Churn Prediction.csv")
data = data.drop('customer_id', axis = 1)
data = data.drop('gender', axis=1)
data = data.drop('country', axis=1)

#X, y = data.data, data.target

#Dropping Churn from data and assigning to y variable
X = data.drop('churn', axis=1)

#Assigning Churn to y
y = data['churn']

#data['country'] = data['country'].apply(lambda x : 0 if x == 'France' else 1 if x == 'Germany' else 2)
#data['gender'] = data['gender'].apply(lambda x : 0 if x == 'Male' else 1)

# Use get_dummies to perform one-hot encoding
#X = pd.get_dummies(data, columns=['country','gender'], drop_first=True)
#X = X.astype(int)
#X


#Split the data
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2) #Randomstate not set for checking 


clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

#Calculate accuracy
def accuracy(y_pred, y_test):
    return np.sum(y_pred==y_test)/len(y_test)

acc = accuracy(y_pred, y_test)
print(acc)

0.7915


#### Accuracy is low because I rmeoved the "gender" and "country" column. I did try One-hot encoding anf Get_dummies but still the results weren't favourable.