# Logistic Regression - Implementation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Sanity check: list the current working directory to confirm the CSV is there.
import os
os.listdir(os.getcwd())

['.ipynb_checkpoints', 'hr_job.csv', 'LogisticRegression Practice.ipynb']

In [3]:
# Read hr_job.csv (path is relative to the working directory) into a DataFrame.
data = pd.read_csv("hr_job.csv")

In [4]:
# Column dtypes and non-null counts; columns with fewer non-null entries than rows have gaps.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17257 entries, 0 to 17256
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      17257 non-null  int64  
 1   city                    17257 non-null  int64  
 2   city_development_index  17257 non-null  float64
 3   gender                  17257 non-null  int64  
 4   relevent_experience     17257 non-null  int64  
 5   enrolled_university     17006 non-null  float64
 6   education_level         16962 non-null  float64
 7   major_discipline        15441 non-null  float64
 8   experience              17161 non-null  float64
 9   company_size            9603 non-null   float64
 10  company_type            9982 non-null   float64
 11  last_new_job            16928 non-null  float64
 12  training_hours          17257 non-null  float64
 13  target                  17257 non-null  int64  
dtypes: float64(9), int64(5)
memory usage: 

In [6]:
# (rows, columns) of the loaded frame.
data.shape

(17257, 14)

In [7]:
# Summary statistics per column, transposed so that each column becomes one row.
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ID,17257.0,8629.0,4981.811133,1.0,4315.0,8629.0,12943.0,17257.0
city,17257.0,72.991887,46.128232,1.0,21.0,83.0,103.0,180.0
city_development_index,17257.0,0.805103,0.128002,0.448,0.68,0.874349,0.92,0.949
gender,17257.0,0.828417,0.488328,0.0,1.0,1.0,1.0,2.0
relevent_experience,17257.0,0.710726,0.453438,0.0,0.0,1.0,1.0,1.0
enrolled_university,17006.0,0.476891,0.825014,0.0,0.0,0.0,1.0,2.0
education_level,16962.0,3.108183,0.582475,1.0,3.0,3.0,3.0,5.0
major_discipline,15441.0,3.888543,0.58196,0.0,4.0,4.0,4.0,5.0
experience,17161.0,10.53237,8.291935,0.0,4.0,8.0,15.0,25.0
company_size,9603.0,4.259711,2.18791,1.0,3.0,4.0,6.0,8.0


In [9]:
class StandardScalar():
    """Minimal z-score scaler: centre every column and divide by its spread.

    Stateless helper; nothing is stored on the instance, so the statistics
    are recomputed on each call.
    """

    def fit_transform(self, x):
        """Standardize ``x`` column by column.

        Parameters
        ----------
        x : numpy array or pandas DataFrame
            Rows are treated as samples and columns as features (statistics
            are taken along axis 0).

        Returns
        -------
        Same container type as ``x``
            ``(x - column_mean) / column_std`` using the population standard
            deviation (ddof=0). Columns with zero spread are only centred,
            so they come out as zeros instead of NaN/inf.
        """
        mean = np.mean(x, axis=0)
        # The spread must be taken per column (axis=0), just like the mean.
        # Without an axis, an ndarray yields one global std for all features.
        std_dev = np.std(x, axis=0)
        # A constant column has zero spread; divide by 1 to avoid 0/0.
        std_dev = np.where(std_dev == 0, 1.0, std_dev)
        return (x - mean) / std_dev

In [11]:
# Count of missing values per column, before any imputation.
data.isnull().sum()

ID                           0
city                         0
city_development_index       0
gender                       0
relevent_experience          0
enrolled_university        251
education_level            295
major_discipline          1816
experience                  96
company_size              7654
company_type              7275
last_new_job               329
training_hours               0
target                       0
dtype: int64

In [12]:
# Impute each missing value with the next valid observation below it in the
# same column. DataFrame.bfill() is the supported spelling; the `method=`
# argument of fillna() is deprecated in recent pandas releases.
data = data.bfill()

In [14]:
# Re-count missing values per column to confirm the back-fill left no gaps.
data.isnull().sum()

ID                        0
city                      0
city_development_index    0
gender                    0
relevent_experience       0
enrolled_university       0
education_level           0
major_discipline          0
experience                0
company_size              0
company_type              0
last_new_job              0
training_hours            0
target                    0
dtype: int64

In [15]:
# Preview the first five rows.
data.head(5)

Unnamed: 0,ID,city,city_development_index,gender,relevent_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours,target
0,1,21,0.624,1,0,2.0,3.0,4.0,3.0,4.0,4.0,0.0,31.372145,1
1,2,21,0.616795,1,0,2.0,3.0,4.0,0.0,4.0,4.0,1.0,63.988905,1
2,3,73,0.754,1,0,2.0,3.0,4.0,0.0,4.0,4.0,0.0,19.0,1
3,4,57,0.866,1,0,0.0,5.0,4.0,9.0,4.0,4.0,5.0,53.0,0
4,5,21,0.624,1,1,0.0,4.0,4.0,5.0,3.0,4.0,5.0,108.0,1


In [16]:
# Remove the `city` column from the frame.
# errors="ignore" keeps the cell idempotent: re-running it after the column
# is already gone no longer raises KeyError.
data = data.drop(columns="city", errors="ignore")

In [17]:
# Features: every column except the last one, which is `target`.
# NOTE(review): ID is still part of X here; it looks like a plain row
# identifier (1..17257 in describe()) -- confirm it is meant to be a feature.
X = data[data.columns[:-1]]
# Labels: the `target` column.
Y = data["target"]

In [18]:
# Standardize the features with the StandardScalar class defined in this notebook.
sc = StandardScalar()
# X is rebound to its scaled version; the unscaled features are no longer
# available under this name after the cell runs.
X = sc.fit_transform(X)

In [19]:
def initialize_weights(dim):
    """Create the starting parameters for the model.

    Parameters
    ----------
    dim : int
        Number of rows in the weight column vector.

    Returns
    -------
    tuple
        ``(W, b)`` where ``W`` is a ``(dim, 1)`` float array with every entry
        set to 0.1 and ``b`` is the float 0.0.
    """
    weights = np.ones((dim, 1)) * 0.1
    bias = 0.0
    return weights, bias

In [20]:
def activation(Z):
    """Logistic sigmoid, applied element-wise.

    Parameters
    ----------
    Z : scalar or array-like
        Input value(s).

    Returns
    -------
    Same shape as ``Z``
        ``1 / (1 + exp(-Z))``.
    """
    exp_neg = np.exp(-Z)
    return 1.0 / (1.0 + exp_neg)

In [21]:
def costFunction(y_pred, y):
    """Mean binary cross-entropy between predictions and labels.

    Parameters
    ----------
    y_pred : array-like
        Predicted probabilities.
    y : array-like
        True labels (0 or 1); must broadcast against ``y_pred``.

    Returns
    -------
    float
        ``-sum(y*log(p) + (1-y)*log(1-p)) / y.shape[0]``.
    """
    # Number of samples comes from the label array's first dimension.
    training_samples = y.shape[0]
    # Keep probabilities strictly inside (0, 1) so log() never sees 0,
    # which would otherwise produce inf/NaN for saturated predictions.
    eps = 1e-15
    y_pred = np.clip(y_pred, eps, 1 - eps)
    loss = (y * np.log(y_pred)) + ((1 - y) * np.log(1 - y_pred))
    return -np.sum(loss) / training_samples

In [22]:
def gradient_descent(x_train, y_train, y_pred):
    """Compute the current cost and the gradients of the loss.

    Despite the name, this does not update any parameters; it only returns
    the quantities an update step needs.

    Parameters
    ----------
    x_train : array-like
        Feature matrix; its first dimension is used as the sample count.
    y_train : array-like
        True labels, compatible in shape with ``y_pred``.
    y_pred : array-like
        Predicted probabilities for ``x_train``.

    Returns
    -------
    tuple
        ``(cost, gradients)`` where ``cost`` is the value returned by
        ``costFunction(y_pred, y_train)`` and ``gradients`` is a dict with
        keys ``"dW"`` (gradient w.r.t. the weights) and ``"db"`` (gradient
        w.r.t. the bias).
    """
    # Assignment, not subtraction: `cost` must be bound before it is returned.
    cost = costFunction(y_pred, y_train)
    sample_count = x_train.shape[0]
    residual = y_pred - y_train
    derivative_weight = np.dot(x_train.T, residual) / sample_count
    derivative_bias = np.sum(residual, keepdims = True) / sample_count
    gradients = {"dW" : derivative_weight, "db" : derivative_bias}
    return cost, gradients