In [21]:
from sklearn.model_selection import train_test_split
import data_preprocessing as dp
from sklearn.metrics import accuracy_score
import numpy as np

In [22]:
(X, Y) = dp.telco.load()
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

In [23]:
# logreg = LogisticRegression()
# logreg.fit(X_train, y_train)
# Y_pred = logreg.predict(X_test)
# print(logreg.score(X_train, y_train))
# print(accuracy_score(y_test, Y_pred))

In [24]:
def tanh(z):
    return (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))

def loss(y, y_hat):
    loss = -np.mean(y*(np.log(y_hat)) - (1-y)*np.log(1-y_hat))
    return loss

def gradients(X, y, y_hat):

    # X --> Input.
    # y --> true/target value.
    # y_hat --> hypothesis/predictions.
    # w --> weights (parameter).
    # b --> bias (parameter).

    # m-> number of training examples.
    m = X.shape[0]

    # Gradient of loss w.r.t weights.
    dw = (1/m)*np.dot(X.T, (y_hat - y))

    # Gradient of loss w.r.t bias.
    db = (1/m)*np.sum((y_hat - y))

    return dw, db

def train(X, y, bs, epochs, lr):

    # X --> Input.
    # y --> true/target value.
    # bs --> Batch Size.
    # epochs --> Number of iterations.
    # lr --> Learning rate.

    # m-> number of training examples
    # n-> number of features
    m, n = X.shape

    # Initializing weights and bias to zeros.
    w = np.zeros((n,1))
    b = 0

    # Reshaping y.
    y = y.reshape(m,1)

    # Empty list to store losses.
    losses = []

    # Training loop.
    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):

            # Defining batches. SGD.
            start_i = i*bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]

            # Calculating hypothesis/prediction.
            y_hat = tanh(np.dot(xb, w)+ b)

            # Getting the gradients of loss w.r.t parameters.
            dw, db = gradients(xb, yb, y_hat)

            # Updating the parameters.
            w -= lr*dw
            b -= lr*db

        # Calculating loss and appending it in the list.
        l = loss(y, tanh(np.dot(X, w) + b))
        losses.append(l)

    # returning weights, bias and losses(List).
    return w, b, losses

def predict(X):

    # X --> Input.

    # Calculating presictions/y_hat.
    preds = tanh(np.dot(X, w) + b)

    # Empty List to store predictions.
    pred_class = [1 if i > 0.5 else 0 for i in preds]

    return np.array(pred_class)

In [25]:
w, b, l = train(X_train, y_train, bs=100, epochs=1000, lr=0.01)


  loss = -np.mean(y*(np.log(y_hat)) - (1-y)*np.log(1-y_hat))


In [26]:
Y_pred = predict(X_test)
print(accuracy_score(y_test, Y_pred))


0.8225691980127751


In [27]:
Y_train_pred = predict(X_train)
print(accuracy_score(Y_train_pred, y_train))

0.7971246006389776
