In [303]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from scipy.optimize import minimize
from sklearn.preprocessing import PolynomialFeatures

In [304]:
df = pd.read_csv('Advertising.csv')
df.head(3)

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Ad Topic Line,City,Male,Country,Timestamp,Clicked on Ad
0,68.95,35,61833.9,256.09,Cloned 5thgeneration orchestration,Wrightburgh,0,Tunisia,2016-03-27 00:53:11,0
1,80.23,31,68441.85,193.77,Monitored national standardization,West Jodi,1,Nauru,2016-04-04 01:39:02,0
2,69.47,26,59785.94,236.5,Organic bottom-line service-desk,Davidton,0,San Marino,2016-03-13 20:35:42,0


# 0. Preprocessing

In [305]:
X = df.iloc[:,[0,1,2,3,6]].values
y = df.iloc[:,-1].values.reshape(-1,1)

## Normalisasi

In [306]:
def FeatureScaling(X):
    mu = X.mean(axis=0)
    sigma= X.std(axis=0)
    [m,n] = X.shape
    t = np.ones((m,1))
    Xnorm =(X - (t*mu)) / (t*sigma)
    return(Xnorm)

### X before

In [307]:
X

array([[6.895000e+01, 3.500000e+01, 6.183390e+04, 2.560900e+02,
        0.000000e+00],
       [8.023000e+01, 3.100000e+01, 6.844185e+04, 1.937700e+02,
        1.000000e+00],
       [6.947000e+01, 2.600000e+01, 5.978594e+04, 2.365000e+02,
        0.000000e+00],
       ...,
       [5.163000e+01, 5.100000e+01, 4.241572e+04, 1.203700e+02,
        1.000000e+00],
       [5.555000e+01, 1.900000e+01, 4.192079e+04, 1.879500e+02,
        0.000000e+00],
       [4.501000e+01, 2.600000e+01, 2.987580e+04, 1.783500e+02,
        0.000000e+00]])

### X after 

In [308]:
X[:,:4] = FeatureScaling(X[:,:4])
X

array([[ 0.24926659, -0.11490498,  0.50969109,  1.73403   ,  0.        ],
       [ 0.96113227, -0.57042523,  1.00253021,  0.31380538,  1.        ],
       [ 0.28208309, -1.13982553,  0.35694859,  1.28758905,  0.        ],
       ...,
       [-0.84377541,  1.707176  , -0.93857029, -1.35892388,  1.        ],
       [-0.59638946, -1.93698596, -0.97548353,  0.18117208,  0.        ],
       [-1.26155474, -1.13982553, -1.87383208, -0.0376045 ,  0.        ]])

## Adding Bias

In [309]:
X = np.c_[np.ones(m),X]
[m,n] = X.shape
theta = np.zeros((n,1))

# LINEAR REGRESSION

# 1.Cost Function
#return: J of theta

In [310]:
def sigmoid(z):
    return(1/(1+np.exp(-z)))

In [311]:
def costFunction(theta,X,y):
    [m,n] = X.shape
    h=sigmoid(X.dot(theta))
    J=(-1/m)*((np.log(h)).T.dot(y) + ((np.log(1-h)).T).dot(1-y))
    return(J)

In [333]:
def costFunctionReg(theta,X,y):
    reg=10
    [m,n] = X.shape
    h=sigmoid(X.dot(theta))
    J=(-1/m)*((np.log(h)).T.dot(y) + ((np.log(1-h)).T).dot(1-y)) + (reg/(2*m))*np.sum(np.square(theta[1:]))
    return(J)

# 2.Gradient Descent 
#return: grad = 1/m * X.T * (h-y)

In [336]:
def gradient(theta,X,y):
    [m,n] = X.shape
    h=sigmoid(X.dot(theta.reshape(-1,1)))
    grad = (1/m) * X.T.dot(h-y)
    return (grad.flatten())

In [337]:
def gradientReg(theta,X,y):
    reg = 10
    [m,n] = X.shape
    h=sigmoid(X.dot(theta.reshape(-1,1)))
    
    R = (reg/m)*np.r_[[[0]], theta[1:].reshape(-1,1)]
    grad = (1/m) * X.T.dot(h-y) + R
    
    return (grad.flatten())

In [340]:
#WARNING!!!!
#kalo y = (1000,) BUKAN (1000,1) (harus di reshape(-1,1) dulu)
# kalo ga..... h.shape(1000,1) - y.shape(1000,) = size(1000,1000) 

# 3.Optimize CostFunc - Minimize
#return optimum theta (res.x)

In [341]:
# res = minimize(costFunction, theta, args=(X,y), method=None, jac=gradient, options={'maxiter':10})
# res

[m,n] = X.shape
theta = np.zeros((n,1))
res = minimize(costFunction, theta, args=(X,y), method=None, jac=gradient, options={'maxiter':400})
resreg = minimize(costFunctionReg, theta, args=(X,y), method=None, jac=gradientReg, options={'maxiter':400})

In [328]:
#Theta Optimum Tanpa Regularisasi
res.x

array([ 2.07024064, -3.05387921,  1.50071789, -1.82437695, -2.78705321,
       -0.4217089 ])

In [342]:
#Theta Optimum dengan Regularisasi
resreg.x

array([ 0.79194306, -1.73935294,  0.86162782, -0.97310569, -1.73083377,
       -0.17242466])

# 4.Predicting
Xtest using optimum theta

In [343]:
def prediction(theta,X,y,threshold=0.5):
    threshold = threshold
    p = sigmoid(X.dot(theta))>=threshold
    return(p.astype('int'))

In [331]:
p = prediction(res.x,X,y)
accuracy = round((100*sum((p==y.ravel())/p.size)),2)
print("Accuracy from Sigmoid by linreg = ", accuracy)

Accuracy from Sigmoid by linreg =  97.2


In [344]:
p = prediction(resreg.x,X,y)
accuracy = round((100*sum((p==y.ravel())/p.size)),2)
print("Accuracy from Sigmoid by linreg = ", accuracy)

Accuracy from Sigmoid by linreg =  96.9


# POLYNOMIAL 

In [423]:
X = df.iloc[:,[0,1,2,3,6]].values
y = df.iloc[:,-1].values.reshape(-1,1)

X[:,:4] = FeatureScaling(X[:,:4])

In [424]:
poly = PolynomialFeatures(6)
#otomatis ada penambahan bias
XX = poly.fit_transform(X)

In [425]:
[m,n] = XX.shape
initialTheta = np.zeros((n,1))

In [426]:
#CostFunctionPoly & gradientDescent

def costFunctionPoly(theta,reg,*args):
    [m,n] = XX.shape
    h=sigmoid(XX.dot(theta))
    J=(-1/m)*((np.log(h)).T.dot(y) + ((np.log(1-h)).T).dot(1-y)) + (reg/(2*m))*np.sum(np.square(theta[1:]))
    return(J)


def gradientPoly(theta,reg, *args):
    [m,n] = XX.shape
    h=sigmoid(XX.dot(theta.reshape(-1,1)))
    
    R = (reg/m)*np.r_[[[0]], theta[1:].reshape(-1,1)]
    grad = (1/m) * XX.T.dot(h-y) + R
    
    return (grad.flatten())

In [432]:
for C in ([0, 1, 10, 100, 1000,10000,100000]):
    # Optimize costFunctionReg
    res2 = minimize(costFunctionPoly, initialTheta, args=(C, XX, y), method=None, jac=gradientPoly, options={'maxiter':400})
    
    # Accuracy
    p = prediction(res2.x,XX,y)
    accuracy = 100*sum(p == y.ravel())/y.size    
    print('Accuracy from Sigmoid-Poly, C:{} = {}%'.format(C, round(accuracy)))

Accuracy from Sigmoid-Poly, C:0 = 73.0%
Accuracy from Sigmoid-Poly, C:1 = 73.0%
Accuracy from Sigmoid-Poly, C:10 = 73.0%
Accuracy from Sigmoid-Poly, C:100 = 73.0%
Accuracy from Sigmoid-Poly, C:1000 = 73.0%
Accuracy from Sigmoid-Poly, C:10000 = 91.0%
Accuracy from Sigmoid-Poly, C:100000 = 81.0%
