In [1]:
from sklearn import datasets
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

# Load scikit-learn's bundled breast-cancer dataset.
cancer = datasets.load_breast_cancer()
y = cancer.target
# Tabular view of the feature matrix with named columns.
cancer_df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    y,
    test_size=0.3,
    random_state=21,
)

# Baseline model: logistic regression with default settings (fit() returns the estimator itself).
logreg = LogisticRegression().fit(X_train, y_train)
y_predict = logreg.predict(X_test)

# Confusion matrix labelled with the class names: rows are true classes, columns are predicted classes.
logreg_confmx = pd.DataFrame(
    confusion_matrix(y_test, y_predict),
    index=cancer.target_names,
    columns=cancer.target_names,
)
print(logreg_confmx)

           malignant  benign
malignant         57       7
benign             2     105


In [2]:
#Initialize Weights
# One weight per feature (this notebook works with 30 feature values per sample),
# all starting at the same constant.
N_FEATURES = 30
INITIAL_WEIGHT = 0.5

# np.full builds the constant vector directly; the previous loop overwrote its
# own index variable and grew a Python list one element at a time.
weights = np.full(N_FEATURES, INITIAL_WEIGHT)
weights

array([ 0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,
        0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,
        0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5])

In [3]:
#Dot product of weights and features
# Take the first sample (the slice keeps it 2-D) and lay it out as a 30x1 column.
features = cancer.data[:1].reshape(30, -1)
print(features)
# Weight vector times the 30x1 column gives a one-element array: the weighted sum.
output = np.matmul(weights, features)
output

[[  1.79900000e+01]
 [  1.03800000e+01]
 [  1.22800000e+02]
 [  1.00100000e+03]
 [  1.18400000e-01]
 [  2.77600000e-01]
 [  3.00100000e-01]
 [  1.47100000e-01]
 [  2.41900000e-01]
 [  7.87100000e-02]
 [  1.09500000e+00]
 [  9.05300000e-01]
 [  8.58900000e+00]
 [  1.53400000e+02]
 [  6.39900000e-03]
 [  4.90400000e-02]
 [  5.37300000e-02]
 [  1.58700000e-02]
 [  3.00300000e-02]
 [  6.19300000e-03]
 [  2.53800000e+01]
 [  1.73300000e+01]
 [  1.84600000e+02]
 [  2.01900000e+03]
 [  1.62200000e-01]
 [  6.65600000e-01]
 [  7.11900000e-01]
 [  2.65400000e-01]
 [  4.60100000e-01]
 [  1.18900000e-01]]


array([ 1783.089236])

In [4]:
import math 
def sigmoid(x):
    '''
    Logistic sigmoid 1 / (1 + e**-x), evaluated without overflow.

    x: a number or array-like of numbers
    Returns a NumPy float for scalar input, otherwise an ndarray with the
    same shape as x. Every finite input maps to a value in [0, 1].
    '''
    x = np.asarray(x, dtype=float)

    # exp(-|x|) is never larger than 1, so it cannot overflow however large
    # |x| gets (the naive e**-x overflows for large negative x).
    decay = np.exp(-np.abs(x))

    # x >= 0: 1 / (1 + e**-x).
    # x <  0: the same quantity rewritten as e**x / (1 + e**x), where e**x == exp(-|x|).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () unwraps a 0-d result to a scalar and leaves real arrays unchanged.
    return result[()]

# Squash the weighted sum computed above into the sigmoid's output range;
# an input this large (about 1783) saturates the result to 1.0.
sigmoid(output)

array([ 1.])

In [5]:
def cost_function(features, labels, weights, eps=1e-15):
    '''
    Average binary cross-entropy (log loss) of the model's predictions.

    Example shapes:
    Features:(100,3)
    Labels: (100,1)
    Weights:(3,1)
    eps: predictions are clipped to [eps, 1-eps] before taking logs

    Returns the average cost over all observations (a single number)
    Cost = -( labels*log(predictions) + (1-labels)*log(1-predictions) ).sum() / len(labels)
    '''
    observations = len(labels)

    # predict() is defined elsewhere; presumably it returns probabilities in
    # [0, 1] with the same shape as labels -- TODO confirm.
    predictions = predict(features, weights)

    # Keep predictions strictly inside (0, 1). A saturated prediction of
    # exactly 0 or 1 would give log(0) = -inf, and 0 * -inf = nan, which
    # would turn the whole cost into nan.
    predictions = np.clip(predictions, eps, 1 - eps)

    #Take the error when label=1
    class1_cost = -labels*np.log(predictions)

    #Take the error when label=0
    class2_cost = (1-labels)*np.log(1-predictions)

    #Combine both costs (class2_cost is negative, so subtracting it adds its magnitude)
    cost = class1_cost - class2_cost

    #Take the average cost
    cost = cost.sum()/observations

    return cost