In [31]:
import numpy as np
import math

# Load the dataset. Layout relied on below: the first line is a header, the
# first column is skipped (presumably a non-numeric identifier -- confirm
# against the CSV), the next D columns are features, the last is the target.
with open("nba_logreg.csv") as csv_file:
    # Drop blank lines so a trailing empty line cannot inflate N.
    lines = [raw for raw in csv_file.readlines() if raw.strip()]

N = len(lines) - 1                  # number of data rows (header excluded)
D = len(lines[0].split(',')) - 2    # feature count: all columns but first and last

X = np.zeros((N, D))
Y = np.zeros(N)

# Data rows are lines[1] .. lines[N] inclusive, hence the N + 1 upper bound;
# stopping at N would leave the last sample as an all-zero row with label 0.
for i in range(1, N + 1):
    line = lines[i].strip('\n').split(',')
    # Missing feature values (empty fields) are imputed with 0.
    X[i - 1] = [0.0 if field == '' else float(field) for field in line[1:D + 1]]
    Y[i - 1] = float(line[D + 1])
    

In [68]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray of the same shape
    as ``x``, with values in [0, 1].
    """
    x = np.asarray(x)
    # exp is only ever taken of a non-positive number, so it cannot overflow:
    #   x >= 0 : 1 / (1 + exp(-x))
    #   x <  0 : exp(x) / (1 + exp(x))      (algebraically the same value)
    decay = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Unwrap 0-d results so scalar input still yields a scalar.
    return out[()] if out.ndim == 0 else out

def predict(X, w):
    """Classify each row of ``X`` with the linear-logistic model ``w``.

    Returns a boolean array: True where ``sigmoid(X @ w)`` is at least 0.5.
    """
    probabilities = sigmoid(X @ w)
    threshold = 0.5
    return probabilities >= threshold

def cross_entropy(predictions, Y):
    """Summed binary cross-entropy between probabilities and 0/1 labels.

    Parameters
    ----------
    predictions : array-like
        Predicted probabilities of the positive class, one per sample.
    Y : array-like
        Ground-truth labels (0 or 1), same length as ``predictions``.

    Returns
    -------
    float
        ``-sum(y * log(p) + (1 - y) * log(1 - p))`` over all samples
        (a sum, not a mean).
    """
    # Keep probabilities strictly inside (0, 1): an exact 0 or 1 would make
    # log() return -inf and turn the 0 * log(0) terms into nan.
    eps = 1e-12
    p = np.clip(np.asarray(predictions, dtype=float), eps, 1.0 - eps)
    y = np.asarray(Y, dtype=float)
    # Sized by the arguments themselves, not by any module-level row count.
    return -np.sum(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))

def gradient(predictions, Y, X, w):
    """Gradient of the summed logistic cross-entropy with respect to ``w``.

    Computed as ``X.T @ (sigmoid(X @ w) - Y)``. The ``predictions`` argument
    is accepted but not used: the probabilities are recomputed from ``X``
    and ``w`` here.
    """
    probabilities = sigmoid(X @ w)
    residuals = probabilities - Y
    return X.T @ residuals

def optimize(steps, X, Y, speed):
    """Fit logistic-regression weights with plain batch gradient descent.

    Parameters
    ----------
    steps : int
        Number of gradient-descent iterations.
    X : ndarray, shape (n_samples, n_features)
        Feature matrix.
    Y : ndarray, shape (n_samples,)
        0/1 labels.
    speed : float
        Learning rate applied to the raw (summed) gradient.

    Returns
    -------
    ndarray, shape (n_features,)
        The weights after the last step.

    Notes
    -----
    Weights start from a random normal draw (mean 0, std 0.1), so results
    vary between runs unless NumPy's global RNG is seeded by the caller.
    Progress is reported through ``validation(w)``, which evaluates on the
    data that function defaults to, not necessarily the ``X``/``Y`` given here.
    """
    w = np.random.normal(0., 0.1, [X.shape[1],])

    # Report about ten times per run. An integer interval keeps the cadence
    # regular even when `steps` is not a multiple of ten (the float modulo
    # `i % (steps / 10)` was not), and never drops to zero for small `steps`.
    report_every = max(1, (steps + 9) // 10)

    for i in range(steps):
        # gradient() recomputes the probabilities from X and w and ignores its
        # first argument, so no per-iteration predict() call is needed.
        delta = gradient(None, Y, X, w)

        # Gradient-descent update.
        w -= speed * delta

        if i % report_every == 0:
            print("------ iteration n°", i, " ------")
            validation(w)
    return w

def validation(w, features=None, labels=None):
    """Print confusion-matrix rates and accuracy for the weights ``w``.

    Parameters
    ----------
    w : ndarray
        Model weights passed to ``predict``.
    features : ndarray, optional
        Feature matrix to evaluate on. Defaults to the module-level ``X``.
    labels : array-like, optional
        0/1 ground-truth labels. Defaults to the module-level ``Y``.

    Prints TP, TN, FP and FN as fractions of the evaluated samples, followed
    by the accuracy. Returns None.
    """
    if features is None:
        features = X
    if labels is None:
        labels = Y

    predicted = np.asarray(predict(features, w), dtype=bool)
    actual = np.asarray(labels, dtype=bool)  # any non-zero label counts as positive

    # Confusion-matrix counts, converted to plain ints so printing is unchanged.
    TP = int(np.sum(actual & predicted))
    FN = int(np.sum(actual & ~predicted))
    FP = int(np.sum(~actual & predicted))
    TN = int(np.sum(~actual & ~predicted))

    # Normalise by the number of samples actually evaluated rather than the
    # global row count, so rates stay correct for any evaluation set.
    total = len(predicted)
    accuracy = (TP + TN) / total

    print("TP ", TP / total)
    print("TN ", TN / total)
    print("FP ", FP / total)
    print("FN ", FN / total)
    print("accuracy ", accuracy)

# Seed NumPy's global RNG so the random initial weights drawn inside
# optimize() -- and therefore the whole training run -- are reproducible
# after a kernel restart.
np.random.seed(0)

# Train for 100000 steps with a very small learning rate (1e-8), then report
# the final metrics and keep the boolean predictions on the training data.
opti_w = optimize(100000, X, Y, 0.00000001)
validation(opti_w)
prediction = predict(X, opti_w)
    

------ iteration n° 0  ------
TP  0.0
TN  0.3798507462686567
FP  0.0007462686567164179
FN  0.6194029850746269
accuracy  0.3798507462686567
------ iteration n° 10000  ------
TP  0.5231343283582089
TN  0.15970149253731344
FP  0.2208955223880597
FN  0.0962686567164179
accuracy  0.6828358208955224
------ iteration n° 20000  ------
TP  0.5156716417910447
TN  0.1798507462686567
FP  0.2007462686567164
FN  0.10373134328358209
accuracy  0.6955223880597015
------ iteration n° 30000  ------
TP  0.5149253731343284
TN  0.1873134328358209
FP  0.19328358208955224
FN  0.1044776119402985
accuracy  0.7022388059701492
------ iteration n° 40000  ------
TP  0.5156716417910447
TN  0.1917910447761194
FP  0.18880597014925374
FN  0.10373134328358209
accuracy  0.7074626865671642
------ iteration n° 50000  ------
TP  0.5149253731343284
TN  0.1925373134328358
FP  0.1880597014925373
FN  0.1044776119402985
accuracy  0.7074626865671642
------ iteration n° 60000  ------
TP  0.514179104477612
TN  0.1917910447761194
FP