## Logistic Regression Using Gradient Ascent

### Import and Split Data

In [None]:
import pandas as pd
import numpy as np
import random

#Data
#https://archive.ics.uci.edu/ml/datasets/Tic-Tac-Toe+Endgame

#reading data
tictactoe_raw = pd.read_csv("tic-tac-toe.txt", sep=",")

#Replacing every column (features and label alike) with the integer codes of its categories,
#so the whole frame becomes numerical
for column in tictactoe_raw:
    tictactoe_raw[column] = tictactoe_raw[column].astype('category').cat.codes

#Disjoint 50/50 split: half of the rows are drawn WITHOUT replacement for training and the
#remaining rows become the test set. Sampling both halves independently with replacement
#would put the same rows in both sets (leakage) and duplicate rows inside each set.
#Relies on the default unique row index produced by read_csv.
training_data = tictactoe_raw.sample(frac=0.5, replace=False)
test_data = tictactoe_raw.drop(training_data.index)

#The nine board squares used as independent X variables
feature_columns = ['top-left-square', 'top-middle-square', 'top-right-square',
                   'middle-left-square', 'middle-middle-square', 'middle-right-square',
                   'bottom-left-square', 'bottom-middle-square', 'bottom-right-square']

#Selecting win/lose as our Y dependent variable (as ndarray)
Y_train = training_data['class'].values

#Selecting the rest of the features for our independent X variables
X_train = training_data[feature_columns].values

#Test Data
Y_test = test_data['class'].values
X_test = test_data[feature_columns].values

### Stochastic Gradient Ascent Step

In [None]:
#Start from random weights drawn from [0, 1), one weight per feature column
w = np.array([random.random() for _ in range(X_train.shape[1])])
epochs = 1

#Columns holding the nine board squares
feature_names = ['top-left-square', 'top-middle-square', 'top-right-square',
                 'middle-left-square', 'middle-middle-square', 'middle-right-square',
                 'bottom-left-square', 'bottom-middle-square', 'bottom-right-square']

#Stochastic gradient ascent on the logistic log-likelihood
for epoch in range(epochs):
    #Put the training rows in a fresh random order, then rebuild the label and
    #feature arrays so that they stay aligned with each other
    training_data = training_data.sample(frac=1, replace=False)
    Y_train = training_data['class'].values
    X_train = training_data[feature_names].values
    eta_initial = 1.0
    #One weight update per training sample; step counts from 1 within the epoch
    for step, (sample, label) in enumerate(zip(X_train, Y_train), start=1):
        #Sigmoid of the linear score: predicted probability of class 1
        predicted = 1/(1+np.exp(np.dot(sample, -w)))
        #Per-sample gradient: the features scaled by the prediction error
        #(regularization or the Hessian could also be used here)
        ascent_direction = np.dot(sample.transpose(), label - predicted)
        #The learning rate decays as eta_initial/step; move along the gradient
        w = w + (eta_initial/step)*ascent_direction

print("Converged w:", w)
        

### Testing Results

In [None]:
#Sigmoid Function: predicted probability of class 1 for every test row
test_prediction = 1/(1+np.exp(np.dot(X_test, -w)))

#If closer to 0 in sigmoid function allocate 0, if closer to 1, allocate 1 for the prediction
#(a probability of exactly 0.5 is allocated 0)
test_predictions = (test_prediction > 0.5).astype(int)

#Accuracy is measured against the TEST labels; comparing against Y_train would score the
#predictions against labels of unrelated rows
accuracy = 100*np.mean(test_predictions == Y_test)
print("Test Accuracy Stochastic Gradient Ascent:", accuracy, "%")

### Stochastic Implementation With Newton Raphson

In [None]:
#Randomly generated weights for our Linear model (one per feature column)
w = np.array([random.random() for x in range(X_train.shape[1])])
epochs = 1

#Identity matrix kept only in case a later cell refers to it; the per-sample
#(scalar) curvature used below does not need it
I = np.eye(w.shape[0])

#Lower bound on the curvature so the Newton step never divides by zero when the
#sigmoid saturates (P close to 0 or 1) or a sample is all zeros
min_curvature = 1e-8

#Stochastic Gradient Ascent with a Newton Raphson style step
for epoch in range(0, epochs):
    #Re-shuffling X points and rebuilding the aligned label/feature arrays
    training_data = training_data.sample(frac=1, replace=False)
    Y_train = training_data['class'].values
    X_train = training_data[['top-left-square', 'top-middle-square', 'top-right-square',
 'middle-left-square', 'middle-middle-square', 'middle-right-square',
 'bottom-left-square', 'bottom-middle-square', 'bottom-right-square']].values
    eta_initial = 1.0
    #Stochastic sample by sample iteration step
    for s_loc, x_s in enumerate(X_train):
        #Predicted probability of class 1 for this sample (sigmoid of the linear score)
        P = 1/(1+np.exp(np.dot(x_s, -w)))
        #gradient of the log-likelihood for this sample: features scaled by the prediction error
        gradient = x_s*(Y_train[s_loc] - P)
        #Scalar curvature for a single sample: x^T * P*(1-P) * x.
        #P is a scalar here, so "I - P" with an identity MATRIX is not (1 - P); it produced
        #P*(x.x - P*(sum x)^2), which can be zero or negative and flips the step direction.
        #The features are cast to float so the squared norm is not accumulated in a narrow integer dtype.
        H = P*(1 - P)*np.dot(x_s.astype(float), x_s)
        #decreasing eta rate
        eta = eta_initial/(s_loc + 1)
        #stepping in the direction of the gradient using the Newton Raphson Method
        w = w + eta*gradient/max(H, min_curvature)

print("Converged w:", w)
        

### Testing Results

In [None]:
#Sigmoid Function: predicted probability of class 1 for every test row
test_prediction = 1/(1+np.exp(np.dot(X_test, -w)))

#If closer to 0 in sigmoid function allocate 0, if closer to 1, allocate 1 for the prediction
#(a probability of exactly 0.5 is allocated 0)
test_predictions = (test_prediction > 0.5).astype(int)

#Accuracy is measured against the TEST labels; comparing against Y_train would score the
#predictions against labels of unrelated rows
accuracy = 100*np.mean(test_predictions == Y_test)
print("Test Accuracy Newton Raphson:", accuracy, "%")