In [1]:
#svm on final
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import svm
from sklearn.model_selection import KFold

# Read the dataset: the first nine columns are used as features and the
# tenth column as the target (flattened to 1-D for scikit-learn).
data = np.loadtxt('datasets/tictac_final.txt')
input_data, output_data = data[:, :9], data[:, 9].ravel()

# Hold out 20% of the rows for the final test (fixed seed for repeatability).
x_train, x_test, y_train, y_test = train_test_split(
    input_data, output_data, test_size=0.2, random_state=42
)

# Shuffled 10-fold cross validation over the full dataset.
classifier = svm.SVC(kernel='linear')
kf = KFold(n_splits=10, shuffle=True, random_state=42)
cross_val_accuracies = cross_val_score(classifier, input_data, output_data, cv=kf)
mean_accuracy = cross_val_accuracies.mean()
print(f"Cross validation with Linear SVM: {mean_accuracy * 100:.2f}%")

# Fit on the training split, then score the held-out rows.
y_pred = classifier.fit(x_train, y_train).predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print(f"test split accuracy using Linear SVM: {accuracy * 100:.2f}%")
print(conf_matrix)

Cross validation with Linear SVM: 98.33%
test split accuracy using Linear SVM: 96.88%
[[ 61   6]
 [  0 125]]


In [2]:
#k neighbors on final
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
# Read the dataset: nine feature columns followed by one target column.
data = np.loadtxt('datasets/tictac_final.txt')
input_data, output_data = data[:, :9], data[:, 9].ravel()

# K-Nearest Neighbors with scikit-learn's default settings.
classifier = KNeighborsClassifier()

# Shuffled 10-fold cross validation over the full dataset (fixed seed).
kf = KFold(n_splits=10, shuffle=True, random_state=42)
cross_val_accuracies = cross_val_score(classifier, input_data, output_data, cv=kf)
mean_accuracy = cross_val_accuracies.mean()
print(f"Cross validation with K-Nearest Neighbors: {mean_accuracy * 100:.2f}%")

# 80/20 hold-out evaluation: fit on the training rows, predict the rest.
x_train, x_test, y_train, y_test = train_test_split(
    input_data, output_data, test_size=0.2, random_state=42
)
y_pred = classifier.fit(x_train, y_train).predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report the hold-out accuracy and the confusion matrix.
print(f"test split accuracy using K-Nearest Neighbors: {accuracy * 100:.2f}%")
print("\nConfusion Matrix:")
print(conf_matrix)

Cross validation with K-Nearest Neighbors: 99.89%
test split accuracy using K-Nearest Neighbors: 99.48%

Confusion Matrix:
[[ 66   1]
 [  0 125]]


In [3]:
#mlp on final
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
# Read the dataset: nine feature columns followed by one target column.
data = np.loadtxt('datasets/tictac_final.txt')
input_data, output_data = data[:, :9], data[:, 9].ravel()

# One hidden layer of 100 ReLU units; the seed fixes the weight initialisation.
classifier = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=1000,
    activation='relu',
    random_state=42,
)

# Shuffled 10-fold cross validation over the full dataset (fixed seed).
kf = KFold(n_splits=10, shuffle=True, random_state=42)
cross_val_accuracies = cross_val_score(classifier, input_data, output_data, cv=kf)
mean_accuracy = cross_val_accuracies.mean()
print(f"cross validation with MLP: {mean_accuracy * 100:.2f}%")

# 80/20 hold-out evaluation: fit on the training rows, predict the rest.
x_train, x_test, y_train, y_test = train_test_split(
    input_data, output_data, test_size=0.2, random_state=42
)
y_pred = classifier.fit(x_train, y_train).predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report the hold-out accuracy and the confusion matrix.
print(f"test split accuracy using MLP: {accuracy * 100:.2f}%")
print("\nConfusion Matrix:")
print(conf_matrix)

cross validation with MLP: 98.33%
test split accuracy using MLP: 97.40%

Confusion Matrix:
[[ 62   5]
 [  0 125]]


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
# Local import: the Pipeline helper is only needed by this cell.
from sklearn.pipeline import make_pipeline

# Load the data: the first nine columns are the features, the tenth is the target.
data = np.loadtxt('datasets/tictac_single.txt')
input_data = data[:, :9]
output_data = data[:, 9].ravel()

# Scaling is part of the model pipeline so the scaler is re-fit on the training
# portion of each CV fold and on the train split only. Fitting it on the whole
# dataset up front lets held-out rows influence the training features (leakage).
scaler = StandardScaler()
classifier = make_pipeline(scaler, SVC(kernel='linear'))

# Shuffled 5-fold cross validation; cross_val_score clones the whole pipeline
# for every fold, so each fold gets its own freshly fitted scaler.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_accuracies = cross_val_score(classifier, input_data, output_data, cv=kf)

# Mean accuracy across the folds.
mean_accuracy = np.mean(cross_val_accuracies)
print(f"Cross validation with Linear SVM: {mean_accuracy * 100:.2f}%")

# 80/20 hold-out evaluation on the raw features; the pipeline scales internally.
x_train, x_test, y_train, y_test = train_test_split(input_data, output_data, test_size=0.2, random_state=42)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

# Accuracy and confusion matrix for the held-out rows.
print(f"Test split accuracy using Linear SVM: {accuracy * 100:.2f}%")
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
# Local import: the Pipeline helper is only needed by this cell.
from sklearn.pipeline import make_pipeline

# Load the data: the first nine columns are the features, the tenth is the target.
data = np.loadtxt('datasets/tictac_single.txt')
input_data = data[:, :9]
output_data = data[:, 9].ravel()

# Scaling is done inside a pipeline (see below) so it is fit on training rows
# only; fitting the scaler on the whole dataset first leaks held-out statistics.
scaler = StandardScaler()
best_k = 1
best_score = 0

# Shuffled 5-fold cross validation with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=23)

# The search is currently pinned to k=7 (the value kept after trying others);
# widen the range, e.g. range(1, 31), to redo a full sweep.
for k in range(7, 8):
    # Distance weighting: closer neighbours count more than distant ones.
    classifier = make_pipeline(
        StandardScaler(),
        KNeighborsClassifier(n_neighbors=k, weights='distance'),
    )
    cross_val_accuracies = cross_val_score(classifier, input_data, output_data, cv=kf)
    mean_accuracy = np.mean(cross_val_accuracies)
    if mean_accuracy > best_score:
        best_k = k
        best_score = mean_accuracy

# Report the best k and its cross-validated accuracy.
print(f"Best number of neighbors: {best_k}")
print(f"Cross validation accuracy with best KNN: {best_score * 100:.2f}%")

# 80/20 hold-out evaluation with the selected k; the pipeline fits `scaler`
# on the training rows and reuses those statistics for the test rows.
classifier = make_pipeline(scaler, KNeighborsClassifier(n_neighbors=best_k, weights='distance'))
x_train, x_test, y_train, y_test = train_test_split(input_data, output_data, test_size=0.2, random_state=23)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

# Accuracy and confusion matrix for the held-out rows.
print(f"Test split accuracy using best KNN: {accuracy * 100:.2f}%")
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
# Local import: the Pipeline helper is only needed by this cell.
from sklearn.pipeline import make_pipeline

# Load the data
data = np.loadtxt('datasets/tictac_single.txt')
# Separate the data into input (first nine columns) and output (tenth column)
input_data = data[:, :9]
output_data = data[:, 9].ravel()

# Standardize the data, this is particularly important for neural networks.
# The scaler is a pipeline step so it is fit on training rows only: fitting it
# on the whole dataset before CV / the split leaks held-out statistics.
scaler = StandardScaler()

# Two hidden layers of 50 ReLU units trained with Adam; seeded for repeatability.
# NOTE(review): max_iter=100 may stop training before convergence — confirm.
classifier = make_pipeline(
    scaler,
    MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=100, random_state=42, activation='relu', solver='adam'),
)

# Shuffled 5-fold cross validation; the pipeline is cloned and re-fit per fold.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_accuracies = cross_val_score(classifier, input_data, output_data, cv=kf)
mean_accuracy = np.mean(cross_val_accuracies)
print(f"Cross validation with MLP: {mean_accuracy * 100:.2f}%")

# Train the classifier on the train split and evaluate on test split
x_train, x_test, y_train, y_test = train_test_split(input_data, output_data, test_size=0.2, random_state=42)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test split accuracy using MLP: {accuracy * 100:.2f}%")
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
#build one 2x2 confusion matrix per output label for multi-label 0/1 targets
def multi_label_confusion_matrix(y_true, y_pred):
    """Build one 2x2 confusion matrix per label column.

    Parameters:
        y_true: 2-D array of true labels; one column per label.
        y_pred: 2-D array of predicted labels, indexed with the same columns.

    Returns:
        Float array of shape (num_labels, 2, 2). Entry ``i`` is laid out as
        ``[[tp, fp], [fn, tn]]``. Only values exactly equal to 1 or 0 are
        counted.
    """
    def _count(true_value, pred_value, col):
        # Number of rows where column `col` has the given true/predicted pair.
        return np.sum((y_true[:, col] == true_value) & (y_pred[:, col] == pred_value))

    label_count = y_true.shape[1]
    matrices = np.zeros((label_count, 2, 2))
    for col in range(label_count):
        matrices[col, 0, 0] = _count(1, 1, col)  # true positives
        matrices[col, 0, 1] = _count(0, 1, col)  # false positives
        matrices[col, 1, 0] = _count(1, 0, col)  # false negatives
        matrices[col, 1, 1] = _count(0, 0, col)  # true negatives
    return matrices
# Read the multi-label dataset: the first nine columns are the features and
# every remaining column is a target.
data = np.loadtxt('datasets/tictac_multi.txt')
X, y = data[:, :9], data[:, 9:]

# Standardise the features.
# NOTE(review): the scaler is fit on all rows before the split and the CV
# below, so held-out rows contribute to the scaling statistics — consider
# moving the scaler into a Pipeline.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

# 80/20 hold-out split, plus the shuffled 5-fold splitter used for the
# cross-validation scores below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=23
)
kf = KFold(n_splits=5, shuffle=True, random_state=42)
print("Linear regression: ")
# One independent LinearRegression per output column (nine in total).
linear_models = [LinearRegression() for _ in range(9)]

# Cross-validated score of each per-column model.
# NOTE(review): cross_val_score uses the estimator's default scorer, which for
# regressors is R^2 rather than classification accuracy — confirm the label.
cross_val_accuracies_lr = [
    np.mean(cross_val_score(model, X, y[:, i], cv=kf))
    for i, model in enumerate(linear_models)
]
mean_cross_val_accuracy_lr = np.mean(cross_val_accuracies_lr)
print(f"cross validation with Linear Regression: {mean_cross_val_accuracy_lr * 100:.2f}%")

# Fit every model on the training split and place its test-set predictions in
# the matching column, giving an array with one column per output.
predictions = np.column_stack([
    model.fit(X_train, y_train[:, i]).predict(X_test)
    for i, model in enumerate(linear_models)
])

# Convert scores to 0/1: the highest-scoring output(s) in each row become 1,
# everything else 0.
predictions = (predictions == predictions.max(axis=1, keepdims=True)).astype(int)
acc = accuracy_score(y_test, predictions)

# Report the hold-out accuracy and the per-label confusion matrices.
print(f"Accuracy using Linear Regression: {acc * 100:.2f}%")
print("Confusion Matrix for Linear Regression:")
print(multi_label_confusion_matrix(y_test, predictions))
# kNN regression over all outputs at once, using 15 neighbours.
print("\nEvaluating kNN Regression...")
knn = KNeighborsRegressor(n_neighbors=15)

# Cross-validated score with the shared 5-fold splitter.
# NOTE(review): the default scorer for a regressor is R^2, not accuracy.
cross_val_accuracies_knn = cross_val_score(knn, X, y, cv=kf)
mean_cross_val_accuracy_knn = cross_val_accuracies_knn.mean()
print(f"Cross validation with kNN Regression: {mean_cross_val_accuracy_knn * 100:.2f}%")

# Fit on the training split and score the held-out rows; the highest-scoring
# output(s) in each row become 1 and the rest 0.
knn_scores = knn.fit(X_train, y_train).predict(X_test)
knn_preds = (knn_scores == knn_scores.max(axis=1, keepdims=True)).astype(int)

# Report the hold-out accuracy and the per-label confusion matrices.
acc = accuracy_score(y_test, knn_preds)
print(f"Accuracy using kNN Regression: {acc * 100:.2f}%")
print("Confusion Matrix for kNN Regression:")
print(multi_label_confusion_matrix(y_test, knn_preds))
# MLP Regression
print()
print("MLP Below:")
# One hidden layer of 100 units; random_state fixes the weight initialisation.
mlp = MLPRegressor(hidden_layer_sizes=(100,), max_iter=10000, random_state=23)

# Cross-validated score with the shared 5-fold splitter.
# NOTE(review): the default scorer for a regressor is R^2, not accuracy.
cross_val_accuracies_mlp = cross_val_score(mlp, X, y, cv=kf)
mean_cross_val_accuracy_mlp = np.mean(cross_val_accuracies_mlp)

# Fix: the original line ended with a stray `]`, a SyntaxError that stopped
# the whole cell from running.
print(f"Cross validation with MLP Regression: {mean_cross_val_accuracy_mlp * 100:.2f}%")

# Fit on the training split and predict the held-out rows.
mlp.fit(X_train, y_train)
mlp_preds = mlp.predict(X_test)
# The highest-scoring output(s) in each row become 1, everything else 0.
mlp_preds = (mlp_preds == mlp_preds.max(axis=1)[:, None]).astype(int)
acc = accuracy_score(y_test, mlp_preds)

# Report the hold-out accuracy and the per-label confusion matrices.
print(f"Accuracy using MLP Regression: {acc * 100:.2f}%")
print("Confusion Matrix for MLP Regression:")
print(multi_label_confusion_matrix(y_test, mlp_preds))


In [None]:
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
# Train the tic-tac-toe opponent: an MLP regressor fitted on the multi-label
# dataset. The first nine columns of each row are used as features and the
# remaining columns as targets.
data = np.loadtxt('datasets/tictac_multi.txt')
X = data[:, :9]
y = data[:, 9:]

# Keep the fitted scaler around: ML_move applies it to live boards.
scaler = StandardScaler().fit(X)

# random_state pins the weight initialisation so the opponent plays the same
# way on every run (same seed as the MLP regressor used elsewhere in this
# notebook); without it each kernel restart trains a different model.
mlp = MLPRegressor(hidden_layer_sizes=(100,), max_iter=10000, random_state=23).fit(scaler.transform(X), y)
#define how the ML will make a move
def ML_move(board):
    """Pick the model's preferred empty square for the current board.

    Parameters:
        board: sequence of nine cell values; 0 marks an empty square.

    Returns:
        Index (0-8) of the empty square with the highest predicted score.
        Ties go to the lowest index.

    Raises:
        ValueError: if the board has no empty square. The previous
            implementation looped forever in that case.
    """
    cells = np.asarray(board)
    open_squares = np.flatnonzero(cells == 0)
    if open_squares.size == 0:
        raise ValueError("no empty squares left on the board")
    # The model is fed a single row, so take the first (only) row of scores.
    scores = mlp.predict(scaler.transform([board]))[0]
    # Rank only the open squares instead of repeatedly masking occupied ones.
    return open_squares[np.argmax(scores[open_squares])]
#print the board
def print_board(b):
    """Print the nine-cell board as three rows with cells joined by '|'.

    Cell values -1, 0 and 1 are drawn as 'O', ' ' and 'X' respectively.
    """
    glyphs = ('O', ' ', 'X')
    for start in (0, 3, 6):
        row = b[start:start + 3]
        # Shift each value by one so -1/0/1 index into the glyph tuple.
        print("|".join(glyphs[cell + 1] for cell in row))
#hardcoded to see which game state had a winner
def check_winner(b):
    """Return True when any row, column or diagonal holds three equal non-zero marks."""
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )
    for first, second, third in winning_lines:
        if b[first] == b[second] == b[third] and b[first] != 0:
            return True
    return False
# Initialize an empty board: 0 = empty, 1 = human (X), -1 = model (O).
board = np.zeros(9, dtype=int)

# Play a single game: the human moves first, then the model replies, until
# someone wins or the board fills up.
while True:
    print_board(board)
    raw_move = input("Your move (1-9): ")

    # Validate the input before touching the board. Previously a non-numeric
    # entry crashed with ValueError, an entry above 9 crashed with IndexError,
    # and 0 silently wrapped around to square 9 via index -1.
    try:
        move = int(raw_move) - 1
    except ValueError:
        print("Please enter a number from 1 to 9.")
        continue
    if not 0 <= move < 9:
        print("Please enter a number from 1 to 9.")
        continue
    if board[move] != 0:
        print("That square is already taken.")
        continue

    board[move] = 1
    if check_winner(board):
        print_board(board)
        print("win")
        break
    # The human always places the ninth mark, so a full board is only
    # possible here, right after the human's move.
    if 0 not in board:
        print_board(board)
        print("draw")
        break

    board[ML_move(board)] = -1
    if check_winner(board):
        print_board(board)
        print("loss")
        break


In [None]:
#Extra Credit Linear Regression normal equations
#https://www.educative.io/answers/a-deep-dive-into-linear-regression-3-way-implementation
#I used the article above to try and learn more about it (not sure if i'm correct in this implementation)
#I'm trying to use theta = (X^TX)^(-1)X^Ty
import numpy as np
# Placeholder inputs: `sudodata` is not defined in the visible cells, so
# replace it with a real 2-D feature array for X and the matching targets for
# Y before running this cell.
X = sudodata
Y = sudodata

# Prepend a column of ones so the first coefficient acts as the bias/intercept.
xb = np.c_[np.ones((X.shape[0], 1)), X]

# Normal equations: theta = (Xb^T Xb)^(-1) Xb^T Y. The pseudo-inverse is used
# in place of a plain inverse so a singular Xb^T Xb still gives a
# least-squares solution.
# Fix: the targets are bound to `Y` above; the original multiplied by an
# undefined lowercase `y`.
theta = np.linalg.pinv(xb.T.dot(xb)).dot(xb.T).dot(Y)

# theta holds the fitted coefficients (bias first); predictions for new rows
# are obtained with xb.dot(theta).