In [69]:
#%cd '/Users/pc/Desktop/Deep Learning/Project Github/CS7643_final_project'

In [70]:
#!python src/holdup/datapickler.py

## This notebook creates two dictionaries. `d_nodes` holds the test accuracy on the preflop, flop, turn and river datasets for different numbers of hidden nodes, and `d_epochs` holds the test accuracy on the same four datasets for different numbers of training epochs.

## The code takes a lot of time to run. For that reason the dictionaries have already been saved in the following directory: src/holdup/visualizations/json_dictionaries. This notebook only needs to be run if somebody wants to regenerate the dictionaries.

In [50]:
from holdup.the_model.autoencoder import Autoencoder
import numpy as np
# import pandas as pd
import torch
from torch import nn
import torch.optim as optim
# from sklearn.model_selection import train_test_split
import random
import os
from holdup.parser.replayable_hand import ReplayableHand, Streets
import functools
from typing import Tuple, List
import matplotlib.pyplot as plt
from holdup.the_model.get_datasets import *
import pandas as pd
from sklearn.model_selection import train_test_split

# Street identifiers; get_stage maps them, in this order, to positions 0..3.
preflop = "preflop"
flop = "flop"
turn = "turn"
river = "river"

def get_stage(dataset, stage):
    """Return the entry of ``dataset`` that belongs to the given street.

    Args:
        dataset: Indexable with one entry per street, ordered
            preflop, flop, turn, river (positions 0 to 3).
        stage: One of the module-level street identifiers
            (``preflop``, ``flop``, ``turn``, ``river``).

    Returns:
        ``dataset[0]`` .. ``dataset[3]`` depending on ``stage``.

    Raises:
        ValueError: If ``stage`` is not a known street. Failing here is
            clearer than returning ``None`` and crashing later in a caller.
    """
    stage_order = (preflop, flop, turn, river)
    for position, name in enumerate(stage_order):
        if stage == name:
            return dataset[position]
    raise ValueError(
        "unknown stage {!r}; expected one of {}".format(stage, stage_order)
    )

def flatten_streets(dataset):
    """Merge the per-street sample lists of every log file into four lists.

    Args:
        dataset: Iterable of log files; each log file is an iterable of up to
            four per-street iterables of samples (position 0 to 3).

    Returns:
        A new list of four lists; entry ``k`` holds the samples found at
        position ``k`` of every log file, in log-file order.

    Raises:
        IndexError: If a log file contains more than four streets.
    """
    streets = [[], [], [], []]
    for logfile in dataset:
        for index, street in enumerate(logfile):
            # Extend in place: rebuilding the list with `+` would re-copy all
            # previously collected samples for every log file (quadratic).
            streets[index].extend(street)
    return streets


def get_data(dataset, stage):
    """Build the list of sample pairs for a single street.

    All log files in ``dataset`` are flattened per street, the requested
    ``stage`` is selected, and every sample ``x`` is reduced to the pair
    ``(x[0], x[1][1])``.
    NOTE(review): presumably ``x[0]`` is the model input and ``x[1][1]`` the
    class label -- confirm against the code that writes the pickle.
    """
    per_street = flatten_streets(dataset)
    samples = get_stage(per_street, stage)
    pairs = []
    for sample in samples:
        pairs.append((sample[0], sample[1][1]))
    return pairs


# `pickle` is not imported explicitly anywhere else in this notebook
# (presumably it leaked in through the star import); import it here so this
# cell does not depend on another module's namespace.
import pickle

# NOTE: pickle.load can execute arbitrary code. Only load a pickle file that
# you generated yourself (presumably via src/holdup/datapickler.py -- confirm).
with open('last_possible.pickle', 'rb') as last_possible_pickle:
    last_possible_dataset = pickle.load(last_possible_pickle)

# One list of sample pairs per street.
preflop_data = get_data(last_possible_dataset, "preflop")
flop_data = get_data(last_possible_dataset, "flop")
turn_data = get_data(last_possible_dataset, "turn")
river_data = get_data(last_possible_dataset, "river")

def separate_train_test(street_data, train_fraction=0.6):
    """Split a sequence into a leading train part and a trailing test part.

    The split is positional: no shuffling is performed, so the first
    ``int(len(street_data) * train_fraction)`` items form the train set and
    the remaining items form the test set.

    Args:
        street_data: Sliceable sequence of samples.
        train_fraction: Share of the samples assigned to the train set,
            between 0 and 1 inclusive. Defaults to 0.6.

    Returns:
        Tuple ``(train_set, test_set)``.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            "train_fraction must be between 0 and 1, got {!r}".format(train_fraction)
        )
    n_train = int(len(street_data) * train_fraction)
    return street_data[:n_train], street_data[n_train:]

# Split each street's samples into a train part and a test part.
train_preflop, test_preflop = separate_train_test(preflop_data)
train_flop, test_flop = separate_train_test(flop_data)
train_turn, test_turn = separate_train_test(turn_data)
train_river, test_river = separate_train_test(river_data)

# Report the size of every split, one line per split, preflop through river.
print(
    *(
        template.format(len(subset))
        for template, subset in (
            ("preflop_train_data_size: {}", train_preflop),
            ("preflop_test_data_size: {}", test_preflop),
            ("flop_train_data_size: {}", train_flop),
            ("flop_test_data_size: {}", test_flop),
            ("turn_train_data_size: {}", train_turn),
            ("turn_test_data_size: {}", test_turn),
            ("river_train_data_size: {}", train_river),
            ("river_test_data_size: {}", test_river),
        )
    ),
    sep="\n",
)


# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Create an instance of the autoencoder
# model = Autoencoder(num_hidden_nodes).to(device)

def train(model, train_loader, num_epochs, weight_decay, pftr='stage_name'):
    """Train ``model`` as a classifier using Adam and cross-entropy loss.

    Args:
        model: ``torch.nn.Module`` that maps a flattened ``(batch, features)``
            float tensor to per-class scores.
        train_loader: Iterable of ``(inputs, labels)`` batches that exposes a
            sized ``.dataset`` attribute (e.g. a ``DataLoader``).
        num_epochs: Number of passes over ``train_loader``.
        weight_decay: Weight decay handed to the Adam optimizer.
        pftr: Stage label. Not used by the training loop; kept so existing
            callers that pass it keep working.

    Returns:
        List with one float per epoch: the loss summed over samples divided
        by ``len(train_loader.dataset)``.
    """
    criterion = nn.CrossEntropyLoss()  # classification loss over class scores
    optimizer = optim.Adam(model.parameters(), weight_decay=weight_decay)
    # Batches must live on the same device as the model's parameters,
    # otherwise the forward pass fails when the model sits on the GPU.
    model_device = next(model.parameters()).device
    model.train()
    train_losses = []
    for _ in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.float().to(model_device)
            labels = labels.to(model_device)
            # Flatten everything except the batch dimension.
            inputs = inputs.reshape(inputs.size(0), -1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # loss is a batch mean; weight it by batch size to sum per sample.
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        train_losses.append(epoch_loss)
    return train_losses


    
def quick_test2(model,test_loader):
    """Return the classification accuracy of ``model`` as a whole percentage.

    Args:
        model: ``torch.nn.Module`` that maps a flattened ``(batch, features)``
            float tensor to per-class scores.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        int: ``correct / total * 100`` rounded to the nearest integer.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    correct = 0
    total = 0
    # Batches must live on the same device as the model's parameters.
    model_device = next(model.parameters()).device
    # Evaluate in eval mode so layers such as dropout / batch-norm behave
    # deterministically; the previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.float().to(model_device)
                labels = labels.to(model_device)
                # Flatten everything except the batch dimension.
                inputs = inputs.reshape(inputs.size(0), -1)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)
    return int(round((correct/total)*100, 0))


def train_and_quick_test(num_hidden_nodes, num_epochs, weight_decay,train_data,test_data, pftr):
    """Train a fresh ``Autoencoder`` on ``train_data`` and score it on ``test_data``.

    Args:
        num_hidden_nodes: Value passed to the ``Autoencoder`` constructor.
        num_epochs: Number of training epochs.
        weight_decay: Weight decay forwarded to ``train``.
        train_data: Dataset of samples used for training.
        test_data: Dataset of samples used for evaluation.
        pftr: Stage label forwarded to ``train``.

    Returns:
        The accuracy reported by ``quick_test2`` for the trained model.
    """
    # Define the model
    model = Autoencoder(num_hidden_nodes).to(device)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=20, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=20, shuffle=False)
    train(model, train_loader, num_epochs, weight_decay,pftr)
    # Test the model. `quick_test2` is the only evaluation helper defined in
    # this notebook; the result is returned instead of being discarded.
    return quick_test2(model,test_loader)
    

###
def get_visualization_parameters_nodes(weight_decay,train_preflop, train_flop, train_turn, train_river, test_preflop, test_flop, test_turn, test_river, pftr_preflop, pftr_flop, pftr_turn, pftr_river):
    """Measure test accuracy per street for hidden-node counts 10, 20, ..., 100.

    For every hidden-node count a fresh ``Autoencoder`` is trained per street
    (20 epochs for preflop and flop, 40 epochs for turn and river) and scored
    with ``quick_test2``. The dictionary built so far is printed after each
    hidden-node count.

    Returns:
        dict mapping ``str(hidden_nodes)`` to a tuple of accuracies ordered
        ``(preflop, flop, turn, river)``.
    """
    # (train set, test set, training epochs, stage label), preflop -> river.
    street_runs = (
        (train_preflop, test_preflop, 20, pftr_preflop),
        (train_flop, test_flop, 20, pftr_flop),
        (train_turn, test_turn, 40, pftr_turn),
        (train_river, test_river, 40, pftr_river),
    )

    final_dict = {}
    for hidden_nodes in range(10, 110, 10):
        accuracies = []
        for train_set, test_set, epochs, stage_label in street_runs:
            model = Autoencoder(hidden_nodes).to(device)
            train_loader = torch.utils.data.DataLoader(train_set, batch_size=20, shuffle=True)
            test_loader = torch.utils.data.DataLoader(test_set, batch_size=20, shuffle=False)
            train(model, train_loader, epochs, weight_decay, stage_label)
            accuracies.append(quick_test2(model, test_loader))

        final_dict[str(hidden_nodes)] = tuple(accuracies)
        # Progress report: show everything collected so far.
        print(final_dict)

    return final_dict


def get_visualization_parameters_epochs(weight_decay,train_preflop, train_flop, train_turn, train_river, test_preflop, test_flop, test_turn, test_river, pftr_preflop, pftr_flop, pftr_turn, pftr_river):
    """Measure test accuracy per street for epoch counts 10, 20, ..., 60.

    For every epoch count a fresh ``Autoencoder`` is trained per street
    (constructed with 20 for preflop and flop, 40 for turn and river) and
    scored with ``quick_test2``. The dictionary built so far is printed after
    each epoch count.

    Returns:
        dict mapping ``str(num_epochs)`` to a tuple of accuracies ordered
        ``(preflop, flop, turn, river)``.
    """
    # (train set, test set, Autoencoder size, stage label), preflop -> river.
    street_runs = (
        (train_preflop, test_preflop, 20, pftr_preflop),
        (train_flop, test_flop, 20, pftr_flop),
        (train_turn, test_turn, 40, pftr_turn),
        (train_river, test_river, 40, pftr_river),
    )

    final_dict = {}
    for num_epochs in range(10, 70, 10):
        accuracies = []
        for train_set, test_set, hidden_nodes, stage_label in street_runs:
            model = Autoencoder(hidden_nodes).to(device)
            train_loader = torch.utils.data.DataLoader(train_set, batch_size=20, shuffle=True)
            test_loader = torch.utils.data.DataLoader(test_set, batch_size=20, shuffle=False)
            train(model, train_loader, num_epochs, weight_decay, stage_label)
            accuracies.append(quick_test2(model, test_loader))

        final_dict[str(num_epochs)] = tuple(accuracies)
        # Progress report: show everything collected so far.
        print(final_dict)

    return final_dict

def get_visualization_dictionaries(weight_decay,train_preflop, train_flop, train_turn, train_river, test_preflop, test_flop, test_turn, test_river, pftr_preflop, pftr_flop, pftr_turn, pftr_river):
    """Run the hidden-node sweep, then the epoch sweep, on the same inputs.

    Returns:
        Tuple ``(nodes_dict, epochs_dict)`` holding the results of
        ``get_visualization_parameters_nodes`` and
        ``get_visualization_parameters_epochs`` respectively.
    """
    # Both sweeps take exactly the same positional arguments.
    shared_args = (
        weight_decay,
        train_preflop, train_flop, train_turn, train_river,
        test_preflop, test_flop, test_turn, test_river,
        pftr_preflop, pftr_flop, pftr_turn, pftr_river,
    )
    nodes_dict = get_visualization_parameters_nodes(*shared_args)
    epochs_dict = get_visualization_parameters_epochs(*shared_args)
    return nodes_dict, epochs_dict




preflop_train_data_size: 30650
preflop_test_data_size: 20434
flop_train_data_size: 23450
flop_test_data_size: 15634
turn_train_data_size: 18447
turn_test_data_size: 12298
river_train_data_size: 16371
river_test_data_size: 10915


## Running the code line below will generate two dictionaries: d_nodes and d_epochs 

In [68]:
# Long-running: trains a fresh model per street for every sweep setting.
d_nodes, d_epochs = get_visualization_dictionaries(
    .001,
    train_preflop, train_flop, train_turn, train_river,
    test_preflop, test_flop, test_turn, test_river,
    pftr_preflop='preflop_last_possible',
    pftr_flop='flop_last_possible',
    pftr_turn='turn_last_possible',
    pftr_river='river_last_possible',
)

In [54]:
# Test accuracy (%) keyed by hidden-node count (as a string); each tuple is
# ordered (preflop, flop, turn, river).
d_nodes

{'10': (80, 83, 90, 88),
 '20': (79, 83, 90, 87),
 '30': (78, 82, 90, 87),
 '40': (80, 82, 90, 87),
 '50': (79, 83, 91, 87),
 '60': (79, 80, 90, 87),
 '70': (80, 82, 90, 87),
 '80': (78, 83, 90, 87),
 '90': (80, 82, 90, 87),
 '100': (78, 82, 90, 87)}

In [55]:
# Test accuracy (%) keyed by number of training epochs (as a string); each
# tuple is ordered (preflop, flop, turn, river).
d_epochs

{'10': (80, 83, 90, 87),
 '20': (79, 83, 90, 87),
 '30': (79, 83, 90, 87),
 '40': (80, 83, 89, 87),
 '50': (79, 83, 90, 88),
 '60': (78, 82, 90, 88)}

## Running the cell below will export the dictionaries as JSON files to src/holdup/visualizations/json_dictionaries

In [71]:
import json

# Write each result dictionary to its own JSON file. The paths are relative to
# the current working directory; tuples are serialized as JSON arrays.
for json_path, accuracy_dict in (
    (r'src/holdup/visualizations/json_dictionaries/d_nodes.json', d_nodes),
    (r'src/holdup/visualizations/json_dictionaries/d_epochs.json', d_epochs),
):
    with open(json_path, 'w') as out_file:
        json.dump(accuracy_dict, out_file)