# Training with UCI Data 


In [1]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
connect_4 = fetch_ucirepo(id=26) 
  
# data (as pandas dataframes) 
X = connect_4.data.features 
y = connect_4.data.targets 
  
# metadata 
print(connect_4.metadata) 
  
# variable information 
print(connect_4.variables) 


{'uci_id': 26, 'name': 'Connect-4', 'repository_url': 'https://archive.ics.uci.edu/dataset/26/connect+4', 'data_url': 'https://archive.ics.uci.edu/static/public/26/data.csv', 'abstract': 'Contains connect-4 positions', 'area': 'Games', 'tasks': ['Classification'], 'characteristics': ['Multivariate', 'Spatial'], 'num_instances': 67557, 'num_features': 42, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1995, 'last_updated': 'Sat Mar 09 2024', 'dataset_doi': '10.24432/C59P43', 'creators': ['John Tromp'], 'intro_paper': None, 'additional_info': {'summary': 'This database contains all legal 8-ply positions in the game of connect-4 in which neither player has won yet, and in which the next move is not forced.\r\n\r\nx is the first player; o the second.\r\n\r\nThe outcome class is the game theoretical value for the first player.', 'purpose': None, 'funded_b

In [12]:
import os

os.chdir("/Users/leontan/OneDrive - National University of Singapore/Curriculum/Y3S1/CS2109S/Problem Set/contest/connect+4")

In [15]:
import unlzw3
from pathlib import Path
import csv
import os

uncompressed_data = unlzw3.unlzw(Path("./connect-4.data.Z"))
data = uncompressed_data.decode('utf-8').splitlines()
with open("new.csv", "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    for line in data:
        # making cells by using , delimiters
        writer.writerow(line.split(","))

In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import gzip
import shutil

# Load the dataset
#data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/connect-4/connect-4.data.Z"
compressed_file_path = "./connect+4/connect-4.data.Z"
decompressed_file_path = "./connect+4/connect-4.data"

# Decompress the file
with open(compressed_file_path, 'rb') as f_in:
    with open(decompressed_file_path, 'wb') as f_out:
        f_out.write(gzip.decompress(f_in.read()))

columns = [f'col{i}' for i in range(42)] + ['result']
data = pd.read_csv(decompressed_file_path, header=None, names=columns)

# Map 'win', 'loss', 'draw' to numeric values
data['result'] = data['result'].map({'win': 1, 'loss': -1, 'draw': 0})

# Convert board state to numeric (for simplicity, assume empty spaces are 0, 'x' is 1, 'o' is 2)
data = data.replace({'b': 0, 'x': 1, 'o': 2})

# Split dataset
X = data.drop(columns=['result']).values
y = data['result'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

BadGzipFile: Not a gzipped file (b'\x1f\x9d')

# LEGACY BELOW!

In [2]:
from connect_four import ConnectFour
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class ConnectFourEnv:
    def __init__(self):
        self.game = ConnectFour()
        self.current_player = 1

    def reset(self):
        self.game = ConnectFour()
        self.current_player = 1
        return self.game.get_state().flatten()

    def step(self, action):
        observation, reward, terminated = self.game.step((action, self.current_player))
        self.current_player = 3 - self.current_player  # Switch player
        return observation.flatten(), reward, terminated, {}

    def get_valid_actions(self):
        return self.game.get_valid_col_id()

    def is_valid_action(self, action):
        return self.game.is_valid_col_id(action)

class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class DQNAgent:
    def __init__(self, state_dim, action_dim, learning_rate=0.001, gamma=0.99, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.model = DQN(state_dim, action_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()

    def choose_action(self, state, valid_actions):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(valid_actions)
        state = torch.FloatTensor(state).unsqueeze(0)
        q_values = self.model(state)
        q_values = q_values.detach().numpy().flatten()
        q_values = [q_values[a] if a in valid_actions else -np.inf for a in range(self.action_dim)]
        return np.argmax(q_values)

    def learn(self, state, action, reward, next_state, done):
        state = torch.FloatTensor(state).unsqueeze(0)
        next_state = torch.FloatTensor(next_state).unsqueeze(0)
        reward = torch.FloatTensor([reward])
        action = torch.LongTensor([action])
        done = torch.FloatTensor([done])

        q_values = self.model(state)
        next_q_values = self.model(next_state)
        q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
        next_q_value = reward + self.gamma * next_q_values.max(1)[0] * (1 - done)
        loss = self.criterion(q_value, next_q_value.detach())

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Continuous Training Loop
env = ConnectFourEnv()
state_dim = env.reset().shape[0]
action_dim = 7  # Number of columns in Connect Four
agent1 = DQNAgent(state_dim, action_dim)
agent2 = DQNAgent(state_dim, action_dim)

num_episodes = 10000
evaluation_interval = 1000
best_avg_reward = -np.inf

for episode in range(num_episodes):
    state = env.reset()
    done = False
    total_reward1 = 0
    total_reward2 = 0

    while not done:
        if env.current_player == 1:
            valid_actions = env.get_valid_actions()
            action = agent1.choose_action(state, valid_actions)
        else:
            valid_actions = env.get_valid_actions()
            action = agent2.choose_action(state, valid_actions)

        next_state, reward, done, _ = env.step(action)

        if env.current_player == 1:
            agent1.learn(state, action, reward, next_state, done)
            total_reward1 += reward
        else:
            agent2.learn(state, action, reward, next_state, done)
            total_reward2 += reward

        state = next_state

    avg_reward = (total_reward1 + total_reward2) / 2

    if avg_reward > best_avg_reward:
        best_avg_reward = avg_reward
        torch.save(agent1.model.state_dict(), 'best_agent1.pth')
        torch.save(agent2.model.state_dict(), 'best_agent2.pth')
        print(f"New best average reward: {best_avg_reward}. Models saved.")

    if episode % evaluation_interval == 0:
        print(f"Episode {episode}, Avg Reward: {avg_reward}")

print("Training completed.")

New best average reward: 5.0. Models saved.
Episode 0, Avg Reward: 5.0
Episode 1000, Avg Reward: 5.0
Episode 2000, Avg Reward: 5.0
Episode 3000, Avg Reward: 5.0
Episode 4000, Avg Reward: 0.25
Episode 5000, Avg Reward: 5.0
Episode 6000, Avg Reward: 5.0
Episode 7000, Avg Reward: 5.0
Episode 8000, Avg Reward: 5.0
Episode 9000, Avg Reward: 5.0
Training completed.


In [2]:
# Extract weights for agent1
weights1 = agent1.model.state_dict()
fc1_weight1 = weights1['fc1.weight'].numpy()
fc1_bias1 = weights1['fc1.bias'].numpy()
fc2_weight1 = weights1['fc2.weight'].numpy()
fc2_bias1 = weights1['fc2.bias'].numpy()
fc3_weight1 = weights1['fc3.weight'].numpy()
fc3_bias1 = weights1['fc3.bias'].numpy()

# Extract weights for agent2
weights2 = agent2.model.state_dict()
fc1_weight2 = weights2['fc1.weight'].numpy()
fc1_bias2 = weights2['fc1.bias'].numpy()
fc2_weight2 = weights2['fc2.weight'].numpy()
fc2_bias2 = weights2['fc2.bias'].numpy()
fc3_weight2 = weights2['fc3.weight'].numpy()
fc3_bias2 = weights2['fc3.bias'].numpy()

In [1]:
# Set print options for numpy to avoid truncation
np.set_printoptions(threshold=np.inf)
print("fc1_weight1: " + np.array2string(fc1_weight1, separator=', '))



NameError: name 'np' is not defined

In [13]:
# Set print options for numpy to avoid truncation
np.set_printoptions(threshold=np.inf)
print("fc1_bias1: [" + ", ".join(map(str, fc1_bias1)) + "]")


fc1_bias1: [124.28735, 124.247055, 124.13133, 124.04232, 124.05813, 123.896996, 124.16767, 124.26903, 124.2692, 124.33151, 124.29783, 124.04075, 124.25283, 124.22639, 124.240616, 124.290794, 124.25375, 124.16085, -0.13464019, 124.07205, 124.31239, 124.10564, 124.27688, 124.202705, 124.12982, 124.01562, 124.04947, 124.322655, 124.05492, 124.28683, 124.19297, 124.17865, 124.33303, 124.20267, 124.294754, 124.18406, 124.1137, 124.208664, 124.09232, 123.996735, 124.013336, 124.17714, 124.339424, 124.23878, 124.138756, 123.866425, 124.02551, 124.15055, 124.05794, 124.0848, 124.22317, 124.22488, 124.23878, 124.06531, 124.32336, 124.27865, 124.20193, 124.21544, 124.20395, 124.08565, 123.74747, 124.32389, 124.268265, 124.11943, 124.16243, 124.283104, 124.2778, 124.179115, 124.12095, 124.085594, 124.016014, 124.006195, 124.241005, 124.20248, 124.32482, 124.11995, 124.19164, 124.14168, 124.093475, 123.9974, 124.196915, 124.21552, 124.19656, 124.05875, 124.322296, 124.16359, 124.188324, 124.20511,

In [17]:
# Set print options for numpy to avoid truncation
np.set_printoptions(threshold=np.inf)
print("fc2_weight1: " + np.array2string(fc2_weight1, separator=', '))
#print("fc2_weight1: " + str(fc2_weight1))


fc2_weight1: [[ 1.01947762e+02,  1.02027634e+02,  1.01855774e+02,  1.02023041e+02,
   1.01835152e+02,  1.01853951e+02,  1.01858276e+02,  1.01998306e+02,
   1.02005516e+02,  1.01994957e+02,  1.01987015e+02,  1.01938126e+02,
   1.01956459e+02,  1.01958282e+02,  1.02005020e+02,  1.01968971e+02,
   1.02035568e+02,  1.01918266e+02,  1.36065828e-02,  1.01919800e+02,
   1.01963898e+02,  1.01875107e+02,  1.01959328e+02,  1.01999016e+02,
   1.01844307e+02,  1.01891121e+02,  1.01947906e+02,  1.01900597e+02,
   1.01958763e+02,  1.02069954e+02,  1.02015175e+02,  1.01945496e+02,
   1.01900856e+02,  1.01904335e+02,  1.02014969e+02,  1.01949326e+02,
   1.01872795e+02,  1.01919075e+02,  1.01875793e+02,  1.01921333e+02,
   1.01844612e+02,  1.02028008e+02,  1.01989014e+02,  1.01828964e+02,
   1.01912781e+02,  1.01816544e+02,  1.01990166e+02,  1.01973457e+02,
   1.01912354e+02,  1.01952316e+02,  1.01987129e+02,  1.01994804e+02,
   1.01937103e+02,  1.02011238e+02,  1.01846420e+02,  1.01980110e+02,
   1.01

In [18]:
# Set print options for numpy to avoid truncation
np.set_printoptions(threshold=np.inf)
print("fc2_bias1: [" + ", ".join(map(str, fc2_bias1)) + "]")
#print("fc2_bias1: " + str(fc2_bias1))


fc2_bias1: [121.59768, -0.045612372, -0.0010037818, 121.46755, 121.37988, -0.076778084, 0.004268723, -0.0063509294, 121.60253, 121.431015, -0.07764908, 0.056761377, 0.022250155, 121.31636, 121.460304, 0.06755628, 0.044707403, -0.036064062, 0.064480335, 121.45397, -0.05633984, 0.0072164144, -0.0054846397, 121.44957, 0.029741941, 121.469505, 0.02074147, -0.049761172, -0.082122, 0.004701345, 0.021541115, 121.477264, 121.38017, 0.06993361, 121.373985, 121.25347, 121.44908, 0.040038943, 121.29515, 121.43291, 121.4404, 121.45304, 121.567505, 121.495865, -0.083078496, 121.37321, 121.35008, 0.019157328, 121.42095, 121.42033, 121.41784, -0.05272863, 121.33618, 0.05295697, 121.49103, 121.40082, 121.375626, 121.358955, -0.016492004, -0.026530616, 0.02319022, -0.09327778, -0.08417156, -0.0036633122, 121.41043, 121.606155, -0.01844652, 0.0091473935, 121.444855, 121.37744, -0.0755822, 0.005605673, -0.020907825, 121.37182, 0.070426464, 121.45645, 121.333786, 121.29037, -0.07570197, 0.04644898, 121.43

In [19]:
# Set print options for numpy to avoid truncation
np.set_printoptions(threshold=np.inf)
print("fc3_weight1: " + np.array2string(fc3_weight1, separator=', '))
#print("fc3_weight1: " + str(fc3_weight1))


fc3_weight1: [[ 6.10471306e+01, -7.21142814e-02, -2.30397470e-02,  6.11240501e+01,
   6.10627975e+01,  2.22401507e-03,  1.40576074e-02,  1.01314427e-03,
   6.10084496e+01,  6.11145515e+01, -5.72608523e-02, -1.27962371e-02,
   2.94731651e-02,  6.10157814e+01,  6.11816788e+01, -4.73932177e-02,
  -7.16772350e-03,  2.43422594e-02,  3.36156934e-02,  6.10293808e+01,
  -6.44687191e-02, -7.69425463e-03, -6.78605661e-02,  6.10855370e+01,
   6.10535629e-02,  6.11370850e+01, -4.42261323e-02, -6.94156066e-02,
  -5.80950454e-02, -1.76254958e-02, -8.25443268e-02,  6.11469803e+01,
   6.10381622e+01, -6.73400760e-02,  6.11368446e+01,  6.10753517e+01,
   6.11221962e+01, -1.49468211e-02,  6.11776123e+01,  6.10744095e+01,
   6.10761909e+01,  6.10893021e+01,  6.10372734e+01,  6.10464935e+01,
  -3.53019163e-02,  6.11584473e+01,  6.10252342e+01, -4.01356705e-02,
   6.11745148e+01,  6.11450500e+01,  6.11053734e+01,  3.15846317e-02,
   6.10647850e+01, -5.47680594e-02,  6.11777000e+01,  6.11201248e+01,
   6.10

In [20]:
# Set print options for numpy to avoid truncation
np.set_printoptions(threshold=np.inf)
print("fc3_bias1: [" + ", ".join(map(str, fc3_bias1)) + "]")
#print("fc3_bias1: " + str(fc3_bias1))

fc3_bias1: [63.04843, 12.272941, 54.7145, 54.07681, 10.664435, 48.918995, 58.908257]


In [4]:
print("Weights for Agent 2: " + str(weights2))
print("fc1_weight2: " + str(fc1_weight2))
print("fc1_bias2: " + str(fc1_bias2))
print("fc2_weight2: " + str(fc2_weight2))
print("fc2_bias2: " + str(fc2_bias2))
print("fc3_weight2: " + str(fc3_weight2))
print("fc3_bias2: " + str(fc3_bias2))

Weights for Agent 1: OrderedDict([('fc1.weight', tensor([[100.0155,   3.7640,  71.9935,  ...,  13.8498,  63.0860,  94.0338],
        [ 99.9333,   4.0098,  71.9511,  ...,  13.5944,  62.7920,  93.9297],
        [ 99.9448,   3.7207,  72.0399,  ...,  13.9452,  63.2018,  94.0415],
        ...,
        [ 99.9300,   3.8432,  72.1367,  ...,  13.7681,  63.0094,  93.9149],
        [ 99.9165,   3.8412,  71.9397,  ...,  13.9017,  62.9413,  93.9238],
        [ 99.9817,   3.9233,  72.0321,  ...,  13.7514,  62.9734,  94.0402]])), ('fc1.bias', tensor([124.2874, 124.2471, 124.1313, 124.0423, 124.0581, 123.8970, 124.1677,
        124.2690, 124.2692, 124.3315, 124.2978, 124.0407, 124.2528, 124.2264,
        124.2406, 124.2908, 124.2538, 124.1609,  -0.1346, 124.0721, 124.3124,
        124.1056, 124.2769, 124.2027, 124.1298, 124.0156, 124.0495, 124.3227,
        124.0549, 124.2868, 124.1930, 124.1786, 124.3330, 124.2027, 124.2948,
        124.1841, 124.1137, 124.2087, 124.0923, 123.9967, 124.0133, 124.1771