In [1]:
import torch
import torch.nn as nn
import numpy as np
import os

In [46]:
import torch
import torch.nn as nn
import os
import numpy as np

class PT(nn.Module):
    """Fully connected network mapping a 397-value input to 96 move probabilities.

    The architecture follows ``layers_dims``: ``Linear`` + ``ReLU`` for every
    hidden layer, then a final ``Linear`` followed by ``Softmax(dim=1)``.
    Weights are saved to and loaded from ``Torch/<name>.pth``.

    Construction is interactive: it prompts on stdin to decide whether to
    restore saved weights or a named checkpoint, and whether moves are chosen
    by dice roll ("R") or by maximizing ("M").
    """

    def __init__(self):
        super().__init__()
        self.batch_num = 0
        self.layers_dims = [397, 1024, 512, 256, 128, 96]  # 5-layer model
        self.learning_rate = 0.001
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(self.device)

        # Hook bookkeeping must exist before any forward pass can run.
        self.activations = {}  # Dictionary to store activations
        self.hook_handles = []  # List to store hook handles

        # Hidden layers: Linear + ReLU for each consecutive pair of sizes.
        layers = []
        for fan_in, fan_out in zip(self.layers_dims[:-2], self.layers_dims[1:-1]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Output layer: Linear + Softmax over the feature axis.
        layers.append(nn.Linear(self.layers_dims[-2], self.layers_dims[-1]))
        layers.append(nn.Softmax(dim=1))
        self.model = nn.Sequential(*layers)

        # Initialize BEFORE any restore so loaded weights are not overwritten.
        self.apply(self._init_weights)
        self.model = self.model.to(self.device)
        # Kept on self because train_model() steps this optimizer.
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.learning_rate)

        self._restore_parameters()

        if input("Dice roll or maximize?") == "M":
            self.move_type = "M"
        else:
            self.move_type = "R"

        self.initialize_training_batch()
        self.legal_means = []
        self.illegal_means = []
        self.trainings = 0

    def _restore_parameters(self):
        """Interactively restore saved weights or a named checkpoint, if the user asks."""
        if not self.check_for_params():
            print("Using initialized parameters.")
            return

        if input("Start from saved?") == "Y":
            # Restore variables from disk.
            state = torch.load("Torch/model_weights.pth", map_location=self.device)
            self.model.load_state_dict(state)
            print("Session restored!")
            if input("Make checkpoint from saved?") == "Y":
                name = input("Checkpoint name:")
                self.save_obj(name)
        elif input("Start from checkpoint?") == "Y":
            loaded = False
            name = None
            # Keep asking until a checkpoint loads or the user skips with "s".
            while not loaded and name != "s":
                name = input("Checkpoint name [s = skip]:")
                if name != "s":
                    loaded = self.load_checkpoint(name)
                else:
                    print("Using initialized parameters.")
        else:
            print("Using initialized parameters.")

    def _init_weights(self, module):
        """Xavier-initialize the weights of ``nn.Linear`` modules and zero their biases."""
        if isinstance(module, nn.Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)

    def add_hooks(self):
        """Register a forward hook on every module, storing outputs in ``self.activations``.

        Calling this while hooks are already registered is a no-op, so repeated
        calls (e.g. one per training batch) do not stack duplicate hooks.
        """
        if self.hook_handles:
            return
        for name, layer in self.named_modules():
            handle = layer.register_forward_hook(self._save_activation(name))
            self.hook_handles.append(handle)

    def remove_hooks(self):
        """Remove every hook registered by ``add_hooks``."""
        for handle in self.hook_handles:
            handle.remove()
        self.hook_handles.clear()  # Clear the list after removal

    def forward_pass(self, x):
        """Run inference without gradients.

        x: array-like accepted by ``convert`` (it is transposed before the model sees it).
        Returns the model output as a float64 numpy array, transposed back.
        """
        x = self.convert(x)
        x = x.to(self.device)
        with torch.no_grad():
            x = self(x)
        return self.deconvert(x)

    def forward(self, x, y=None):
        """Apply the sequential model to ``x``; ``y`` is accepted but unused."""
        return self.model(x)

    def generate_move(self, AL):  # generate a move from a probabilities vector
        """Sample a move from a probability vector.

        AL: array-like of non-negative move scores; it is flattened and
            renormalized to sum to 1 before sampling.
        Returns ``(one_hot_move, piece_number, move)``: the one-hot vector of the
        sampled index, the index of the 8-entry group it falls in, and that
        group's 8-entry slice of the one-hot vector.
        Raises ValueError if the scores do not have a positive, finite sum.
        """
        flat = np.asarray(AL, dtype=np.float64).flatten()
        total = flat.sum()
        if not np.isfinite(total) or total <= 0:
            raise ValueError("AL must have a positive, finite sum to sample a move")
        n_moves = flat.size
        choice = int(np.random.choice(n_moves, 1, p=flat / total)[0])  # roll the dice
        one_hot_move = np.eye(n_moves, dtype='int')[choice]  # generate one-hot version
        piece_number = choice // 8  # each piece owns 8 consecutive entries
        move = one_hot_move[(8 * piece_number):((8 * piece_number) + 8)]  # the move for that piece

        return one_hot_move, piece_number, move

    def train_model(self, y, x, weights, illegal_masks):
        """Run one optimizer step on a weighted squared-error cost.

        y: parallel set of unit-normalized legal move vectors to calculate cost.
        x: parallel set of input vectors.
        weights: parallel set of number of attempts at a move to weight the cost.
        illegal_masks: parallel set of non-normalized legal move vectors
            (accepted for interface compatibility; not used here).

        Returns ``(cost, params)``: the mean weighted cost tensor and an empty dict.
        """
        params = {}
        self.add_hooks()  # idempotent: hooks are registered only once
        self.batch_num += 1
        x = self.convert(x).to(self.device)
        y = self.convert(y).to(self.device)
        weights = self.convert(weights).to(self.device)
        prediction = self(x)
        cost = (((y - prediction) ** 2) * weights).mean()
        self.optimizer.zero_grad(set_to_none=True)
        cost.backward()
        self.optimizer.step()

        return cost, params

    def _save_activation(self, name):
        """Build a forward hook that stores a module's output under ``name``."""
        def hook(model, input, output):
            self.activations[name] = output.detach().cpu()
        return hook

    def convert(self, x):
        """Convert an array-like to a float32 tensor with its first two axes swapped."""
        return torch.from_numpy(np.array(x, dtype=np.float32)).transpose(0, 1)

    def deconvert(self, x):
        """Convert a tensor back to a float64 numpy array with its first two axes swapped.

        The tensor is always detached and moved to the CPU first, so this works
        for tensors that require grad and for tensors living on a GPU.
        """
        return x.transpose(0, 1).detach().cpu().numpy().astype(np.float64)

    def initialize_training_batch(self):
        """Reset the per-batch accumulators to empty."""
        self.moves = []
        self.illegal_masks = []
        self.probabilities_batch = []
        self.X_batch = []
        self.attempts = []  # list with attempted (illegal) moves
        self.num_attempts = 0  # total number of attempts to get to a legal move
        self.num_attempts_batch = []

    def save_parameters(self):
        """Save the current weights as ``Torch/model_weights.pth``."""
        self.save_obj("model_weights")

    def save_obj(self, name):
        """Save the model's state dict to ``Torch/<name>.pth``, creating ``Torch/`` if needed."""
        os.makedirs('Torch', exist_ok=True)
        torch.save(self.model.state_dict(), 'Torch/' + name + '.pth')

    def check_for_params(self):
        """Return True if ``Torch/model_weights.pth`` exists."""
        return os.path.isfile("Torch/model_weights.pth")

    def load_checkpoint(self, name):
        """Load ``Torch/<name>.pth`` into the model.

        Returns True when the checkpoint was loaded, False when the file could
        not be opened. Other errors (e.g. a state dict that does not match the
        model) propagate to the caller.
        """
        try:
            state = torch.load('Torch/' + name + '.pth', map_location=self.device)
        except (OSError, IOError):
            print("Can't find that checkpoint...")
            return False
        self.model.load_state_dict(state)
        print("Checkpoint " + name + ".pth loaded!")
        return True

    def get_input_vector(self, board, color, jump_piece_number):
        """Build the network input for a board position.

        board: object providing ``get_piece_arrays(color)``.
        color: passed through to ``board.get_piece_arrays``.
        jump_piece_number: index (0-11) of the piece in a jump, or None.

        Returns the piece arrays followed by a 12-entry one-hot of the jumping
        piece (all zeros when there is none) and a single 0/1 jump flag.
        """
        v = board.get_piece_arrays(color)
        # Identity check: 0 is a valid piece number and must not be treated as "no jump".
        if jump_piece_number is not None:
            j_vector = np.eye(12)[jump_piece_number]
            jump = np.array([1])
        else:
            j_vector = np.zeros((12))
            jump = np.array([0])
        v = np.append(v, j_vector)
        v = np.append(v, jump)
        return v




In [47]:
# Build the network. PT.__init__ reads answers from stdin (restore choice,
# move type), so this cell blocks until the prompts are answered.
network = PT()

cpu


Start from saved? n
Start from checkpoint? n


Using initialized parameters.


Dice roll or maximize? R


In [48]:
# Random 0/1 test input: 397 rows by 50 columns, each entry 1 when the
# uniform draw is at least 0.5.
random = np.random.rand(397, 50)
X = np.where(random < 0.5, 0, 1)

In [49]:
# Register forward hooks on every module, then run one inference pass so the
# hooks populate network.activations.
network.add_hooks()
Y = network.forward_pass(X)

In [50]:
# Dump the per-module outputs captured by the forward hooks.
print("Captured activations:", network.activations)

Captured activations: {'model.0': tensor([[-0.4154, -0.4769, -0.4227,  ...,  0.5654, -0.6044, -0.9812],
        [-0.4008,  0.0248,  0.7569,  ...,  0.0754, -0.9210, -0.0502],
        [ 0.5056, -0.7747,  0.0524,  ...,  0.4202,  0.0205, -0.2826],
        ...,
        [ 0.1459, -0.3692, -0.1355,  ..., -0.3077, -0.1146, -1.1875],
        [ 0.6481,  0.0870,  0.3939,  ..., -0.5996,  0.7100, -0.6257],
        [ 0.3573, -0.4643, -0.7054,  ..., -0.1771, -0.0200, -0.5332]]), 'model.1': tensor([[0.0000, 0.0000, 0.0000,  ..., 0.5654, 0.0000, 0.0000],
        [0.0000, 0.0248, 0.7569,  ..., 0.0754, 0.0000, 0.0000],
        [0.5056, 0.0000, 0.0524,  ..., 0.4202, 0.0205, 0.0000],
        ...,
        [0.1459, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.6481, 0.0870, 0.3939,  ..., 0.0000, 0.7100, 0.0000],
        [0.3573, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]), 'model.2': tensor([[ 0.5985, -0.1362, -0.7727,  ..., -0.3569, -0.3635, -0.5616],
        [ 0.3513,  0.0801, -1.0095,  ...

In [72]:
# Persist the current weights; save_parameters() writes Torch/model_weights.pth.
network.save_parameters()

In [22]:
# Copy rows 0 and 3 of m into j (j and m are defined outside this excerpt).
j[0,:] = m[0,:]
j[3,:] = m[3,:]

In [23]:
# Show j after the row copies.
j

array([[10,  0],
       [ 0,  0],
       [ 0,  0],
       [ 6,  7]])

In [7]:
# Preview a length-96 zero vector (96 matches the network's output size).
np.zeros((96))

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [45]:
# Two length-20 zero vectors that end up differing only in element 0.
y, z = np.zeros(20), np.zeros(20)
z[0] = 1

In [46]:
# This branch runs only when y and z differ, so the message must say so
# (it previously printed 'equal' for unequal arrays).
if not np.array_equal(y, z):
    print('not equal')

equal


In [37]:
# Show z.
z

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [38]:
# Show y.
y

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [48]:
# Same layer sizes as PT.layers_dims, kept as a plain list for experiments.
layers_dims = [397, 1024, 512, 256, 128, 96]

In [52]:
# enumerate yields (index, size) tuples, so each l printed here is a pair.
# NOTE(review): the recorded output for this cell shows only an enumerate
# object repr, which this loop would not print — it looks stale; re-run to confirm.
for l in enumerate(layers_dims):
    print(l)

<enumerate at 0x10f8a3600>

In [123]:
# Upper bound used by the range experiment in the next cell.
L = 5

In [124]:
# range(1, L) excludes L itself, so this prints 1 through L - 1.
for l in range (1, L):
    print(l)

1
2
3
4


In [129]:
# A 10x10 block of zeros and a 10x10 block of ones for the stacking experiment.
red_parallel, red_parallel_2 = np.zeros((10, 10)), np.ones((10, 10))

In [127]:
# Show red_parallel.
red_parallel

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [128]:
# Start with an empty batch list; arrays are appended to it in a later cell.
red_parallel_batch = []

In [130]:
# Collect both arrays in the batch list, then display the list.
red_parallel_batch.append(red_parallel)
red_parallel_batch.append(red_parallel_2)
red_parallel_batch

[array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),
 array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]

In [147]:
# Join the batch arrays side by side (column-wise) into a single array.
Y = np.hstack(red_parallel_batch)

In [148]:
# Print the stacked array.
print(Y)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]


In [149]:
# Convert the numpy array Y to a torch tensor.
yTorch = torch.from_numpy(Y)

In [151]:
# Double every element and rebind yTorch to the result.
yTorch = yTorch * 2

In [157]:
# Show the scaled tensor.
yTorch

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2.]], dtype=tor

In [154]:
# Double every element of the numpy array and rebind Y to the result.
Y = Y * 2

In [156]:
# Show the scaled array.
Y

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2.]])

In [10]:
# Random 5x10 tensor used as input for the softmax check.
r = torch.rand(5,10)

In [11]:
# Show r.
r

tensor([[0.7302, 0.6091, 0.1549, 0.9017, 0.1234, 0.8782, 0.0391, 0.8252, 0.0432,
         0.1410],
        [0.5205, 0.8627, 0.4325, 0.9513, 0.7821, 0.6335, 0.5898, 0.4026, 0.9320,
         0.6654],
        [0.4595, 0.5178, 0.8663, 0.2911, 0.7448, 0.1995, 0.5466, 0.0338, 0.5519,
         0.9863],
        [0.5157, 0.6849, 0.8050, 0.6394, 0.1380, 0.7102, 0.9915, 0.2046, 0.5470,
         0.7592],
        [0.8816, 0.5947, 0.1256, 0.6725, 0.2499, 0.7425, 0.6869, 0.1677, 0.5164,
         0.1793]])

In [18]:
# Softmax over dim=1, the same setting as the output layer of PT.
s = nn.Softmax(dim=1)

In [21]:
# Sum the softmax output along dim 1; each row should total 1.
s(r).sum(1)

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000])