# Lab Assignment 1: Rock, Paper, Scissors

## Write and Run Your Own Code to Implement a Rock-Paper-Scissors Model

In [None]:
import numpy as np
import random
from sklearn.model_selection import train_test_split
import copy

# np.random.seed(0)
# random.seed(0)

def transfer_function(a):
   return np.tanh(a)

def transfer_derivative(a):
   return 1 - np.tanh(a)**2

#Changed to include all weights of theRNN (w_xh, w_hh, w_hy)
def loss_function(predicted, target, weights, lambda_reg=1e-5):#Using  Cross Entropy as told to do so by Professor Chin
   #Cross entropy better for classification task, diff between prob distributions
   epsilon = 1e-12 #preventing the log 0 issue
   predicted = np.clip(predicted, epsilon, 1. - epsilon) #for clipping
   cross_entropy = -np.mean(target * np.log(predicted)) 
   l2_penalty = 0.0
   for W in weights:
       l2_penalty += np.sum(W**2)
   l2_loss = (lambda_reg/2)* l2_penalty 
   return cross_entropy+l2_loss

def softmax(logits):
    exps = np.exp(logits - np.max(logits)) #takin out da outliers to stabilize distr.
    return exps / np.sum(exps) #gonna call this softmax later at the cross entropy section

class RNN:
   #Before we just called this c lass the neuron
   def __init__(self, input_size, hidden_size, output_size, lr=0.1):
       self.input_size = input_size
       self.hidden_size = hidden_size
       self.output_size = output_size
       self.lr = lr #learning rate, we set that in arg

       #x stands for input and y for otput in gen
       #randomizing these w out the fixed seeds as I saw too much repetition when i ran the original
       self.weight_xh = np.random.randn(hidden_size, input_size) * 0.01 #Weight (hidden_size x input_size)   [input-to-hidden weights]
       self.weight_hh = np.random.randn(hidden_size, hidden_size) * 0.01 #Weight hidden to hidden, basically recurrent weight
       self.bias_h  = np.zeros(hidden_size) 
       self.weight_hy = np.random.randn(output_size, hidden_size) * 0.01
       self.bias_y  = np.zeros(output_size)

   def forward_propagate(self, inputs):
       T = inputs.shape[0] # T is length of list
       hidden_states = []
       outputs = []
       hidden_state_prev = np.zeros(self.hidden_size)
       for t in range(T):
           input_t = inputs[t]
           activation_t = (self.weight_xh @ input_t) + (self.weight_hh @ hidden_state_prev) + self.bias_h
           hidden_t = transfer_function(activation_t) #hidden states

           logits = (self.weight_hy @ hidden_t) + self.bias_y #probabilities for softmax
           out_t = softmax(logits) #calling the func we made up top
           hidden_states.append(hidden_t)
           outputs.append(out_t)
           hidden_state_prev = hidden_t
       return hidden_states, outputs

   def back_propagate(self, inputs, hidden_states, outputs, targets, lambda_reg=1e-5):
       T = inputs.shape[0] 
       #all these are to get the gradients from each, let dW = derivative of weight
       #same size shape as weight from forwardprop
       dW_xh = np.zeros_like(self.weight_xh)
       dW_hh = np.zeros_like(self.weight_hh)
       db_h  = np.zeros_like(self.bias_h)
       dW_hy = np.zeros_like(self.weight_hy)
       db_y  = np.zeros_like(self.bias_y)
       dh_next = np.zeros(self.hidden_size)  #gradient hidden state from subsequent
       total_loss = 0.0
       weights = [self.weight_xh, self.weight_hh, self.weight_hy] #for regularizingg

       for t in reversed(range(T)):
           y_t = outputs[t] #use output variable y from engineering lingo cuz easier for me
           h_t = hidden_states[t]
           x_t = inputs[t]
           if t == 0:
               h_prev = np.zeros(self.hidden_size) #work out the zero case like no previous hidden state if 0s
           else:
               h_prev = hidden_states[t-1]
           #gotta get loss w/ reg.; also covers X entropy ofc 
           total_loss += loss_function(y_t, targets[t], weights, lambda_reg=lambda_reg) 
           dy = y_t - targets[t]  #(output_size, )
           dW_hy += np.outer(dy, h_t) #(output_size, hidden_size))
           db_y  += dy #(output_size, )
           dh = (self.weight_hy.T @ dy) + dh_next #grad. -> hid layer (hidden_size)
           da_t = dh * (1.0 - (h_t**2)) #grad tanh activation
           dW_xh += np.outer(da_t, x_t) #get grad wrt input to hidden & hiddenxhidden
           dW_hh += np.outer(da_t, h_prev)
           db_h  += da_t #grad to hid biass
           dh_next = self.weight_hh.T @ da_t #grad -> next itera.
       #must add L2 grad to each wt. gradient 
       dW_xh += lambda_reg * self.weight_xh
       dW_hh += lambda_reg * self.weight_hh
       dW_hy += lambda_reg * self.weight_hy
       #this is gradient descent updates
       self.weight_xh -= self.lr * dW_xh
       self.weight_hh -= self.lr * dW_hh
       self.bias_h  -= self.lr * db_h
       self.weight_hy -= self.lr * dW_hy
       self.bias_y  -= self.lr * db_y
       return total_loss
#in these args, dim is diff for each arg (i think?) at diff points
def network_init(input_dim, hidden_dim, output_dim, lr=0.1): #simple func initialize RNN class above, wasn;t orig like this
   return RNN(input_dim, hidden_dim, output_dim, lr) #easier to work w/

def rock_paper_scissor(history, num_epochs=50, lambda_reg=1e-5): #now we can use our history inputs and run through the RNN and plug into this
   print("Initializing RNN for Rock-Paper-Scissors...")
   input_dim = 6  #3 1-hot for each human nd cpu, respectively = 6
   hidden_dim = 50 #50 hiddn neurons for now
   output_dim = 3  #obviously (3 options for y)
   learning_rate = 1e-3
   rnn = network_init(input_dim, hidden_dim, output_dim, lr=learning_rate) #object
   steps = len(history)
   store_inputs = []
   store_comp_input = []
   print(f"Collecting {steps} steps from the training data...") #user can check if all of data being used

   for i in range(steps):
       human_move = history[i, 0]
       computer_move = history[i, 1]
       inp_vec = np.zeros(input_dim) 
       inp_vec[human_move] = 1
       inp_vec[computer_move + 3] = 1
       store_inputs.append(inp_vec)
       comp_vec = np.zeros(output_dim) #1-hot for cpu move of shape (3,)\
       """
       Logic if cpu move is 0 we get vector [1,0,0] and so on [0,1,0] or [0,0,1]
       """
       comp_vec[computer_move] = 1
       store_comp_input.append(comp_vec)

   # Build wincond is the target we built originally and it starts off empty and builds through the network
   print("Building winning condition targets from 'store_comp_input'...")
   wincond = []
   for x in store_comp_input:
       if np.array_equal(x, [1, 0, 0]): #cpu rock
           wincond.append(np.array([0, 1, 0]))
       elif np.array_equal(x, [0, 1, 0]): #cpu paper
           wincond.append(np.array([0, 0, 1]))
       else:  #cpu scissors
           wincond.append(np.array([1, 0, 0]))
           #changed the wincond originally from -1,1,0 etc. appending to one-hot's that beat cpu
           #mostly because of alteration from MSE to cross entropy 
   store_inputs = np.array(store_inputs) #(steps,6)
   wincond = np.array(wincond) #(steps,3)

   train_X, val_X, train_Y, val_Y = train_test_split(store_inputs, wincond, test_size=0.2, shuffle=True, random_state=42)
   print(f"Training samples: {(train_X[0])}, Validation samples: {val_X.shape[0]} ")
   patience=10 #num epochs to wait aftr las improvement
   best_val_loss = float('inf') #init best valid loss to inf
   patience_counter=0 #init patience count
   best_weights = None

   train_losses=[]
   val_losses=[]

   # Training loop
   for epoch in range(num_epochs):
       hidden_states, outputs = rnn.forward_propagate(train_X) #forward pass
       total_loss = rnn.back_propagate(train_X, hidden_states, outputs, train_Y, lambda_reg=lambda_reg) #backward paass WITH wincond targets
       train_losses.append(total_loss)

       #Validation
       val_hidden_states, val_outputs = rnn.forward_propagate(val_X)
       val_loss = 0.0
       for t in range(len(val_X)):
           val_loss += loss_function(val_outputs[t], val_Y[t], weights=[rnn.weight_xh, rnn.weight_hh, rnn.weight_hy], lambda_reg=lambda_reg)
       val_loss /= len(val_X)
       val_losses.append(val_loss)
       if(epoch>=num_epochs-5):
           print(f"Epoch {epoch+1}/{num_epochs}, Training Loss = {total_loss:.4f}, Validation Loss = {val_loss:.4f}") 

       #Early Stopping
       if val_loss<best_val_loss:
           best_val_loss=val_loss
           patience_counter=0
           best_weights = copy.deepcopy({
               'weight_xh': rnn.weight_xh.copy(),
               'weight_hh': rnn.weight_hh.copy(),
               'bias_h': rnn.bias_h.copy(),
               'weight_hy': rnn.weight_hy.copy(),
               'bias_y': rnn.bias_y.copy()
           })
           #print("Validation loss improved, saving model parameters.")
       else:
           patience_counter+=1
           print(f"No improvment in vaidation loss for {patience_counter} epochs")
           if patience_counter >= patience:
               print("Early stopping triggered.")
               #restoring best model parameters
               if best_weights is not None:
                   rnn.weight_xh = best_weights['weight_xh']
                   rnn.weight_hh = best_weights['weight_hh']
                   rnn.bias_h = best_weights['bias_h']
                   rnn.weight_hy = best_weights['weight_hy']
                   rnn.bias_y = best_weights['bias_y']
               break
        #LR Decay mess around weith this!!!! during testing
       if (epoch +1) % 60 == 0:
           rnn.lr *= 0.9  # .5 the learning rate
       #if(epoch==180):
   print("------------------------------------------------------------------------")
   if patience_counter<patience:
       print(f"Training completed after {epoch+1} epochs.")
   else:
       print(f"Training stopped early at epoch {epoch+1}")
   print(f"Decayed learning rate from 0.1 to {rnn.lr}")
   print(f"Epoch {epoch+1}/{num_epochs}, Cross Entropy Loss = {total_loss:.4f}")
       
   #need one more forward pass
   _, outputs_after = rnn.forward_propagate(store_inputs)
   final_output = outputs_after[-1] #last timestep
   final_output /= np.sum(final_output)

   recommended_max = np.random.choice([0,1,2], p=final_output) #making this rec now based on p distr.
   moves_map = {0: "Rock", 1: "Paper", 2: "Scissors"}
   recommended_move = moves_map[recommended_max]
   print(f"\nAlgorithm recommends: {recommended_move}\n")
   
   print("Calculating number of wins, ties, and losses from training data...")
   wins = 0.0
   ties = 0.0
   losses = 0.0
   for i in range(len(history)):
       human_move = history[i, 0]
       computer_move = history[i, 1]
       if human_move == computer_move:
           ties += 1
       elif (human_move - computer_move) % 3 == 1:
           wins += 1
       else:
           losses += 1
   print(f"Number of Wins: {wins}")
   print(f"Number of Ties: {ties}")
   print(f"Number of Losses: {losses}")
   percentage_win = wins/(wins+ties+losses)
   percentage_win1 = wins/(wins+losses)
   print
   return recommended_move

#executing the whole thang!
history = np.loadtxt("./training.txt", dtype=int).reshape(-1, 3)
recommended = rock_paper_scissor(history, num_epochs=500, lambda_reg=1e-5)
print("Recommended move after training loop:", recommended)

Initializing RNN for Rock-Paper-Scissors...
Collecting 504 steps from the training data...
Building winning condition targets from 'store_comp_input'...
Training samples: [0. 0. 1. 1. 0. 0.], Validation samples: 101 
Epoch 496/500, Training Loss = 0.2757, Validation Loss = 0.0007
Epoch 497/500, Training Loss = 0.2753, Validation Loss = 0.0007
Epoch 498/500, Training Loss = 0.2750, Validation Loss = 0.0007
Epoch 499/500, Training Loss = 0.2747, Validation Loss = 0.0007
Epoch 500/500, Training Loss = 0.2744, Validation Loss = 0.0007
------------------------------------------------------------------------
Training completed after 500 epochs.
Decayed learning rate from 0.1 to 0.00043046721
Epoch 500/500, Cross Entropy Loss = 0.2744

Algorithm recommends: Scissors

Calculating number of wins, ties, and losses from training data...
Number of Wins: 137
Number of Ties: 178
Number of Losses: 189
Recommended move after training loop: Scissors


## What to Submit?
1. A link to your repository (remember to add your Lab TA as a collaborator to this repository, or they won't be able to grade your work!).
2. Your training data in training.txt.
3. Your model evaluation data.
4. A brief write-up describing your algorithm in a couple of sentences, including figures if necessary (your design on paper).
5. Finally, please individually briefly reflect on your experience in a couple of sentences. What are you taking away from this lab? 