In [1]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Input datasets
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
expected_output = np.array([[0], [1], [1], [0]])

# Neural network architecture
inputLayerNeurons, hiddenLayerNeurons, outputLayerNeurons = 2, 2, 1

# Random weights and bias initialization
hidden_weights = np.random.uniform(size=(inputLayerNeurons, hiddenLayerNeurons))
hidden_bias = np.random.uniform(size=(1, hiddenLayerNeurons))
output_weights = np.random.uniform(size=(hiddenLayerNeurons, outputLayerNeurons))
output_bias = np.random.uniform(size=(1, outputLayerNeurons))

# Training parameters
epochs = 10000
lr = 0.1

# Training algorithm
for _ in range(epochs):
    # Forward Propagation
    hidden_layer_output = sigmoid(np.dot(inputs, hidden_weights) + hidden_bias)
    predicted_output = sigmoid(np.dot(hidden_layer_output, output_weights) + output_bias)

    # Backpropagation
    error = expected_output - predicted_output
    d_predicted_output = error * sigmoid_derivative(predicted_output)
    error_hidden_layer = d_predicted_output.dot(output_weights.T)
    d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_output)

    # Updating Weights and Biases
    output_weights += np.dot(hidden_layer_output.T, d_predicted_output) * lr
    output_bias += np.sum(d_predicted_output, axis=0, keepdims=True) * lr
    hidden_weights += np.dot(inputs.T, d_hidden_layer) * lr
    hidden_bias += np.sum(d_hidden_layer, axis=0, keepdims=True) * lr

# Display final weights and biases
print("Final hidden weights:", hidden_weights)
print("Final hidden bias:", hidden_bias)
print("Final output weights:", output_weights)
print("Final output bias:", output_bias)

# Display output from the neural network after training
print("\nOutput from neural network after {} epochs:".format(epochs), predicted_output)


Final hidden weights: [[-2.74529869  4.04161058]
 [ 6.28943553  6.56725952]]
Final hidden bias: [[ 2.01222886 -0.73159857]]
Final output weights: [[-4.5046381 ]
 [ 5.21265188]]
Final output bias: [[-0.70525207]]

Output from neural network after 10000 epochs: [[0.04809493]
 [0.49717333]
 [0.94592611]
 [0.50495757]]


In [8]:
import numpy as np
#np.random.seed(0)
def sigmoid (x):
 return 1/(1 + np.exp(-x))
def sigmoid_derivative(x):
 return x * (1 - x)
#Input datasets
inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
expected_output = np.array([[0],[1],[1],[0]])
epochs = 10000
lr = 0.1
inputLayerNeurons, hiddenLayerNeurons, outputLayerNeurons = 2,2,1
#Random weights and bias initialization
hidden_weights = np.random.uniform(size=(inputLayerNeurons,hiddenLayerNeurons))
hidden_bias =np.random.uniform(size=(1,hiddenLayerNeurons))
output_weights = np.random.uniform(size=(hiddenLayerNeurons,outputLayerNeurons))
output_bias = np.random.uniform(size=(1,outputLayerNeurons))
print("Initial hidden weights: ",end='')
print(*hidden_weights)
print("Initial hidden biases: ",end='')
print(*hidden_bias)
print("Initial output weights: ",end='')
print(*output_weights)
print("Initial output biases: ",end='')
print(*output_bias)
#Training algorithm
for _ in range(epochs):
#Forward Propagation
 hidden_layer_activation = np.dot(inputs,hidden_weights)
 hidden_layer_activation += hidden_bias
 hidden_layer_output = sigmoid(hidden_layer_activation)
 output_layer_activation = np.dot(hidden_layer_output,output_weights)
 output_layer_activation += output_bias
 predicted_output = sigmoid(output_layer_activation)
#Backpropagation
 error = expected_output - predicted_output
 d_predicted_output = error * sigmoid_derivative(predicted_output)
 error_hidden_layer = d_predicted_output.dot(output_weights.T)
 d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_output)
#Updating Weights and Biases
 output_weights += hidden_layer_output.T.dot(d_predicted_output) * lr
 output_bias += np.sum(d_predicted_output,axis=0,keepdims=True) * lr
 hidden_weights += inputs.T.dot(d_hidden_layer) * lr
 hidden_bias += np.sum(d_hidden_layer,axis=0,keepdims=True) * lr
print("Final hidden weights: ",end='')
print(*hidden_weights)
print("Final hidden bias: ",end='')
print(*hidden_bias)
print("Final output weights: ",end='')
print(*output_weights)
print("Final output bias: ",end='')
print(*output_bias)
print("\nOutput from neural network after 10,000 epochs: ",end='')
print(*predicted_output)

Initial hidden weights: [0.29536945 0.42834439] [0.05060655 0.64332413]
Initial hidden biases: [0.54945496 0.55598398]
Initial output weights: [0.07155487] [0.55649169]
Initial output biases: [0.34004756]
Final hidden weights: [3.65309696 5.89583472] [3.63576931 5.80474932]
Final hidden bias: [-5.57998648 -2.44148321]
Final output weights: [-8.15691147] [7.47036729]
Final output bias: [-3.34094103]

Output from neural network after 10,000 epochs: [0.05877915] [0.94574118] [0.94602362] [0.05854959]


In [11]:
import numpy as np

# R matrix
R = np.matrix([[-1, -1, -1, -1, 0, -1],
               [-1, -1, -1, 0, -1, 100],
               [-1, -1, -1, 0, -1, -1],
               [-1, 0, 0, -1, 0, -1],
               [-1, 0, 0, -1, -1, 100],
               [-1, 0, -1, -1, 0, 100]])

# Q matrix
Q = np.matrix(np.zeros([6, 6]))

# Gamma (learning parameter).
gamma = 0.8

# Initial state. (Usually chosen at random)
initial_state = 1

# This function returns all available actions in the state given as an argument
def available_actions(state):
    current_state_row = R[state, ]
    av_act = np.where(current_state_row >= 0)[1]
    return av_act

# Get available actions in the current state
available_act = available_actions(initial_state)

# This function chooses at random which action to be performed within the range
# of all the available actions.
def sample_next_action(available_actions_range):
    next_action = int(np.random.choice(available_actions_range, 1))
    return next_action

# Sample next action to be performed
action = sample_next_action(available_act)

# This function updates the Q matrix according to the path selected and the Q
# learning algorithm
def update(current_state, action, gamma):
    max_index = np.where(Q[action, ] == np.max(Q[action, ]))[1]

    if max_index.shape[0] > 1:
        max_index = int(np.random.choice(max_index, size=1))
    else:
        max_index = int(max_index)

    max_value = Q[action, max_index]

    # Q learning formula
    Q[current_state, action] = R[current_state, action] + gamma * max_value

# Training
# Train over 10,000 iterations. (Re-iterate the process above).
for i in range(10000):
    current_state = np.random.randint(0, int(Q.shape[0]))
    available_act = available_actions(current_state)
    action = sample_next_action(available_act)
    update(current_state, action, gamma)

# Normalize the "trained" Q matrix
print("Trained Q matrix:")
print(Q / np.max(Q) * 100)

# Testing
# Goal state = 5
# Best sequence path starting from 2 -> 2, 3, 1, 5
current_state = 2
steps = [current_state]
while current_state != 5:
    next_step_index = np.where(Q[current_state, ] == np.max(Q[current_state, ]))[1]

    if next_step_index.shape[0] > 1:
        next_step_index = int(np.random.choice(next_step_index, size=1))
    else:
        next_step_index = int(next_step_index)

    steps.append(next_step_index)
    current_state = next_step_index

# Print selected sequence of steps
print("Selected path:")
print(steps)


Trained Q matrix:
[[  0.    0.    0.    0.   80.    0. ]
 [  0.    0.    0.   64.    0.  100. ]
 [  0.    0.    0.   64.    0.    0. ]
 [  0.   80.   51.2   0.   80.    0. ]
 [  0.   80.   51.2   0.    0.  100. ]
 [  0.   80.    0.    0.   80.  100. ]]
Selected path:
[2, 3, 4, 5]


In [12]:
import numpy as np

# R matrix
R = np.matrix([[-1, -1, -1, -1, 0, -1],
               [-1, -1, -1, 0, -1, 100],
               [-1, -1, -1, 0, -1, -1],
               [-1, 0, 0, -1, 0, -1],
               [-1, 0, 0, -1, -1, 100],
               [-1, 0, -1, -1, 0, 100]])

# Q matrix
Q = np.zeros_like(R, dtype=float)

# Gamma (learning parameter).
gamma = 0.8

# Initial state. (Usually chosen at random)
initial_state = 1

# This function returns all available actions in the state given as an argument
def available_actions(state):
    return np.where(R[state] >= 0)[1]

# This function chooses at random which action to be performed within the range
# of all the available actions.
def sample_next_action(available_actions_range):
    return int(np.random.choice(available_actions_range))

# Training
# Train over 10,000 iterations. (Re-iterate the process above).
for _ in range(10000):
    current_state = np.random.randint(Q.shape[0])
    available_act = available_actions(current_state)
    action = sample_next_action(available_act)
    max_index = np.argmax(Q[action])
    max_value = Q[action, max_index]
    Q[current_state, action] = R[current_state, action] + gamma * max_value

# Normalize the "trained" Q matrix
print("Trained Q matrix:")
print(Q / np.max(Q) * 100)

# Testing
# Goal state = 5
# Best sequence path starting from 2 -> 2, 3, 1, 5
current_state = 2
steps = [current_state]
while current_state != 5:
    next_step_index = np.argmax(Q[current_state])
    steps.append(next_step_index)
    current_state = next_step_index

# Print selected sequence of steps
print("Selected path:")
print(steps)


Trained Q matrix:
[[  0.    0.    0.    0.   80.    0. ]
 [  0.    0.    0.   64.    0.  100. ]
 [  0.    0.    0.   64.    0.    0. ]
 [  0.   80.   51.2   0.   80.    0. ]
 [  0.   80.   51.2   0.    0.  100. ]
 [  0.   80.    0.    0.   80.  100. ]]
Selected path:
[2, 3, 1, 5]
