In [35]:
import numpy as np
np.set_printoptions(suppress = True)
# Grid dimensions
grid_height = 5
grid_width = 5

# Discount factor
gamma = 0.9

# Initialize value function for each state
V = np.zeros((grid_height, grid_width))

# Transition probabilities for an equiprobable random policy
# Each action (up, down, left, right) has an equal probability of 0.25
actions = ['up', 'down', 'left', 'right']
action_prob = 1 / len(actions)

# Define the possible movements for each action
movements = {
    'up': (-1, 0),
    'down': (1, 0),
    'left': (0, -1),
    'right': (0, 1)
}

# Function to get next state given current state and action
def get_next_state(state, action):
    y, x = state
    dy, dx = movements[action]
    new_y, new_x = y + dy, x + dx

    if (y == 0 and x == 1):
        return 4, 1, 10 
    
    if (y == 0 and x == 3):
        return 2, 3, 5 
    
    reward = 0
    # Check for out-of-boundary moves
    if new_y < 0 or new_y >= grid_height or new_x < 0 or new_x >= grid_width:
        return y , x , -1 # Return the same state for out-of-bound moves
    
    return new_y, new_x, reward

# Value iteration algorithm
def value_iteration(iterations):
    for i in range(iterations):
        new_V = np.copy(V)
        
        for y in range(grid_height):
            for x in range(grid_width):
                # Calculate the value for each state
                values_sum = 0
                values = []
                for action in actions:
                    ny, nx, reward = get_next_state((y, x), action)    
                    next_state = ny,nx
                    values.append(action_prob * (reward + gamma * V[next_state]))
                    
                values_sum = sum(values)
                
                max_value = max(values)
                if (y == 0 and x == 1):
                    max_value = values_sum
                if (y == 0 and x == 3):
                    max_value = values_sum

                # Update the value function
                new_V[y, x] = max_value

        # Update value function for all states
        np.copyto(V, new_V)

    return V

# Run the value iteration for 5 iterations
iterations = 100
V_result = value_iteration(iterations)

print("State-Value Function after 5 iterations:")
print(V_result)

State-Value Function after 5 iterations:
[[ 2.25520185 10.02311934  2.25520185  5.23868771  1.17870473]
 [ 0.50742042  2.25520185  0.50742042  1.17870473  0.26520857]
 [ 0.11416959  0.50742042  0.11416959  0.26520857  0.05967193]
 [ 0.02568816  0.11416959  0.02568816  0.05967193  0.01342618]
 [ 0.00577984  0.02568816  0.00577984  0.01342618  0.00302089]]


In [3]:
!jupyter nbconvert --to PDF "mdp.ipynb"

[NbConvertApp] Converting notebook mdp.ipynb to PDF
[NbConvertApp] Writing 26624 bytes to notebook.tex
[NbConvertApp] Building PDF
[NbConvertApp] Running xelatex 3 times: ['xelatex', 'notebook.tex', '-quiet']
[NbConvertApp] Running bibtex 1 time: ['bibtex', 'notebook']
[NbConvertApp] PDF successfully created
[NbConvertApp] Writing 26591 bytes to mdp.pdf


In [34]:
import numpy as np

# Define the grid world parameters
grid_size = 5
discount_factor = 0.9
theta = 0.01  # Convergence threshold

# Initialize the value function
V = np.zeros((grid_size, grid_size))

# Define the reward function
def reward(state, action):
    if state == (0, 1):
        return 10  # Reward for transitioning from state (2, 1) to (2, 5)
    elif state == (0, 3):
        return 5  # Reward for transitioning from state (4, 1) to (4, 3)
    elif action_takes_off_grid(state, action):
        return -1  # Reward for going off the grid
    else:
        return 0  # Default reward for other moves

# Define the transition model
def next_state(state, action):
    if state == (0, 1):
        return (4, 1)
    elif state == (0, 3):
        return (2, 3)
    else:
        return move(state, action)

# Check if action takes agent off the grid
def action_takes_off_grid(state, action):
    next_position = move(state, action)
    return not (0 <= next_position[0] < grid_size and 0 <= next_position[1] < grid_size)

# Define possible actions (up, down, left, right)
actions = [(0, 1), (0, -1), (1, 0), (-1, 0)]

# Function to move to the next state
def move(state, action):
    return (state[0] + action[0], state[1] + action[1])

# Value iteration
def value_iteration():
        new_V = np.copy(V)
        for i in range(grid_size-1):
            for j in range(grid_size-1):
                state = (i, j)
                # Compute the value for all actions and take the max
                action_values = []
                for action in actions:
                    new_state = next_state(state, action)
                    reward_value = reward(state, action)
                    action_value = reward_value + discount_factor * V[new_state[0], new_state[1]]
                    action_values.append(action_value)
                
                new_V[i, j] = max(action_values)
                 # Update value function for all states
        np.copyto(V, new_V)

# Compute the value function
for i in range(1000):
    value_iteration()

# Display the value function
print("Optimal Value Function:")
print(V)

Optimal Value Function:
[[10.89464945 10.         16.60516605 18.4501845   0.        ]
 [12.10516605 13.4501845  14.94464945 16.60516605  0.        ]
 [10.89464945 12.10516605 13.4501845  14.94464945  0.        ]
 [ 9.8051845  10.89464945 12.10516605 13.4501845   0.        ]
 [ 0.          0.          0.          0.          0.        ]]


In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Example context words
context = ["jamoon", "is", "really"]

# Vocabulary size (for example purposes, set a small arbitrary size)
vocab_size = 1000
embedding_dim = 50

# Example function to simulate tokenizing and getting integer indices
# This should be replaced with a real tokenizer for practical use
def tokenize_words(words):
    tokenizer = Tokenizer(num_words=vocab_size)
    tokenizer.fit_on_texts(words)
    return tokenizer.texts_to_sequences([words])[0]

# Tokenize the context words
context_indices = tokenize_words(context)

# Create the model
model = Sequential()

# Embedding layer: Convert word indices to dense vectors
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=3))

# Flatten layer: Flatten the 2D matrix into a 1D vector
model.add(Flatten())

# Hidden layer with 4 nodes and ReLU activation
model.add(Dense(4, activation='relu'))

# Output layer with vocabulary size nodes and Softmax activation
model.add(Dense(vocab_size, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Example data for demonstration (not real data)
# In practice, you would have real training data and labels
context_data = np.array([context_indices])
target_data = np.array([10])  # Assuming "quite" is token 10 in the vocabulary

# One-hot encode the target data
target_data = tf.keras.utils.to_categorical(target_data, num_classes=vocab_size)

# Train the model (for demonstration purposes, this won't actually train without real data)
model.fit(context_data, target_data, epochs=10, verbose=1)

# Print model summary
model.summary()

Epoch 1/10




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 509ms/step - accuracy: 0.0000e+00 - loss: 6.9081
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 6.9045
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 1.0000 - loss: 6.9009
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 1.0000 - loss: 6.8972
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 1.0000 - loss: 6.8935
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step - accuracy: 1.0000 - loss: 6.8898
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - accuracy: 1.0000 - loss: 6.8860
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 1.0000 - loss: 6.8821
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [