## Smart E - Learning Framework using Deep Q - Learning

### Importing the Libraries

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import gymnasium as gym
from gymnasium import spaces

import tensorflow as tf
import random

from collections import deque

2024-02-26 20:13:24.655744: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-26 20:13:25.231839: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-26 20:13:25.232173: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-26 20:13:25.331533: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-26 20:13:25.522440: I tensorflow/core/platform/cpu_feature_guar

### Checking whether Tensorflow able to detect the GPU

In [2]:
print(tf.config.list_physical_devices(device_type='GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2024-02-26 20:13:51.784317: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-26 20:13:52.306502: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-26 20:13:52.306644: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


### Defining the States

In [3]:
states = {}

states[0] = "Start"
states[1] = "Reading"
states[2] = "Watching(VL)"
states[3] = "Entertaining"
states[4] = "GettingBoredOrFrustration"
states[5] = "Rest/Sleep"
states[6] = "Writing"
states[7] = "PlayingaGame"
states[8] = "ClickingOnAnAd"
states[9] = "CourseCompletion"
states[10] = "QuitStudyOrDisengagement"

print("States defined for the environment\n")
for key in states.keys():
    print("State {} - Reflects {}".format(key, states[key]))

States defined for the environment

State 0 - Reflects Start
State 1 - Reflects Reading
State 2 - Reflects Watching(VL)
State 3 - Reflects Entertaining
State 4 - Reflects GettingBoredOrFrustration
State 5 - Reflects Rest/Sleep
State 6 - Reflects Writing
State 7 - Reflects PlayingaGame
State 8 - Reflects ClickingOnAnAd
State 9 - Reflects CourseCompletion
State 10 - Reflects QuitStudyOrDisengagement


### Defining the Actions

In [4]:
actions = {}

actions[1] = "Stay"
actions[2] = "PreviousCourse"
actions[3] = "AdditionalContent"
actions[4] = "DoAssignment/Quiz"
actions[5] = "MoveToExams"
actions[6] = "MoveToHighLevelCourse"
actions[7] = "MoveToLowLevelCourse"
actions[8] = "MoveToSocialMedia"

print("Actions defined for the environment\n")
for key in actions.keys():
    print("Action {} - Reflects {}".format(key, actions[key]))

Actions defined for the environment

Action 1 - Reflects Stay
Action 2 - Reflects PreviousCourse
Action 3 - Reflects AdditionalContent
Action 4 - Reflects DoAssignment/Quiz
Action 5 - Reflects MoveToExams
Action 6 - Reflects MoveToHighLevelCourse
Action 7 - Reflects MoveToLowLevelCourse
Action 8 - Reflects MoveToSocialMedia


### Defining the Rewards for each action

In [5]:
rewards = {}

initialRewards = [10, 20, 50, 60, 100, 100, 70, -10]

for key in actions.keys():
    rewards[actions[key]] = initialRewards[key - 1]

print("Rewards assigned for each of the action\n")
for key in rewards.keys():
    print("Action: {} - Reward: {}".format(key, rewards[key]))

Rewards assigned for each of the action

Action: Stay - Reward: 10
Action: PreviousCourse - Reward: 20
Action: AdditionalContent - Reward: 50
Action: DoAssignment/Quiz - Reward: 60
Action: MoveToExams - Reward: 100
Action: MoveToHighLevelCourse - Reward: 100
Action: MoveToLowLevelCourse - Reward: 70
Action: MoveToSocialMedia - Reward: -10


### Defining the custom environment

In [6]:
class Environment(gym.Env):

    def __init__(self):
        super(Environment, self).__init__()
        self.num_states = len(states)
        self.num_actions = len(actions)
        self.action_space = spaces.Discrete(self.num_actions)
        self.observation_space = spaces.Discrete(self.num_states)
        self.current_state = None

        self.rewards = {
            0: 10,
            1: 20,
            2: 50,
            3: 60,
            4: 100,
            5: 100,
            6: 70,
            7: -10
        }

    def reset(self) :
        self.current_state = np.random.randint(0, self.num_states)
        return self.current_state
    
    def step(self, action) :
        # Take action and transition to the next state based on the action
        if action in range(self.num_actions) :
            self.current_state = (self.current_state + action) % self.num_states
        else :
            raise ValueError("Invalid action")
        
        # Get reward based on the action taken
        reward = self.rewards.get(action, 0)

        done = False

        return self.current_state, reward, done, {}
    
    def render(self, mode='human') :
        print(f"Current State: {self.current_state}")

gym.register(id="Custom_ELearning_Env-v1", entry_point="elearningEnv:Custom_ELearning_Env")
# Defining the Environment class
environment = Environment()

# Calculate the State size and number of actions
stateSize = environment.observation_space.n
numberActions = environment.action_space.n

print("State size: ", stateSize)
print("Number of Actions: ", numberActions)

State size:  11
Number of Actions:  8


### Implementing the Neural Network

In [7]:
class NNetwork(tf.keras.Model):
    def __init__(self, state_size, action_size, seed = 42) :
        super(NNetwork, self).__init__()
        self.seed = tf.random.set_seed(seed)
        self.fc1 = tf.keras.layers.Dense(64, activation="relu")
        self.fc2 = tf.keras.layers.Dense(128, activation="relu")
        self.fc3 = tf.keras.layers.Dense(128, activation="relu")
        self.fc4 = tf.keras.layers.Dense(64, activation="relu")
        self.fc5 = tf.keras.layers.Dense(action_size)


    def call(self, state):
        x = self.fc1(state)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.fc5(x)
        return x

### Initializing the Hyperparameters

In [8]:
learningRate = 1e-4
miniBatchSize = 100
discountFactor = 0.99
replayBufferSize = int(1e5)
interpolationParameter = 1e-3

### Implementing the Experience Replay

In [9]:
class ReplayMemory(object):

    def __init__(self, capacity):
        self.device = tf.device("/GPU:0" if tf.test.is_gpu_available() else "/CPU:0")
        self.capacity = capacity
        self.memory = []

    def push(self, event):
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size):
        experiences = random.sample(self.memory, k = batch_size) 
        states = tf.convert_to_tensor(np.vstack([e[0] for e in experiences if e is not None]), dtype=tf.float32)
        actions = tf.convert_to_tensor(np.vstack([e[1] for e in experiences if e is not None]), dtype=tf.int64)
        rewards = tf.convert_to_tensor(np.vstack([e[2] for e in experiences if e is not None]), dtype=tf.float32)
        next_states = tf.convert_to_tensor(np.vstack([e[3] for e in experiences if e is not None]), dtype=tf.float32)
        dones = tf.convert_to_tensor(np.vstack([e[4] for e in experiences if e is not None]).astype(np.uint8), dtype=tf.float32)
        return states, actions, rewards, next_states, dones

### Implementing the Deep Q - Learning Agent

In [13]:
class Agent():

    def __init__(self, state_size, action_size):
        self.device = tf.device(device_name="/GPU:0" if tf.config.list_physical_devices('GPU') else "/CPU:0")
        self.state_size = state_size
        self.action_size = action_size
        self.local_network = NNetwork(state_size, action_size)
        self.target_network = NNetwork(state_size, action_size)
        self.optimizer = tf.optimizers.Adam(learning_rate=learningRate)
        self.memory = ReplayMemory(replayBufferSize)
        self.t_step = 0

    def step(self, state, action, reward, next_state, done):
        self.memory.push((state, action, reward, next_state, done))
        self.t_step = (self.t_step + 1) % 4
        if self.t_step == 0:
            if len(self.memory.memory) > miniBatchSize:
                experiences = self.memory.sample(100)
                self.learn(experiences, discountFactor)

    def act(self, state, epsilon = 0.):
        state = tf.expand_dims(tf.convert_to_tensor(state, dtype=tf.float32), axis = 0)
        self.local_network.eval()
        action_values = self.local_network(state)
        self.local_network.train()
        if random.random() > epsilon:
            return tf.math.argmax(action_values.numpy())
        else:
            return random.choice(np.arange(self.action_size))
        
    def learn(self, experiences, discount_factor):
        states, next_states, actions, rewards, dones = experiences
        next_q_targets = self.target_network(next_states).numpy().max(axis=1).reshape((-1, 1))
        q_targets = rewards + discount_factor * next_q_targets * (1 - dones)
        with tf.GradientTape() as tape:
            q_expected = self.local_network(states)
            actions_one_hot = tf.one_hot(actions, self.action_size)
            q_expected_action = tf.reduce_sum(q_expected * actions_one_hot, axis=1, keepdims=True)
            loss = tf.reduce_mean(tf.square(q_expected_action - q_targets))
        gradients = tape.gradient(loss, self.local_network.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.local_network.trainable_variables))
        self.soft_update(self.local_network, self.target_network, interpolationParameter)

    def soft_update(self, local_model, target_model, interpolation_parameter) :
        for target_param, local_param in zip(target_model.trainable_variables, local_model.trainable_parameters) :
            target_param.assign(interpolation_parameter * local_param + (1.0 - interpolation_parameter) * target_param)

        

### Initializing the Deep Q - Network Class

In [11]:
agent = Agent(stateSize, numberActions)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


2024-02-26 20:15:20.200340: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-26 20:15:20.200460: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-26 20:15:20.200479: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-26 20:15:20.365222: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-26 20:15:20.365361: I external/local_xla/xla/stream_executor

### Training the Deep Q - Network Agent

In [14]:
numberEpisodes = 100
maximumNumberTimestampsPerEpisode = 1000
epsilonStartingValue = 1.0
epsilonEndingValue = 0.01
epsilonDecayValue = 0.995
epsilon = epsilonStartingValue
scoresOn10Episodes = deque(maxlen = 10)

for episode in range(1, numberEpisodes + 1) :
    state = environment.reset()
    score = 0
    for t in range(maximumNumberTimestampsPerEpisode) :
        action = agent.act(state, epsilon)
        next_state, reward, done, _, _ = environment.step(action)
        agent.step(state, action, reward, next_state, done)
        state = next_state
        score += reward
        if done :
            break
    scoresOn10Episodes.append(score)
    epsilon = max(epsilonEndingValue, epsilonDecayValue * epsilon)
    print('\rEpisode: {}\tAverage Score: {:.2f}'.format(episode, np.mean(scoresOn10Episodes)), end='')
    if episode % 10 == 0 :
        print("\rEpisode: {}\tAverage Score: {:.2f}".format(episode, np.mean(scoresOn10Episodes)))

AttributeError: 'NNetwork' object has no attribute 'eval'