In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [2]:
import gym
from gym import spaces
import random
from collections import deque, defaultdict

In [3]:
# Location of the dataset CSV (a relative path, so it is resolved against the
# notebook's working directory); it is read with pd.read_csv in the next cell
file_path = "datasets/weekday_normal.csv"

In [4]:
# Read the CSV into a DataFrame. preprocess_data() below reads the columns
# 'Timestamp', 'Presence Sensor', 'Persons Recognized', 'Weapon Detected' and 'Action'.
data = pd.read_csv(file_path)

In [5]:
def preprocess_data(df):
    """Turn the raw sensor log into state vectors and the matching action labels.

    The caller's frame is left untouched. Overwriting 'Weapon Detected' and
    'Timestamp' on the input would make a second call on the same frame (for
    example re-running the notebook cell) map every already-converted weapon
    flag to 0, because the converted values no longer equal 'Yes'.

    Args:
        df: DataFrame with the columns 'Timestamp', 'Presence Sensor',
            'Persons Recognized', 'Weapon Detected' and 'Action'.

    Returns:
        A ``(state_data, actions)`` tuple of NumPy arrays. ``state_data`` has
        one row per input row with the columns [Timestamp, Presence Sensor,
        Persons Recognized, Weapon Detected]; ``actions`` holds the 'Action'
        column.
    """
    # Work on a copy so the input frame is never modified
    df = df.copy()

    # 'Yes' -> 1, anything else (including missing values) -> 0
    df['Weapon Detected'] = (df['Weapon Detected'] == 'Yes').astype('int64')

    # Replace every distinct timestamp with the integer code LabelEncoder assigns to it
    label_encoder = LabelEncoder()
    df['Timestamp'] = label_encoder.fit_transform(df['Timestamp'])

    # Split into the state features and the recorded action for each row
    state_data = df[['Timestamp', 'Presence Sensor', 'Persons Recognized', 'Weapon Detected']].values
    actions = df['Action'].values

    return state_data, actions

In [6]:
# Build the state matrix (one row per record) and the per-row action labels
# from the loaded DataFrame
state_data, actions = preprocess_data(data)

In [7]:
class RealTimeEnvironment(gym.Env):
    """Gym environment that replays a recorded dataset, one row per step.

    At every step the agent's action is compared with the action recorded for
    the current row: a match is rewarded with +1, a mismatch with -1. The
    environment then advances to the next row regardless of the action taken,
    and the episode ends after the last row.
    """

    def __init__(self, state_data, actions):
        """Store the dataset and declare the observation/action spaces.

        Args:
            state_data: 2-D array with one state vector per row.
            actions: per-row recorded action labels, aligned with ``state_data``.
        """
        super(RealTimeEnvironment, self).__init__()
        self.state_data = state_data
        self.actions_data = actions
        self.current_step = 0

        # Observation: 4 features (Timestamp, Presence Sensor, Persons Recognized, Weapon Detected).
        # NOTE(review): the bounds are declared as [0, 1]; confirm that every column of
        # state_data (in particular the encoded Timestamp) actually lies in that range.
        self.observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)

        # Two discrete actions. step() compares `action * 2` with the recorded label,
        # so action 0 matches label 0 (do nothing) and action 1 matches label 2
        # (inform owner and police).
        self.action_space = spaces.Discrete(2)

    def reset(self):
        """Rewind to the first row and return its state vector."""
        self.current_step = 0
        return self.state_data[self.current_step]

    def step(self, action):
        """Score ``action`` against the current row and advance to the next one.

        Args:
            action: action index from ``action_space`` (0 or 1).

        Returns:
            ``(next_state, reward, done, info)``. ``reward`` is 1 when
            ``action * 2`` equals the recorded label and -1 otherwise. ``done``
            becomes True once the last row has been consumed, in which case
            ``next_state`` is a zero vector. ``info`` is always an empty dict.
        """
        correct_action = self.actions_data[self.current_step]
        reward = 1 if (action * 2) == correct_action else -1

        # Move to the next step
        self.current_step += 1
        done = self.current_step >= len(self.state_data)

        # Get the next state; past the end there is no row left, so hand back zeros
        if not done:
            next_state = self.state_data[self.current_step]
        else:
            next_state = np.zeros(self.state_data.shape[1])

        return next_state, reward, done, {}

    def render(self):
        """Print the current step index and state.

        Once the episode is finished ``current_step`` equals the number of rows,
        so indexing ``state_data`` directly would raise IndexError. In that case
        the same zero terminal state that ``step()`` returns is shown instead.
        """
        if self.current_step < len(self.state_data):
            state = self.state_data[self.current_step]
        else:
            state = np.zeros(self.state_data.shape[1])
        print(f"Step: {self.current_step}, State: {state}")

In [21]:
# Q-Learning parameters
learning_rate = 0.1  # step size applied to every Q-value update
discount_factor = 0.99  # weight of the best next-state Q-value in the update target
exploration_rate = 1.0  # initial epsilon: probability of picking a random action
exploration_decay = 0.995  # epsilon is multiplied by this factor after every episode
exploration_min = 0.01  # lower bound that the decayed epsilon never drops below
total_episodes = 1000  # Number of episodes for training
# Per-feature resolution read by discretize_state(); entries are ordered like the
# state columns (Timestamp, Presence Sensor, Persons Recognized, Weapon Detected)
state_discretizer = (10, 2, 2, 2)

In [22]:
# Initialize the environment: it replays the preprocessed states row by row and
# scores each chosen action against the recorded action labels
env = RealTimeEnvironment(state_data, actions)

In [23]:
# Q-table keyed by discretized state. A state that has not been seen yet gets a
# fresh row of two zeros (one Q-value per action) the first time it is looked up.
q_table = defaultdict(lambda: [0] * 2)

In [24]:
# Display the Q-table right after creation (no states have been visited yet)
q_table

defaultdict(<function __main__.<lambda>()>, {})

In [25]:
# Helper function to discretize continuous states
def discretize_state(state, bins=None):
    """Convert a state vector into a hashable tuple of integers (a Q-table key).

    Every feature is scaled by its resolution and floored:
    ``floor(state[i] * bins[i])``. Floor-dividing by the reciprocal instead
    (``state[i] // (1 / bins[i])``) is unreliable because the reciprocal is an
    inexact float, so values lying exactly on a boundary land one index too
    low: ``1 // (1 / 10)`` evaluates to 9.0, not 10.

    Indices are not clipped, so a feature larger than 1 maps to an index of
    ``bins[i]`` or more.

    Args:
        state: sequence of numeric features.
        bins: per-feature resolution, at least as long as ``state``. Defaults
            to the module-level ``state_discretizer``.

    Returns:
        Tuple of Python ints, one per feature of ``state``.
    """
    if bins is None:
        bins = state_discretizer
    return tuple(int(np.floor(state[i] * bins[i])) for i in range(len(state)))

In [26]:
# Metrics for tracking performance (rolling windows; the oldest entry is dropped
# once 100 values are stored)
reward_history = deque(maxlen=100)  # one total reward per episode -> last 100 episodes
action_accuracy = deque(maxlen=100)  # one 0/1 flag per step -> last 100 steps, not episodes

In [27]:
# Tabular Q-learning: every episode is one complete pass over the recorded dataset
for episode in range(total_episodes):
    state = discretize_state(env.reset())
    episode_reward = 0
    done = False

    while not done:
        # Epsilon-greedy policy: a random action with probability exploration_rate,
        # otherwise the action with the highest Q-value for the current state
        if random.uniform(0, 1) >= exploration_rate:
            action = np.argmax(q_table[state])
        else:
            action = env.action_space.sample()

        # Apply the action, then turn the returned observation into a Q-table key
        next_state, reward, done, _ = env.step(action)
        next_state = discretize_state(next_state)

        # Q-learning update; a terminal transition contributes no future value
        best_future_q = 0 if done else max(q_table[next_state])
        td_target = reward + discount_factor * best_future_q
        q_table[state][action] += learning_rate * (td_target - q_table[state][action])

        # Advance and record per-step metrics (1 when the action was rewarded)
        state = next_state
        episode_reward += reward
        action_accuracy.append(int(reward == 1))

    # Per-episode bookkeeping: total reward, then shrink epsilon down to its floor
    reward_history.append(episode_reward)
    exploration_rate = max(exploration_min, exploration_rate * exploration_decay)

    # Report the rolling averages every 10th episode
    if not episode % 10:
        average_reward = np.mean(reward_history)
        success_rate = np.mean(action_accuracy)
        print(f"Episode {episode}: Avg Reward = {average_reward:.2f}, Success Rate = {success_rate:.2f}")

Episode 0: Avg Reward = -14.00, Success Rate = 0.43
Episode 10: Avg Reward = 4.36, Success Rate = 0.54
Episode 20: Avg Reward = 4.48, Success Rate = 0.52
Episode 30: Avg Reward = 8.32, Success Rate = 0.58
Episode 40: Avg Reward = 11.56, Success Rate = 0.62
Episode 50: Avg Reward = 15.84, Success Rate = 0.56
Episode 60: Avg Reward = 19.44, Success Rate = 0.64
Episode 70: Avg Reward = 22.68, Success Rate = 0.67
Episode 80: Avg Reward = 25.80, Success Rate = 0.62
Episode 90: Avg Reward = 29.08, Success Rate = 0.70
Episode 100: Avg Reward = 32.16, Success Rate = 0.70
Episode 110: Avg Reward = 37.44, Success Rate = 0.73
Episode 120: Avg Reward = 43.16, Success Rate = 0.72
Episode 130: Avg Reward = 48.18, Success Rate = 0.74
Episode 140: Avg Reward = 53.84, Success Rate = 0.85
Episode 150: Avg Reward = 57.94, Success Rate = 0.82
Episode 160: Avg Reward = 61.94, Success Rate = 0.73
Episode 170: Avg Reward = 65.84, Success Rate = 0.72
Episode 180: Avg Reward = 69.56, Success Rate = 0.85
Episod

In [28]:
# Display the rolling window of per-step flags (1 = the action earned the +1 reward)
action_accuracy

deque([1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1,
       1],