In [30]:
import gym
import numpy as np
from gym import spaces
from sklearn.preprocessing import LabelEncoder

In [31]:
import pandas as pd

In [32]:
# Preprocessing the dataset
def preprocess_data(df):
    """Turn the raw sensor log into numeric state and label arrays.

    The function works on a copy of ``df``: the caller's frame is left
    untouched, so calling it repeatedly on the same raw frame always gives
    the same result (assigning into the input would turn every
    'Weapon Detected' value into 0 on the second call, because the column
    would no longer contain the string 'Yes').

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Timestamp', 'Presence Sensor',
        'Persons Recognized', 'Weapon Detected' and 'Action'.

    Returns
    -------
    state_data : numpy.ndarray
        One row per record with the columns (Timestamp code, Presence Sensor,
        Persons Recognized, Weapon Detected flag), in that order.
    actions : numpy.ndarray
        The 'Action' column, aligned row-for-row with ``state_data``.
    """
    # Work on a private copy so the raw frame stays reusable by the caller.
    df = df.copy()

    # Convert 'Weapon Detected' to binary: 1 for 'Yes', 0 for anything else
    df['Weapon Detected'] = df['Weapon Detected'].eq('Yes').astype('int64')

    # Encode 'Timestamp' using a LabelEncoder for simplicity.
    # NOTE(review): the codes follow the sorted order of the label strings,
    # which for 'HH:MM AM/PM' text is presumably not chronological -- confirm
    # that an ordinal time feature is not required downstream.
    label_encoder = LabelEncoder()
    df['Timestamp'] = label_encoder.fit_transform(df['Timestamp'])

    # Convert the dataframe into numpy arrays for faster operations
    feature_columns = ['Timestamp', 'Presence Sensor', 'Persons Recognized', 'Weapon Detected']
    state_data = df[feature_columns].values
    actions = df['Action'].values

    return state_data, actions

In [33]:
# Load the raw sensor log from a path relative to the working directory.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column, presumably a
# saved index; `index_col=0` would drop it -- confirm before changing.
data = pd.read_csv('weekday_normal.csv')

In [34]:
# Display the loaded frame for a quick visual check of columns and values.
data

Unnamed: 0,Timestamp,Presence Sensor,Persons Recognized,Weapon Detected,Action
0,00:00 AM,0,0,No,0
1,00:10 AM,0,0,No,0
2,00:20 AM,0,0,No,0
3,00:30 AM,0,0,No,0
4,00:40 AM,0,0,No,0
...,...,...,...,...,...
139,11:10 PM,0,0,No,0
140,11:20 PM,0,0,No,0
141,11:30 PM,0,0,No,0
142,11:40 PM,0,0,No,0


In [35]:
# Load and preprocess the data.
# Hand over a copy so the raw `data` frame stays unmodified whatever the
# preprocessing does to its input; this keeps the cell safe to re-run.
state_data, actions = preprocess_data(data.copy())

In [36]:
# Inspect the encoded states. Column order: Timestamp code, Presence Sensor,
# Persons Recognized, Weapon Detected flag.
state_data

array([[  0,   0,   0,   0],
       [  1,   0,   0,   0],
       [  2,   0,   0,   0],
       [  3,   0,   0,   0],
       [  4,   0,   0,   0],
       [  5,   0,   0,   0],
       [  6,   0,   0,   0],
       [  8,   0,   0,   0],
       [ 10,   0,   0,   0],
       [ 12,   0,   0,   0],
       [ 14,   0,   0,   0],
       [ 16,   0,   0,   0],
       [ 18,   0,   0,   0],
       [ 20,   0,   0,   0],
       [ 22,   0,   0,   0],
       [ 24,   0,   0,   0],
       [ 26,   0,   0,   0],
       [ 28,   0,   0,   0],
       [ 30,   0,   0,   0],
       [ 32,   0,   0,   0],
       [ 34,   0,   0,   0],
       [ 36,   0,   0,   0],
       [ 38,   0,   0,   0],
       [ 40,   0,   0,   0],
       [ 42,   0,   0,   0],
       [ 44,   0,   0,   0],
       [ 46,   0,   0,   0],
       [ 48,   0,   0,   0],
       [ 50,   0,   0,   0],
       [ 52,   0,   0,   0],
       [ 54,   0,   0,   0],
       [ 56,   0,   0,   0],
       [ 58,   0,   0,   0],
       [ 60,   1,   1,   0],
       [ 62,  

In [37]:
# Inspect the target labels taken from the 'Action' column.
# NOTE(review): in the rendered output every label is 0, so always choosing
# action 0 already earns the maximum reward -- confirm the dataset is meant
# to contain no alarm events.
actions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [97]:
# Define the custom environment
class SecuritySystemEnv(gym.Env):
    """Replay a labelled sensor log as a gym environment.

    Each row of ``state_data`` is one time step. The agent picks an action
    index and is rewarded +1 when it matches the recorded action for that
    row and -1 otherwise. An episode is one pass over the whole dataset.

    The class implements the classic gym API: ``reset()`` returns only the
    observation and ``step()`` returns ``(obs, reward, done, info)``.

    Parameters
    ----------
    state_data : array-like, shape (n_steps, n_features)
        Numeric state rows. Values are assumed to be non-negative, which is
        what the declared observation space promises -- TODO confirm for any
        new dataset.
    actions : array-like, shape (>= n_steps,)
        Recorded action code per row; the reward logic compares it against
        ``action * 2``, i.e. codes 0 and 2.

    Raises
    ------
    ValueError
        If ``state_data`` is not a non-empty 2-D array or ``actions`` has
        fewer entries than ``state_data`` has rows.
    """

    def __init__(self, state_data, actions):
        super(SecuritySystemEnv, self).__init__()
        self.state_data = np.asarray(state_data)  # The states from the dataset
        self.actions_data = np.asarray(actions)  # The action column

        # Fail early instead of with an IndexError in the middle of an episode.
        if self.state_data.ndim != 2 or len(self.state_data) == 0:
            raise ValueError("state_data must be a non-empty 2-D array (rows x features)")
        if len(self.actions_data) < len(self.state_data):
            raise ValueError("actions must provide one label per row of state_data")

        # One observation entry per state column. The bounds are fixed
        # (dataset-independent) and unbounded above, because the encoded
        # timestamp column is not limited to [0, 1].
        n_features = self.state_data.shape[1]
        self.observation_space = spaces.Box(
            low=0.0, high=np.inf, shape=(n_features,), dtype=np.float32
        )

        # Two action indices: 0 -> dataset code 0, 1 -> dataset code 2
        # (see the `action * 2` mapping in step()).
        self.action_space = spaces.Discrete(2)

        # Track the current index (row in the dataset)
        self.current_step = 0

    def _observation(self, index):
        """Return row ``index`` as a fresh float32 array matching observation_space."""
        return self.state_data[index].astype(np.float32)

    def reset(self):
        """Rewind to the first row and return it as the initial observation."""
        self.current_step = 0
        return self._observation(self.current_step)

    def step(self, action):
        """Score ``action`` against the current row and advance by one row.

        Returns ``(next_state, reward, done, info)``. When the last row has
        been consumed ``done`` is True and ``next_state`` is an all-zero
        placeholder.
        """
        # Get the correct action from the dataset for this step
        correct_action = self.actions_data[self.current_step]

        # Reward: +1 if the chosen index maps (index * 2) to the dataset code, -1 otherwise
        reward = 1 if (action * 2) == correct_action else -1

        # Move to the next step (next row in the dataset)
        self.current_step += 1

        # Check if we have reached the end of the dataset
        done = self.current_step >= len(self.state_data)

        if done:
            # Placeholder terminal observation, same dtype/shape as regular ones
            next_state = np.zeros(self.state_data.shape[1], dtype=np.float32)
            # Clamp so render() (and any stray extra step) stays in bounds
            self.current_step = len(self.state_data) - 1
        else:
            next_state = self._observation(self.current_step)

        return next_state, reward, done, {}

    def render(self, mode='human'):
        """Print the current row index and the raw dataset row at that index."""
        print(f"Step: {self.current_step}, State: {self.state_data[self.current_step]}")
    

In [98]:
from stable_baselines3 import DQN

In [99]:
# Build the replay environment from the preprocessed states and their labels
env = SecuritySystemEnv(state_data=state_data, actions=actions)

In [100]:
# Number of rows (time steps) in the state matrix
state_data.shape[0]

144

In [101]:
# Last row of the state matrix; a negative index avoids hard-coding the
# dataset length (143 only works for a 144-row file).
state_data[-1]

array([137,   0,   0,   0])

In [102]:
# Set up a DQN agent with the default MLP policy on the replay environment
model = DQN(policy='MlpPolicy', env=env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [103]:
# Fit the agent for a fixed budget of environment steps
model.learn(total_timesteps=10_000)

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 144      |
|    ep_rew_mean      | 32.5     |
|    exploration_rate | 0.453    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 2240     |
|    time_elapsed     | 0        |
|    total_timesteps  | 576      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0929   |
|    n_updates        | 118      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 144      |
|    ep_rew_mean      | 75       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1899     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1152     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.106    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 144      |
|    ep_rew_mean      | 129      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 68       |
|    fps              | 1631     |
|    time_elapsed     | 6        |
|    total_timesteps  | 9792     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000182 |
|    n_updates        | 2422     |
----------------------------------


<stable_baselines3.dqn.dqn.DQN at 0x7f47494e8b20>

In [107]:
# Persist the trained agent to disk
model.save(path="dqn_security_system")

In [66]:
# Load the trained model (if needed in the future)
# model = DQN.load("dqn_security_system")

In [110]:
# Prepare an evaluation episode: initial observation, termination flag, running return
state, done, total_reward = env.reset(), False, 0

In [111]:
# Start a fresh episode here so this cell can be re-run on its own; relying on
# `done`/`state` left over from another cell makes a second run a silent no-op.
state = env.reset()
done = False
total_reward = 0

while not done:
    # deterministic=True evaluates the greedy policy; the default still takes
    # random exploratory actions with DQN's exploration rate.
    action, _ = model.predict(state, deterministic=True)
    next_state, reward, done, _ = env.step(action)  # Take the action
    print(f"Reward is {reward}")
    total_reward += reward  # Accumulate rewards
    env.render()  # Print the row the environment has advanced to
    state = next_state  # Move to the next state

Reward is 1
Step: 1, State: [1 0 0 0]
Reward is 1
Step: 2, State: [2 0 0 0]
Reward is 1
Step: 3, State: [3 0 0 0]
Reward is 1
Step: 4, State: [4 0 0 0]
Reward is 1
Step: 5, State: [5 0 0 0]
Reward is 1
Step: 6, State: [6 0 0 0]
Reward is 1
Step: 7, State: [8 0 0 0]
Reward is 1
Step: 8, State: [10  0  0  0]
Reward is 1
Step: 9, State: [12  0  0  0]
Reward is 1
Step: 10, State: [14  0  0  0]
Reward is 1
Step: 11, State: [16  0  0  0]
Reward is 1
Step: 12, State: [18  0  0  0]
Reward is 1
Step: 13, State: [20  0  0  0]
Reward is 1
Step: 14, State: [22  0  0  0]
Reward is 1
Step: 15, State: [24  0  0  0]
Reward is 1
Step: 16, State: [26  0  0  0]
Reward is 1
Step: 17, State: [28  0  0  0]
Reward is 1
Step: 18, State: [30  0  0  0]
Reward is 1
Step: 19, State: [32  0  0  0]
Reward is 1
Step: 20, State: [34  0  0  0]
Reward is 1
Step: 21, State: [36  0  0  0]
Reward is 1
Step: 22, State: [38  0  0  0]
Reward is 1
Step: 23, State: [40  0  0  0]
Reward is 1
Step: 24, State: [42  0  0  0]
Rewar

In [106]:
# Episode return accumulated by the evaluation loop.
# NOTE(review): this cell's execution count is lower than the evaluation
# cells', so the displayed value may be stale -- re-run after the loop.
total_reward

140