In [1]:
!pip install gymnasium




In [2]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
import pandas as pd



In [3]:
class hr_sim_env(gym.Env):
    """Replay environment that scores heart-rate predictions row by row.

    Each row of the CSV is one step: the observation is that row's feature
    vector and the action is the agent's heart-rate estimate for it. The
    reward is the negative absolute error against the row's ``simulated_hr``
    value, so rewards are always <= 0 and 0 means a perfect prediction.
    One episode is a single pass over the file in row order.

    NOTE: ``reset`` returns only the observation and ``step`` returns the
    4-tuple ``(obs, reward, done, info)``. That is the legacy Gym calling
    convention and is kept so existing callers keep working; current
    Gymnasium tooling expects ``(obs, info)`` from ``reset`` and a 5-tuple
    ``(obs, reward, terminated, truncated, info)`` from ``step``.
    """

    def __init__(self, csv_path):
        """Load the dataset and define the observation and action spaces.

        Args:
            csv_path: Path (or anything ``pandas.read_csv`` accepts) of a CSV
                containing a ``simulated_hr`` column plus feature columns
                that can be cast to float32. A ``filename`` column, if
                present, is excluded from the features.

        Raises:
            ValueError: If the CSV contains no data rows.
        """
        super().__init__()

        # Load the data (features + simulated_hr)
        self.df = pd.read_csv(csv_path)
        if len(self.df) == 0:
            # Fail here rather than with an IndexError on the first reset().
            raise ValueError(f"{csv_path!r} contains no rows; cannot build an episode")
        self.current_idx = 0

        # Observation = every column except the identifier and the target
        feature_cols = [col for col in self.df.columns if col not in ['filename', 'simulated_hr']]
        self.features = self.df[feature_cols].values.astype(np.float32)

        # Per-row simulated HR targets
        self.targets = self.df['simulated_hr'].values.astype(np.float32)

        # Observation space shape = number of features
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.features.shape[1],), dtype=np.float32)

        # Action space = predicted heart rate (continuous scalar between 50 and 160)
        self.action_space = spaces.Box(low=50, high=160, shape=(1,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its feature vector.

        Returns:
            A float32 array of shape ``(n_features,)``. It is a copy, so the
            caller may modify it without touching the stored dataset.
        """
        self.current_idx = 0
        return self.features[self.current_idx].copy()

    def step(self, action):
        """Score one prediction against the current row and advance.

        Args:
            action: Predicted heart rate for the current row. May be a
                scalar, a 0-d array, or a sequence/array whose first element
                is the prediction (as produced by ``action_space.sample()``).

        Returns:
            Tuple ``(obs, reward, done, info)``:
            ``obs`` is the next row's features, or all zeros once the last
            row has been consumed; ``reward`` is ``-abs(pred - true)``;
            ``done`` is True after the last row; ``info`` holds ``true_hr``,
            ``pred_hr`` and ``error`` for the row just scored.

        Raises:
            IndexError: If called after the episode has finished without an
                intervening ``reset()``.
        """
        if self.current_idx >= len(self.targets):
            raise IndexError("step() called after the episode finished; call reset() first")

        # Target HR for the row being scored
        true_hr = self.targets[self.current_idx]

        # Flatten first so scalar and 0-d actions work as well as shape-(1,) ones
        pred_hr = np.asarray(action).reshape(-1)[0]
        error = abs(pred_hr - true_hr)
        reward = -error  # penalize error; higher reward when error is smaller

        self.current_idx += 1

        # The episode ends once every row has been scored
        done = self.current_idx >= len(self.targets)
        if done:
            # No next row exists; return a zero placeholder observation
            obs = np.zeros_like(self.features[0])
        else:
            # Copy so in-place edits by the caller cannot corrupt the dataset
            obs = self.features[self.current_idx].copy()

        info = {'true_hr': true_hr, 'pred_hr': pred_hr, 'error': error}
        return obs, reward, done, info

In [4]:
# Sanity check: run one full episode using actions sampled at random from the action space.
env = hr_sim_env("speech_features_with_hr.csv")
# Seed the action sampler so the printed rollout is the same on every Restart & Run All.
env.action_space.seed(0)
obs = env.reset()

done = False
while not done:
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    print(f"True HR: {info['true_hr']:.2f}, Predicted HR: {info['pred_hr']:.2f}, Reward: {reward:.2f}")


True HR: 99.58, Predicted HR: 141.31, Reward: -41.73
True HR: 99.54, Predicted HR: 71.03, Reward: -28.52
True HR: 100.00, Predicted HR: 124.78, Reward: -24.78
True HR: 100.00, Predicted HR: 81.50, Reward: -18.50
True HR: 100.00, Predicted HR: 108.34, Reward: -8.34
True HR: 100.00, Predicted HR: 75.50, Reward: -24.50
True HR: 98.07, Predicted HR: 64.75, Reward: -33.31
True HR: 100.00, Predicted HR: 73.86, Reward: -26.14
True HR: 97.66, Predicted HR: 66.90, Reward: -30.76
True HR: 100.00, Predicted HR: 128.37, Reward: -28.37
True HR: 99.10, Predicted HR: 100.07, Reward: -0.96
True HR: 100.00, Predicted HR: 158.40, Reward: -58.40
True HR: 100.00, Predicted HR: 77.39, Reward: -22.61
True HR: 100.00, Predicted HR: 77.93, Reward: -22.07
True HR: 93.01, Predicted HR: 144.66, Reward: -51.65
True HR: 100.00, Predicted HR: 69.96, Reward: -30.04
True HR: 100.00, Predicted HR: 100.13, Reward: -0.13
True HR: 100.00, Predicted HR: 100.48, Reward: -0.48
True HR: 96.31, Predicted HR: 99.82, Reward: -3

In [None]:
# Summarise the simulated heart-rate target: count, mean, std, min/max and quartiles.
df = pd.read_csv("speech_features_with_hr.csv")
print(df.loc[:, 'simulated_hr'].describe())



count    2703.000000
mean       97.257622
std         2.975958
min        84.093962
25%        94.822858
50%        98.200201
75%       100.000000
max       100.000000
Name: simulated_hr, dtype: float64
