In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import os
from datetime import datetime
import torch
import sys
sys.path.append('../..')
from modules.many_features import utils, constants
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Reproducibility setup: use one seed for every random number source the
# notebook touches, then ask PyTorch to use deterministic algorithms.
SEED = 42

# NOTE: PYTHONHASHSEED is read by the interpreter at startup, so assigning it
# here only influences Python processes launched after this point; the hash
# randomization of the already-running kernel is not changed by this line.
os.environ['PYTHONHASHSEED'] = str(SEED)

# Seed Python's `random`, NumPy's legacy global RNG and PyTorch's RNG.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

# Switch PyTorch into deterministic-algorithms mode.
torch.use_deterministic_algorithms(True)

In [3]:
# Train one DQN per training-set size listed in `sizes`.
# Each iteration reads `train_set_{size}`, prepares NumPy inputs and hands them
# to `utils.stable_dqn3`. `dqn_model` is rebound on every pass, so after the
# loop it refers only to the model returned for the last size.
sizes = [0.5, 0.1, 0.05, 0.01]
for size in sizes:
    # Load this subset and report how many rows it contains.
    train_df = pd.read_csv(f'../../final/data/train_set_{size}')
    print(f'Size {size} has {len(train_df)} samples')

    # Replace missing values with the sentinel -1 before converting to arrays.
    train_df = train_df.fillna(-1)

    # All columns except the last go to X_train; the last column goes to y_train.
    X_train = np.array(train_df.iloc[:, :-1])
    y_train = np.array(train_df.iloc[:, -1])

    # NOTE(review): 6000000 is presumably the training timestep budget, True an
    # on/off option of the helper, and the path the model's save location --
    # confirm against utils.stable_dqn3.
    dqn_model = utils.stable_dqn3(
        X_train,
        y_train,
        6000000,
        True,
        f'../../final/models/dqn_set_{size}',
    )

Size 0.5 has 28000 samples
using stable baselines 3
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.44     |
|    ep_rew_mean      | -0.84    |
|    exploration_rate | 0.506    |
|    success_rate     | 0.12     |
| time/               |          |
|    episodes         | 100000   |
|    fps              | 1534     |
|    time_elapsed     | 203      |
|    total_timesteps  | 312050   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.63     |
|    n_updates        | 65512    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.38     |
|    ep_rew_mean      | -0.98    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.1      |
| time/               |          |
|    episodes         | 200000   |
|    fps      

Size 0.1 has 5600 samples
using stable baselines 3
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.33     |
|    ep_rew_mean      | -0.9     |
|    exploration_rate | 0.511    |
|    success_rate     | 0.08     |
| time/               |          |
|    episodes         | 100000   |
|    fps              | 1568     |
|    time_elapsed     | 196      |
|    total_timesteps  | 308774   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.06     |
|    n_updates        | 64693    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.32     |
|    ep_rew_mean      | -0.96    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.16     |
| time/               |          |
|    episodes         | 200000   |
|    fps       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.02     |
|    ep_rew_mean      | -1       |
|    exploration_rate | 0.05     |
|    success_rate     | 0.1      |
| time/               |          |
|    episodes         | 200000   |
|    fps              | 1229     |
|    time_elapsed     | 575      |
|    total_timesteps  | 707536   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 17.2     |
|    n_updates        | 164383   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.72     |
|    ep_rew_mean      | -1       |
|    exploration_rate | 0.05     |
|    success_rate     | 0.12     |
| time/               |          |
|    episodes         | 300000   |
|    fps              | 1137     |
|    time_elapsed     | 939      |
|    total_timesteps  | 1068788  |
| train/              |          |
|    learning_rate  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.1      |
|    ep_rew_mean      | -0.96    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.12     |
| time/               |          |
|    episodes         | 300000   |
|    fps              | 1114     |
|    time_elapsed     | 949      |
|    total_timesteps  | 1057598  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.743    |
|    n_updates        | 251899   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.03     |
|    ep_rew_mean      | -0.46    |
|    exploration_rate | 0.05     |
|    success_rate     | 0.31     |
| time/               |          |
|    episodes         | 400000   |
|    fps              | 1061     |
|    time_elapsed     | 1395     |
|    total_timesteps  | 1481439  |
| train/              |          |
|    learning_rate  

In [None]:
# Train one DQN per training-set size listed in `sizes`, this time reading the
# `train_set_noisy_6_{size}` files. `dqn_model` is rebound on every pass, so
# after the loop it refers only to the model returned for the last size.
sizes = [0.5, 0.1, 0.05, 0.01]
for size in sizes:
    # Load this subset and report how many rows it contains.
    train_df = pd.read_csv(f'../../final/data/train_set_noisy_6_{size}')
    print(f'Size {size} has {len(train_df)} samples')

    # Replace missing values with the sentinel -1 before converting to arrays.
    train_df = train_df.fillna(-1)

    # All columns except the last go to X_train; the last column goes to y_train.
    X_train = np.array(train_df.iloc[:, :-1])
    y_train = np.array(train_df.iloc[:, -1])

    # NOTE(review): 12000000 is presumably the training timestep budget, True
    # an on/off option of the helper, and the path the model's save location --
    # confirm against utils.stable_dqn3.
    dqn_model = utils.stable_dqn3(
        X_train,
        y_train,
        12000000,
        True,
        f'../../final/models/dqn_set_noisy_6_{size}',
    )

Size 0.5 has 28000 samples
using stable baselines 3
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.38     |
|    ep_rew_mean      | -0.94    |
|    exploration_rate | 0.771    |
|    success_rate     | 0.03     |
| time/               |          |
|    episodes         | 100000   |
|    fps              | 880      |
|    time_elapsed     | 327      |
|    total_timesteps  | 288778   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.01     |
|    n_updates        | 59694    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.78     |
|    ep_rew_mean      | -0.92    |
|    exploration_rate | 0.502    |
|    success_rate     | 0.06     |
| time/               |          |
|    episodes         | 200000   |
|    fps      