In [1]:
import warnings
# Silence every warning category for the rest of the session so library
# deprecation chatter does not clutter cell outputs.
# NOTE(review): this also hides warnings that may signal real problems;
# consider narrowing the filter once the notebook is stable.
warnings.filterwarnings("ignore")

In [2]:
import joblib
import numpy as np
import pandas as pd
import lightgbm as lgb


import torch
import torch.nn as nn

import gym
from stable_baselines3 import PPO, A2C, DQN, SAC, TD3
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt
# Seed every random number generator the notebook draws from directly.
# The environment state is sampled with torch.randn, so seeding NumPy alone
# does not make runs repeatable. The torch call comes first because it returns
# a Generator object; ending the cell on np.random.seed (which returns None)
# keeps the cell output empty.
random_state = 6
torch.manual_seed(random_state)
np.random.seed(random_state)

In [3]:
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


### Load data

In [4]:
## Read the three space-separated splits with pandas
train_file = "data/rupturemodel_train.txt"
val_file = "data/rupturemodel_validate.txt"
test_file = "data/rupturemodel_test.txt"

# The files carry no header row, so the column names are attached afterwards.
columns =  ['height', 'width', 'sxx', 'sxy', 'syy', 'sdrop', 'mud', 'dc', 'label']

df_train, df_val, df_test = (
    pd.read_csv(split_path, sep=" ", header = None)
    for split_path in (train_file, val_file, test_file)
)
for split_frame in (df_train, df_val, df_test):
    split_frame.columns = columns

# Fold the validation rows into the training frame; the test split stays apart.
frames = [df_train, df_val]
df_train = pd.concat(frames)
print('train data shape {} and test data shape {}'.format(df_train.shape, df_test.shape))

train data shape (1600, 9) and test data shape (400, 9)


### Feature Engineering

In [5]:
def create_new_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with seven derived columns.

    The input frame is not modified. It must provide the columns ``height``,
    ``width``, ``sxx``, ``sxy``, ``syy``, ``sdrop``, ``mud`` and ``dc``.

    Added columns, in order:
        height_width_ratio:            height / width
        normal_stress_diff:            sxx - syy
        friction_product:              mud * sdrop
        stress_ratio:                  sxy / syy
        static_dynamic_friction_diff:  (mud + sdrop) - mud
        stress_diff_dynamic_strength:  sxy - syy * mud
        normalized_dc:                 dc / width
    """
    height, width = df['height'], df['width']
    sxx, sxy, syy = df['sxx'], df['sxy'], df['syy']
    sdrop, mud, dc = df['sdrop'], df['mud'], df['dc']

    # The arithmetic is kept exactly as written (including the
    # "(mud + sdrop) - mud" form) so the floating-point results do not change.
    return df.assign(
        height_width_ratio=height / width,
        normal_stress_diff=sxx - syy,
        friction_product=mud * (sdrop),
        stress_ratio=sxy / syy,
        static_dynamic_friction_diff=(mud + sdrop) - mud,
        stress_diff_dynamic_strength=sxy - (syy * mud),
        normalized_dc=dc / width,
    )

In [6]:
# Re-read the raw training split on its own.
# NOTE(review): this rebinds df_train, replacing the train+validation frame
# built when the data was first loaded -- confirm that is intended.
train_file = "data/rupturemodel_train.txt"
columns = ['height', 'width', 'sxx', 'sxy',
           'syy', 'sdrop', 'mud', 'dc', 'label']
df_train = pd.read_csv(train_file, sep=" ", header=None).set_axis(columns, axis=1)

In [8]:
# Define your reinforcement learning environment
from typing import List

class GeneratorEnv(gym.Env):
    """Gym environment that scores candidate parameter vectors.

    An action is a vector of 8 values in [0, 1], one per raw input column, that
    is mapped back to physical units with the saved scaler's
    ``inverse_transform``. The reward of a step is the supervised model's
    prediction for the de-normalized, feature-engineered action. Observations
    are freshly sampled standard-normal noise vectors, and ``done`` is always
    ``False``, so episodes only end when the caller stops stepping.
    """

    def __init__(self, supervised_model):
        """Store the scoring model, load the scaler and define the spaces.

        Args:
            supervised_model: object whose ``predict`` is called on the
                engineered feature matrix to produce the reward.
        """
        super(GeneratorEnv, self).__init__()
        self.supervised_model = supervised_model
        self.generator_input_size = 100
        self.scaler = joblib.load('./models/scaler.pkl')
        self.state = self._sample_state()
        self.action_space = gym.spaces.Box(
            low=0, high=1, shape=(8,), dtype=np.float32)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.generator_input_size,), dtype=np.float32)

    def _sample_state(self):
        """Draw a new noise observation with the shape of ``observation_space``."""
        return torch.randn(self.generator_input_size).to(device)

    def step(self, action):
        """Score ``action`` and return ``(observation, reward, done, info)``.

        Args:
            action: array-like with one value per raw input column.

        Returns:
            The new noise observation as a NumPy array, the supervised model's
            prediction as a Python float, ``False`` and an empty info dict.
        """
        # The state is resampled before scoring, and with the same 1-D shape
        # that reset() returns, so observations match observation_space.
        self.state = self._sample_state()
        processed_data = self.process_for_supervised_model(np.array(action))
        prediction = self.supervised_model.predict(processed_data)
        # predict() is called on a single row; unwrap its first value so the
        # reward is a plain scalar rather than an array.
        reward = float(np.asarray(prediction).reshape(-1)[0])
        done = False
        info = {}
        return self.state.cpu().numpy(), reward, done, info

    def reset(self):
        """Resample the noise state and return it as a NumPy array."""
        self.state = self._sample_state()
        return self.state.cpu().numpy()

    def process_for_supervised_model(self, generated_data: np.ndarray) -> np.ndarray:
        """Turn one normalized action into the supervised model's feature row.

        Args:
            generated_data: array holding a single sample of the 8 raw inputs
                in the scaler's transformed space.

        Returns:
            2-D array with one row: the de-normalized inputs followed by the
            columns added by ``create_new_features``.
        """
        columns = ['height', 'width', 'sxx',
                   'sxy', 'syy', 'sdrop', 'mud', 'dc']
        # The scaler expects a 2-D (n_samples, n_features) array.
        de_normalized = self.scaler.inverse_transform(
            generated_data.reshape(1, -1))
        df = pd.DataFrame(de_normalized, columns=columns)
        df = create_new_features(df)
        return df.values

In [9]:
# Load the saved LightGBM booster that scores generated samples, then wrap a
# single GeneratorEnv in the vectorized interface Stable-Baselines3 works with.
supervised_model = lgb.Booster(model_file='./models/best_supervised_model.txt')


def make_generator_env():
    """Build one GeneratorEnv bound to the loaded supervised model."""
    return GeneratorEnv(supervised_model)


env = DummyVecEnv([make_generator_env])

In [10]:
# Fit a PPO agent with an MLP policy on the generator environment; training
# metrics are written under ./logs/rl_logs/ for TensorBoard.
model = PPO(
    policy='MlpPolicy',
    env=env,
    verbose=1,
    tensorboard_log="./logs/rl_logs/",
)
model.learn(total_timesteps=100000)

Using cuda device
Logging to ./logs/rl_logs/PPO_21
-----------------------------
| time/              |      |
|    fps             | 322  |
|    iterations      | 1    |
|    time_elapsed    | 6    |
|    total_timesteps | 2048 |
-----------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 306        |
|    iterations           | 2          |
|    time_elapsed         | 13         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.02164761 |
|    clip_fraction        | 0.19       |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.3      |
|    explained_variance   | -0.312     |
|    learning_rate        | 0.0003     |
|    loss                 | 15.2       |
|    n_updates            | 10         |
|    policy_gradient_loss | -0.0564    |
|    std                  | 0.997      |
|    value_loss           | 39.4       |
-

<stable_baselines3.ppo.ppo.PPO at 0x7f865c14b4c0>

In [22]:
# Base file name (under ./models/) shared by the saved policy and environment.
model_name = 'rl_model_ppo'

In [23]:
### Persist the trained policy and its environment side by side
model_path = f'./models/{model_name}'
env_path = f'./models/{model_name}_env.joblib'
model.save(model_path)
joblib.dump(env, env_path)

['./models/rl_model_ppo_env.joblib']

In [13]:
### Restore the saved policy and environment from ./models/
saved_model_path = f'./models/{model_name}'
saved_env_path = f'./models/{model_name}_env.joblib'
loaded_model = PPO.load(saved_model_path)
loaded_env = joblib.load(saved_env_path)

In [14]:
# Roll the reloaded policy through the reloaded environment and keep, for each
# step, the action it proposed and the reward it earned. The environment is
# vectorized, so index 0 selects the single wrapped environment.
rewards_array, generated_data = [], []
n_rollout_steps = 1000
obs = loaded_env.reset()
for i in range(n_rollout_steps):
    action, _states = loaded_model.predict(obs)
    obs, rewards, dones, info = loaded_env.step(action)
    first_env_action, first_env_reward = action[0], rewards[0]
    generated_data.append(list(first_env_action))
    rewards_array.append(first_env_reward)

In [15]:
## Convert the collected actions into the supervised model's feature layout
scaler = joblib.load('./models/scaler.pkl')


def process_for_supervised_model(generated_data):
    """Map normalized samples to physical units and add the engineered features.

    Args:
        generated_data: 2-D array-like with one row per sample and one column
            per raw input, in the order height, width, sxx, sxy, syy, sdrop,
            mud, dc. It is passed to ``scaler.inverse_transform`` unchanged.

    Returns:
        DataFrame holding the de-normalized inputs plus the columns added by
        ``create_new_features``.
    """
    raw_feature_names = ['height', 'width', 'sxx',
                         'sxy', 'syy', 'sdrop', 'mud', 'dc']
    physical_values = scaler.inverse_transform(generated_data)
    physical_frame = pd.DataFrame(physical_values, columns=raw_feature_names)
    return create_new_features(physical_frame)


data = np.array(generated_data)
df_generated = process_for_supervised_model(data)

In [19]:
# Show 20 randomly chosen generated rows as a quick sanity check.
df_generated.sample(20)

Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc,height_width_ratio,normal_stress_diff,friction_product,stress_ratio,static_dynamic_friction_diff,stress_diff_dynamic_strength,normalized_dc
64,0.074235,1.07933,-198.643585,97.61821,-159.945221,0.200579,0.351016,0.233045,0.068778,-38.698364,0.070407,-0.610323,0.200579,153.761627,0.215916
131,3e-06,1.000214,-198.643585,97.61821,-159.945221,0.200579,0.200045,0.411556,3e-06,-38.698364,0.040125,-0.610323,0.200579,129.614456,0.411468
958,3e-06,1.000214,-198.643585,97.61821,-159.945221,0.200579,0.27738,0.580134,3e-06,-38.698364,0.055637,-0.610323,0.200579,141.983856,0.58001
804,3e-06,1.000214,-198.643585,97.61821,-43.04372,0.200579,0.307973,0.583352,3e-06,-155.599869,0.061773,-2.267885,0.200579,110.874527,0.583227
699,3e-06,1.000214,-113.520729,97.61821,-159.945221,0.200579,0.406956,0.496606,3e-06,46.424492,0.081627,-0.610323,0.200579,162.708862,0.4965
915,3e-06,1.000214,-122.617569,97.61821,-159.945221,0.200579,0.573047,0.583352,3e-06,37.327652,0.114941,-0.610323,0.200579,189.274338,0.583227
654,3e-06,1.000214,-29.47089,97.61821,-159.945221,0.200579,0.573047,0.583352,3e-06,130.474335,0.114941,-0.610323,0.200579,189.274338,0.583227
68,3e-06,1.000214,-198.643585,97.61821,-114.239517,0.200579,0.573047,0.583352,3e-06,-84.404068,0.114941,-0.854505,0.200579,163.082825,0.583227
106,3e-06,1.000214,-78.017754,97.61821,-159.945221,0.200579,0.573047,0.583352,3e-06,81.927467,0.114941,-0.610323,0.200579,189.274338,0.583227
66,3e-06,1.000214,-154.805099,97.61821,-159.945221,0.200579,0.573047,0.583352,3e-06,5.140121,0.114941,-0.610323,0.200579,189.274338,0.583227


In [20]:
# Summary statistics of the training frame, for comparison with the generated data.
df_train.describe()

Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc,label
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.071747,1.492561,-85.727744,35.405536,-85.29595,0.390677,0.306744,0.401653,0.356
std,0.046195,0.289699,45.301701,20.402395,42.617577,0.112238,0.088545,0.051051,0.479055
min,7e-05,1.000214,-196.579802,2.491208,-159.945235,0.200579,0.200045,0.25448,0.0
25%,0.030697,1.24686,-120.756137,19.565099,-121.961244,0.292298,0.233558,0.369119,0.0
50%,0.068957,1.480246,-83.074266,32.316379,-86.0228,0.387075,0.285329,0.400085,0.0
75%,0.105718,1.739429,-48.382402,49.338103,-48.805945,0.482003,0.357936,0.434763,1.0
max,0.195712,2.08524,-7.933036,97.528776,-10.044879,0.599913,0.573047,0.583352,1.0


In [21]:
# Summary statistics of the generated samples, including the engineered columns.
df_generated.describe()

Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc,height_width_ratio,normal_stress_diff,friction_product,stress_ratio,static_dynamic_friction_diff,stress_diff_dynamic_strength,normalized_dc
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.00992,1.145984,-152.41098,95.80603,-147.582855,0.213223,0.469055,0.509479,0.00916,-4.828269,0.099655,-0.788919,0.213223,165.131363,0.466428
std,0.0312,0.2968,65.652687,8.517452,30.211641,0.047509,0.13616,0.120401,0.028974,72.507317,0.03478,0.945068,0.047509,26.704126,0.139873
min,3e-06,1.000214,-198.643585,6.414491,-159.945221,0.200579,0.200045,0.233045,1e-06,-188.631592,0.040125,-9.750129,0.200579,38.410732,0.109466
25%,3e-06,1.000214,-198.643585,97.61821,-159.945221,0.200579,0.374204,0.464942,3e-06,-38.698364,0.078491,-0.610323,0.200579,145.352219,0.35009
50%,3e-06,1.000214,-198.643585,97.61821,-159.945221,0.200579,0.568429,0.583352,3e-06,-38.698364,0.114941,-0.610323,0.200579,174.424789,0.552838
75%,3e-06,1.115994,-112.022064,97.61821,-159.945221,0.200579,0.573047,0.583352,3e-06,36.65629,0.114941,-0.610323,0.200579,189.274338,0.583227
max,0.195712,2.128935,-7.933036,97.61821,-10.011992,0.599913,0.573047,0.583352,0.19567,152.012192,0.309726,-0.040104,0.599913,189.274338,0.583227


In [17]:
# Collect the per-step rewards from the rollout into a one-column frame and
# summarize their distribution.
df_rewards = pd.DataFrame(rewards_array, columns=['reward'])
df_rewards.describe()

Unnamed: 0,reward
count,1000.0
mean,1.109851
std,0.128231
min,0.311787
25%,1.047339
50%,1.137824
75%,1.192418
max,1.398238


In [18]:
# Histogram of the rollout rewards, drawn with Altair
reward_axis = alt.X('reward', bin=alt.Bin(maxbins=100))
reward_hist = alt.Chart(df_rewards).mark_bar().encode(reward_axis, y='count()')
reward_hist.properties(width=600, height=400).interactive()

In [None]:
# Stack the training rows on top of the generated rows and tag each row with
# the frame it came from.
combined_df = pd.concat([df_train, df_generated], ignore_index=True)
source_labels = [
    tag
    for tag, source_frame in (('train', df_train), ('generated', df_generated))
    for _row in range(len(source_frame))
]
combined_df['data_type'] = source_labels

In [None]:
# Histogram of `width`, colored by data source (train vs. generated), drawn with Altair
width_axis = alt.X('width', bin=alt.Bin(maxbins=100))
width_hist = alt.Chart(combined_df).mark_bar().encode(
    width_axis, y='count()', color='data_type')
width_hist.properties(width=600, height=400).interactive()
 

In [None]:
# Attach each generated sample's reward as a new column; pandas aligns the
# values on the row index of the two frames.
df_generated['rewards']  = df_rewards['reward']

In [None]:
# Rectangles over binned `sdrop` (x) and binned `width` (y), colored by the
# `rewards` column.
sdrop_axis = alt.X('sdrop', bin=alt.Bin(maxbins=100))
width_bins_axis = alt.Y('width', bin=alt.Bin(maxbins=100))
reward_color = alt.Color('rewards', scale=alt.Scale(scheme='redyellowblue'))
reward_heatmap = alt.Chart(df_generated).mark_rect().encode(
    sdrop_axis, width_bins_axis, reward_color)
reward_heatmap.properties(width=600, height=400).interactive()


### Understand the parameters of the generator

In [None]:
import optuna
import numpy as np

# Define the objective function to optimize


def objective(trial):
    """Score one Optuna-proposed parameter vector with the supervised model.

    The trial proposes a value in [0, 1] for each of the eight raw inputs,
    the same normalized range that ``GeneratorEnv.action_space`` uses. The
    candidate is de-normalized and feature-engineered by
    ``process_for_supervised_model`` and then scored by ``supervised_model``.

    Args:
        trial: Optuna trial used to sample the candidate parameters.

    Returns:
        float: the supervised model's prediction for the candidate.
    """
    # Sampling through the trial (rather than pushing noise through a separate
    # generator network, which this notebook never defines) is what lets the
    # study record parameters and report them via ``study.best_params``.
    feature_names = ['height', 'width', 'sxx',
                     'sxy', 'syy', 'sdrop', 'mud', 'dc']
    candidate = np.array(
        [[trial.suggest_float(name, 0.0, 1.0) for name in feature_names]])
    df_candidate = process_for_supervised_model(candidate)
    prediction = supervised_model.predict(df_candidate)
    # Optuna expects a scalar objective value; predict() returns one value per row.
    return float(np.asarray(prediction).reshape(-1)[0])

In [None]:
# Create a study that searches for the highest objective value
study = optuna.create_study(direction='maximize')

# Evaluate the objective for a fixed number of trials, with the progress bar
# switched off
n_trials = 1000
study.optimize(objective, n_trials=n_trials, show_progress_bar=False)


In [None]:
# Report the parameters and objective value of the best trial in the study
best_params, best_reward = study.best_params, study.best_value
print(
    f"Best parameter settings: {best_params}",
    f"Best reward: {best_reward}",
    sep="\n",
)