In [33]:
import warnings
warnings.filterwarnings("ignore")

In [34]:
import joblib
import numpy as np
import pandas as pd
import lightgbm as lgb


import torch
import torch.nn as nn

from sklearn.preprocessing import StandardScaler

import gym
from stable_baselines3 import PPO, A2C, DQN, SAC, TD3
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt
random_state = 6
np.random.seed(random_state)

In [35]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


## Load data

In [36]:
## look data with pandas
train_file = "data/rupturemodel_train.txt"
val_file = "data/rupturemodel_validate.txt"
test_file = "data/rupturemodel_test.txt"

df_train = pd.read_csv(train_file, sep=" ", header=None)
df_val = pd.read_csv(val_file, sep=" ", header=None)
df_test = pd.read_csv(test_file, sep=" ", header=None)

columns = ['height', 'width', 'sxx', 'sxy',
           'syy', 'sdrop', 'mud', 'dc', 'label']
df_train.columns = columns
df_val.columns = columns
df_test.columns = columns

frames = [df_train, df_val]
df_train = pd.concat(frames)
print('train data shape {} and test data shape {}'.format(
    np.shape(df_train), np.shape(df_test)))


train data shape (1600, 9) and test data shape (400, 9)


### Feature Engineering

In [4]:
def create_new_features(df: pd.DataFrame) -> pd.DataFrame:
    df_new = df.copy()
    # Create new features
    df_new['height_width_ratio'] = df_new['height'] / df_new['width']
    df_new['normal_stress_diff'] = df_new['sxx'] - df_new['syy']
    df_new['friction_product'] = df_new['mud'] * (df_new['sdrop'])
    df_new['stress_ratio'] = df_new['sxy'] / df_new['syy']
    df_new['static_dynamic_friction_diff'] = (
        df_new['mud'] + df_new['sdrop']) - df_new['mud']
    df_new['stress_diff_dynamic_strength'] = df_new['sxy'] - \
        (df_new['syy'] * df_new['mud'])
    df_new['normalized_dc'] = df_new['dc'] / df_new['width']
    return df_new

In [5]:
# Define your reinforcement learning environment
from typing import List

class GeneratorEnv(gym.Env):
    def __init__(self, supervised_model):
        super(GeneratorEnv, self).__init__()
        self.supervised_model = supervised_model
        self.input_size = 100
        self.scaler = joblib.load('./models/scaler.joblib')
        self.state = torch.randn(100).to(device)
        self.action_space = gym.spaces.Box( low = -5, high = 5, shape = (8,), dtype = np.float32)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(100,), dtype=np.float32)

    def step(self, action):
        self.state = torch.randn(1, self.input_size).to(device)
        reward = self.get_reward(np.array(action))
        done = False
        info = {}
        return self.state.cpu().numpy(), reward, done, info

    def reset(self):
        self.state = torch.randn(100).to(device)
        return self.state.cpu().numpy()

    def get_reward(self, generated_data: np.array) -> np.array:
        columns = ['height', 'width', 'sxx','sxy', 'syy', 'sdrop', 'mud', 'dc']
        de_normalized = self.scaler.inverse_transform(
            generated_data.reshape(1, -1))
        df = pd.DataFrame(de_normalized, columns=columns)
        df = create_new_features(df)
        predict = self.supervised_model.predict(df.values)
        reward = predict
        
        # If predict is out of range, give negative reward
        if predict < 0.0 and predict > 1.0:
            reward =  -1.0
        else:
            reward = 1.0
        
        # Negative reward if the generated height or width is negative
        if (df['width'].values[0] < 0.0) and (df['height'].values[0] < 0.0):
            reward = -1.0
        elif (df['width'].values[0] < 0.0) or (df['height'].values[0] < 0.0):
            reward = -1.0
            
        return reward

In [6]:
# Create the custom environment
supervised_model = lgb.Booster(model_file='./models/best_supervised_model.txt')
env = DummyVecEnv([lambda: GeneratorEnv(supervised_model)])

In [7]:
# Train the generator using PPO
model_name = 'rl_model_ppo'
policy_kwargs = dict(activation_fn=torch.nn.ReLU,
                     net_arch=dict(pi=[256, 512, 256], vf=[256, 512, 256]))

# Model definition
model = PPO('MlpPolicy', env,
            verbose=0,
            policy_kwargs=policy_kwargs,
            tensorboard_log="./logs/rl_logs/")

In [8]:
# Train the model
model.learn(total_timesteps=500000)

<stable_baselines3.ppo.ppo.PPO at 0x7f86807199d0>

In [9]:
### Save the model
model.save(f'./models/{model_name}')

### Save the environment
joblib.dump(env, f'./models/{model_name}_env.joblib')

['./models/rl_model_ppo_env.joblib']

In [10]:
### Load the model
loaded_model = PPO.load(f'./models/{model_name}')

### Load the environment
loaded_env = joblib.load(f'./models/{model_name}_env.joblib')
loaded_model.set_env(loaded_env)

In [52]:
input_data = torch.randn(5000, 100)
generated_data, _ = loaded_model.predict(input_data)

torch.Size([5000, 100])

In [57]:
## Process the generated data to make it compatible with the supervised model
scaler = joblib.load('./models/scaler.pkl')
def process_for_supervised_model(generated_data):
    # Process the generated data to make it compatible with the supervised model
    columns = ['height', 'width', 'sxx',
               'sxy', 'syy', 'sdrop', 'mud', 'dc']
    de_normalized = scaler.inverse_transform(generated_data)  # Reshape to a 2D array
    df = pd.DataFrame(de_normalized, columns=columns)
    df = create_new_features(df)
    return df

data = np.array(generated_data)
df_generated = process_for_supervised_model(data)

In [58]:
columns = ['height', 'width', 'sxx','sxy', 'syy', 'sdrop', 'mud', 'dc']
df_generated[columns].sample(10)

Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc
2444,0.108005,1.416462,-149.943481,10.580321,-221.572159,0.363391,-0.063238,0.592588
4465,0.045519,1.53129,-169.45192,40.637573,-34.323097,0.33463,0.376317,-0.135983
2352,0.073967,-0.624888,-412.624023,-116.62664,-246.830505,0.532091,-0.452543,0.26024
3698,0.056905,0.439508,-469.853729,-23.065411,-183.476395,0.216209,0.327296,0.250585
4321,0.125804,1.988614,-394.297516,87.419151,-53.784492,-0.034445,-0.46036,-0.040936
3194,0.212858,0.826991,-316.542969,-108.880051,-39.4977,0.395484,0.168112,0.18243
1659,0.074788,-0.802342,-404.02005,-54.650021,-67.54879,0.201346,0.616049,0.116728
1033,-0.008465,1.53184,-444.65686,64.217567,112.802803,0.269359,0.454642,0.568043
930,0.15066,1.676377,-416.31839,100.756142,-35.964104,0.161665,-0.24463,-0.024999
3763,0.094055,1.556148,-168.500763,-7.727247,64.353188,0.320721,-0.035798,0.366916


In [59]:
#Predict the generated data using the supervised model
generated_predict = supervised_model.predict(df_generated.values)
df_generated['label'] = generated_predict

In [71]:
generated_final = df_generated[(df_generated['height'] > 0.0) &
                               (df_generated['width'] > 0.0) &
                               ((df_generated['label'] >= 0.0) & (df_generated['label'] <= 1.0))]


In [75]:
# Plot distribution plot using altaire
alt.Chart(generated_final).mark_bar().encode(
    alt.X('label', bin=alt.Bin(maxbins=100)),
    y='count()',
).properties(
    width=600,
    height=400
).interactive()

In [76]:
combined_df = pd.concat([df_train, generated_final], ignore_index=True)
combined_df['data_type'] = ['train'] * \
    len(df_train) + ['generated'] * len(generated_final)


In [81]:
combined_df.to_csv('./data/train_generated_data.csv', index=False)

In [80]:
combined_df.head()


Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc,label,height_width_ratio,normal_stress_diff,friction_product,stress_ratio,static_dynamic_friction_diff,stress_diff_dynamic_strength,normalized_dc,data_type
0,0.103861,1.145663,-102.509086,58.619371,-117.766562,0.483821,0.216681,0.295842,0.0,,,,,,,,train
1,0.088714,1.30436,-136.06227,51.391037,-126.715571,0.345944,0.447964,0.406466,1.0,,,,,,,,train
2,0.099706,1.260377,-117.558936,40.972081,-115.529343,0.292719,0.501697,0.38936,1.0,,,,,,,,train
3,0.115749,1.191782,-128.169036,94.020712,-157.830504,0.57171,0.202831,0.408976,0.0,,,,,,,,train
4,0.0179,1.10815,-106.35032,29.148969,-101.379323,0.253122,0.324653,0.398592,1.0,,,,,,,,train


In [79]:
# Plot distribution plot using altaire
alt.Chart(combined_df.sample(4000)).mark_bar().encode(
    alt.X('dc', bin=alt.Bin(maxbins=100)),
    y='count()',
    color='data_type'
).properties(
    width=600,
    height=400
).interactive()

### Understand the parameters of the generator

In [None]:
import optuna
import numpy as np

# Define the objective function to optimize

def objective(trial):
    input_noise = torch.randn(1, 100).to(device)
    generator_model.eval()
    generator_output = generator_model(input_noise)
    generator_output = generator_output.squeeze(
        0).detach().cpu().numpy().reshape(1, -1)
    df_generated = process_for_supervised_model(generator_output)
    reward = supervised_model.predict(df_generated)
    return reward

In [None]:
# Define the optimization study
study = optuna.create_study(direction='maximize')

# Optimize the objective function for a fixed number of trials
n_trials = 1000
study.optimize(objective, n_trials=n_trials, show_progress_bar=False)


In [None]:
# Print the best parameter settings and reward found
best_params = study.best_params
best_reward = study.best_value
print(f"Best parameter settings: {best_params}")
print(f"Best reward: {best_reward}")