In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import joblib
import numpy as np
import pandas as pd
import lightgbm as lgb


import torch
import torch.nn as nn

import gym
from stable_baselines3 import PPO, A2C, DQN, SAC, TD3
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt
random_state = 6
np.random.seed(random_state)

In [3]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


### Load data

In [4]:
## look data with pandas
train_file = "data/rupturemodel_train.txt"
val_file = "data/rupturemodel_validate.txt"
test_file = "data/rupturemodel_test.txt"

df_train= pd.read_csv(train_file, sep=" ", header = None)
df_val= pd.read_csv(val_file, sep=" ", header = None)
df_test= pd.read_csv(test_file, sep=" ", header = None)

columns =  ['height', 'width', 'sxx', 'sxy', 'syy', 'sdrop', 'mud', 'dc', 'label']
df_train.columns = columns
df_val.columns = columns
df_test.columns = columns

frames = [df_train, df_val]
df_train = pd.concat(frames)
print('train data shape {} and test data shape {}'.format(np.shape(df_train), np.shape(df_test)))

train data shape (1600, 9) and test data shape (400, 9)


### Feature Engineering

In [5]:
def create_new_features(df: pd.DataFrame) -> pd.DataFrame:
    df_new = df.copy()
    # Create new features
    df_new['height_width_ratio'] = df_new['height'] / df_new['width']
    df_new['normal_stress_diff'] = df_new['sxx'] - df_new['syy']
    df_new['friction_product'] = df_new['mud'] * (df_new['sdrop'])
    df_new['stress_ratio'] = df_new['sxy'] / df_new['syy']
    df_new['static_dynamic_friction_diff'] = (
        df_new['mud'] + df_new['sdrop']) - df_new['mud']
    df_new['stress_diff_dynamic_strength'] = df_new['sxy'] - \
        (df_new['syy'] * df_new['mud'])
    df_new['normalized_dc'] = df_new['dc'] / df_new['width']
    return df_new

In [6]:
train_file = "data/rupturemodel_train.txt"
columns = ['height', 'width', 'sxx', 'sxy',
           'syy', 'sdrop', 'mud', 'dc', 'label']
df_train = pd.read_csv(train_file, sep=" ", header=None)
df_train.columns = columns

In [7]:
# Generator
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, output_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)


g_input_dim = 100
g_output_dim = 8
generator = Generator(g_input_dim, g_output_dim).to(device)


In [36]:
# Define your reinforcement learning environment
class GeneratorEnv(gym.Env):
    def __init__(self, generator_model, supervised_model):
        super(GeneratorEnv, self).__init__()
        self.generator_model = generator_model.to(device)
        self.supervised_model = supervised_model
        self.generator_input_size = 100
        self.scaler = joblib.load('./models/scaler.pkl')
        self.state = torch.randn(100).to(device)
        self.action_space = gym.spaces.Box( low = 0, high = 1, shape = (8,), dtype = np.float32)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(100,), dtype=np.float32)

    def step(self, action):
        self.state = torch.randn(1, self.generator_input_size).to(device)
        self.generator_model.eval()
        output = self.generator_model(self.state)
        processed_data = self.process_for_supervised_model(
            output.squeeze(0).detach().cpu())
        reward = self.supervised_model.predict(processed_data)
        done = False
        info = {}
        return self.state.cpu().numpy(), reward, done, info

    def reset(self):
        self.state = torch.randn(100).to(device)
        return self.state.cpu().numpy()

    def process_for_supervised_model(self, generated_data: torch.Tensor) -> np.array:
        # Process the generated data to make it compatible with the supervised model
        columns = ['height', 'width', 'sxx',
                   'sxy', 'syy', 'sdrop', 'mud', 'dc']
        generated_data = generated_data.numpy()
        de_normalized = self.scaler.inverse_transform(
            generated_data.reshape(1, -1))  # Reshape to a 2D array
        df = pd.DataFrame(de_normalized, columns=columns)
        df = create_new_features(df)
        return df.values

In [37]:
input_dim = 8  # Modify this value based on your generator's input dimension
output_dim = input_dim  # Modify this value based on your generator's output dimension
# Load the generator and supervised models
generator_model = torch.load('./models/best_generator.pth').to(device)
supervised_model = lgb.Booster(model_file='./models/best_supervised_model.txt')
# Create the custom environment
env = DummyVecEnv([lambda: GeneratorEnv(generator_model, supervised_model)])

In [38]:
# Train the generator using PPO
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log="./logs/rl_logs/")
model.learn(total_timesteps=200000)

Using cuda device


ValueError: could not broadcast input array from shape (100,) into shape (100,1)

In [25]:
### Save the model with environment
model_name = 'rl_model_pp0_generator'
model.save(f'./models/{model_name}')

In [26]:
### Load the model
loaded_model = PPO.load(f'./models/{model_name}')
loaded_env = model.get_env()

In [27]:
rewards_array = []
generated_data = []
obs = loaded_env.reset()
for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = loaded_env.step(action)
    generated_data.append(list(action[0]))
    rewards_array.append(rewards[0])

In [28]:
## Process the generated data to make it compatible with the supervised model
scaler = joblib.load('./models/scaler.pkl')
def process_for_supervised_model(generated_data):
    # Process the generated data to make it compatible with the supervised model
    columns = ['height', 'width', 'sxx',
               'sxy', 'syy', 'sdrop', 'mud', 'dc']
    de_normalized = scaler.inverse_transform(generated_data)  # Reshape to a 2D array
    df = pd.DataFrame(de_normalized, columns=columns)
    df = create_new_features(df)
    return df

data = np.array(generated_data)
df_generated = process_for_supervised_model(data)

In [29]:
df_train.describe()

Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc,label
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.071747,1.492561,-85.727744,35.405536,-85.29595,0.390677,0.306744,0.401653,0.356
std,0.046195,0.289699,45.301701,20.402395,42.617577,0.112238,0.088545,0.051051,0.479055
min,7e-05,1.000214,-196.579802,2.491208,-159.945235,0.200579,0.200045,0.25448,0.0
25%,0.030697,1.24686,-120.756137,19.565099,-121.961244,0.292298,0.233558,0.369119,0.0
50%,0.068957,1.480246,-83.074266,32.316379,-86.0228,0.387075,0.285329,0.400085,0.0
75%,0.105718,1.739429,-48.382402,49.338103,-48.805945,0.482003,0.357936,0.434763,1.0
max,0.195712,2.08524,-7.933036,97.528776,-10.044879,0.599913,0.573047,0.583352,1.0


In [30]:
df_generated.describe()

Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc,height_width_ratio,normal_stress_diff,friction_product,stress_ratio,static_dynamic_friction_diff,stress_diff_dynamic_strength,normalized_dc
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.063394,1.296227,-137.10878,36.248058,-110.833115,0.331662,0.324279,0.354454,0.051309,-26.276779,0.108106,-0.901348,0.331662,72.316261,0.294753
std,0.079161,0.427622,77.197952,40.059795,58.650623,0.16148,0.153558,0.143669,0.067623,93.132484,0.078602,2.035063,0.16148,46.748653,0.139064
min,3e-06,1.000214,-198.643585,2.388038,-159.945221,0.200579,0.200045,0.233045,1e-06,-188.631592,0.040125,-9.750129,0.200579,4.390887,0.109466
25%,3e-06,1.000214,-198.643585,2.388038,-159.945221,0.200579,0.200045,0.233045,3e-06,-81.14333,0.042511,-0.610323,0.200579,34.384281,0.228989
50%,0.001201,1.000214,-198.643585,9.043187,-147.081818,0.213705,0.205554,0.259465,0.000991,-38.698364,0.09097,-0.232437,0.213705,67.792137,0.232995
75%,0.141214,1.563697,-63.473439,82.603052,-59.375022,0.484021,0.484058,0.497733,0.093977,28.048768,0.12001,-0.017088,0.484021,103.58173,0.368533
max,0.195712,2.128935,-7.933036,97.61821,-10.011992,0.599913,0.573047,0.583352,0.19567,152.012192,0.343778,-0.01493,0.599913,189.274338,0.583227


In [35]:
train_columns = ['height', 'width', 'sxx',
           'sxy', 'syy', 'sdrop', 'mud', 'dc']
df_generated[train_columns].sample(10)


Unnamed: 0,height,width,sxx,sxy,syy,sdrop,mud,dc
48,0.171282,1.000214,-198.643585,2.388038,-159.945221,0.30604,0.206193,0.271141
703,3e-06,1.55541,-198.643585,36.584038,-10.011992,0.599913,0.278166,0.348336
282,3e-06,1.000214,-142.781708,32.800877,-95.948616,0.200579,0.200045,0.233045
112,3e-06,1.955054,-198.643585,94.455864,-159.945221,0.599913,0.200045,0.583352
920,3e-06,1.000214,-198.643585,2.388038,-159.945221,0.599913,0.200045,0.583352
46,0.054781,2.128935,-198.643585,60.208935,-66.3955,0.535698,0.573047,0.233045
391,3e-06,1.151725,-198.643585,97.61821,-159.945221,0.315802,0.200045,0.464034
107,0.146542,1.000214,-198.643585,97.61821,-55.660603,0.200579,0.573047,0.233045
316,3e-06,2.128935,-198.643585,2.388038,-139.108246,0.200579,0.200045,0.360395
142,0.195712,1.000214,-7.933036,2.388038,-10.011992,0.200579,0.200045,0.569919


In [31]:
df_rewards = pd.DataFrame(rewards_array, columns=['reward'])
df_rewards.describe()

Unnamed: 0,reward
count,1000.0
mean,0.676064
std,0.303125
min,-0.231509
25%,0.493496
50%,0.727863
75%,0.903609
max,1.323363


In [32]:
# Plot distribution plot using altaire
alt.Chart(df_rewards).mark_bar().encode(
    alt.X('reward', bin=alt.Bin(maxbins=100)),
    y='count()',
).properties(
    width=600,
    height=400
).interactive()

In [None]:
combined_df = pd.concat([df_train, df_generated], ignore_index=True)
combined_df['data_type'] = ['train'] * len(df_train) + ['generated'] * len(df_generated)

In [None]:
# Plot distribution plot using altaire
alt.Chart(combined_df).mark_bar().encode(
    alt.X('width', bin=alt.Bin(maxbins=100)),
    y='count()',
    color='data_type'
).properties(
    width=600,
    height=400
).interactive()
 

In [None]:
df_generated['rewards']  = df_rewards['reward']

In [None]:
alt.Chart(df_generated).mark_rect().encode(
    alt.X('sdrop', bin=alt.Bin(maxbins=100)),
    alt.Y('width', bin=alt.Bin(maxbins=100)),
    alt.Color('rewards', scale=alt.Scale(scheme='redyellowblue'))
).properties(
    width=600,
    height=400
).interactive()


### Understand the parameters of the generator

In [None]:
import optuna
import numpy as np

# Define the objective function to optimize


def objective(trial):
    input_noise = torch.randn(1, 100).to(device)
    generator_model.eval()
    generator_output = generator_model(input_noise)
    generator_output = generator_output.squeeze(
        0).detach().cpu().numpy().reshape(1, -1)
    df_generated = process_for_supervised_model(generator_output)
    reward = supervised_model.predict(df_generated)
    return reward

In [None]:
# Define the optimization study
study = optuna.create_study(direction='maximize')

# Optimize the objective function for a fixed number of trials
n_trials = 1000
study.optimize(objective, n_trials=n_trials, show_progress_bar=False)


In [None]:
# Print the best parameter settings and reward found
best_params = study.best_params
best_reward = study.best_value
print(f"Best parameter settings: {best_params}")
print(f"Best reward: {best_reward}")