In [211]:
# Prepare files to train surrogate

import os
import numpy as np
import pandas as pd
import json

def read_json(filename):
    """Read a JSON file and return its parsed content.

    Parameters
    ----------
    filename : str or path-like
        Path to the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file (a dictionary when the
        top-level JSON value is an object).
    """
    # JSON text is UTF-8 by specification; being explicit avoids relying on
    # the platform's default locale encoding when decoding the file.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

In [212]:
# Current working directory
cwd = os.getcwd()

# Here, define the location for the quoFEM files
save_files_to = os.path.join(cwd, 'surrogate_training_data')

# Path to calibrated curves
path_to_calibrations = os.path.join(cwd, 'calibrated_curves_02')

# os.listdir returns entries in arbitrary order and can include entries that
# are not calibration files, so select the JSON files explicitly (instead of
# skipping by position) and sort them so the row order is reproducible.
all_cals = sorted(
    entry for entry in os.listdir(path_to_calibrations)
    if entry.lower().endswith('.json')
)

if not all_cals:
    raise FileNotFoundError(
        'No calibration JSON files found in {}'.format(path_to_calibrations)
    )


def _calibration_row(json_path):
    """Return a one-row DataFrame for a single calibration JSON file.

    The row holds the entries of ``best_fit`` followed by three extra columns:
    ``name`` (the file's ``Name``), ``PeakDrift`` (largest absolute value of
    ``data['disp']`` divided by ``L_Inflection``) and ``FailureType``.
    """
    data = read_json(json_path)
    row = pd.DataFrame(data['best_fit'], index=[0])
    row['name'] = data['Name']
    row['PeakDrift'] = np.abs(np.array(data['data']['disp'])).max() / data['L_Inflection']
    row['FailureType'] = data['FailureType']
    return row


# Build every row first and concatenate once; growing a DataFrame with
# pd.concat inside a loop copies the accumulated frame on every iteration.
# All failure types are kept in this table.
cal_data_df = pd.concat(
    [_calibration_row(os.path.join(path_to_calibrations, cal)) for cal in all_cals],
    ignore_index=True,
)

# Explore the dataframe... So far it only has the calibrated parameters
cal_data_df

Unnamed: 0,eta1,kappa_k,kappa,sig,lam,mup,sigp,rsmax,n,alpha,alpha1,alpha2,betam1,gamma,name,PeakDrift,FailureType
0,1.148932,1.780301,1.020907,0.348714,0.518051,1.490134,2.723279,0.948992,2.622910,0.002713,4.644326,0.049737,0.002159,1.729762,"Davey 1975, No. 1",0.043364,Flexure
1,0.630606,0.841674,0.987722,0.227504,0.507170,1.211439,2.126357,0.762523,2.015740,0.014184,5.639288,1.365753,0.009515,0.921419,"Davey 1975, No. 2",0.057183,Flexure
2,1.478835,5.314510,0.976617,0.386420,0.748542,2.441889,0.314088,0.637786,1.198469,0.007345,4.025868,1.336204,0.005919,1.376716,"Davey 1975, No. 3",0.035757,Flexure
3,1.617294,0.551926,1.017993,0.142420,0.649311,1.039210,0.494608,0.532812,3.299630,0.017042,4.291932,0.700062,0.006451,1.478985,"Munro et al. 1976, No. 1",0.055538,Flexure
4,1.138895,0.615178,0.990485,0.127393,0.607689,0.419765,1.385741,0.966630,1.303237,0.005371,7.799056,0.414857,0.005461,0.597973,"Ng et al. 1978, No. 2",0.078142,Flexure
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,0.801189,1.986198,1.000156,0.275466,0.329429,2.668027,1.747025,0.714904,1.013788,0.002320,9.877437,0.023947,0.000690,0.190341,"Hamilton, 2002, UCI5",0.040782,Flexure-Shear
140,1.697304,0.504133,0.985804,0.131875,0.192101,4.980451,4.926214,0.574142,1.005974,0.001677,1.868030,0.997237,0.028550,0.894524,"Hamilton, 2002, UCI6",0.129975,Flexure
141,1.184817,0.908320,0.959475,0.396224,0.256509,0.531340,0.317291,0.830325,1.341692,0.022070,7.216451,0.270510,0.006894,0.278082,"McDaniel, 1997, S1",0.024549,Shear
142,1.267335,2.678181,1.044735,0.389762,0.325598,0.386566,0.517464,0.993302,6.535933,0.002377,9.966063,0.336192,0.020178,0.206365,"McDaniel, 1997, S1-2",0.010622,Shear


In [213]:
# Load the nondimensional parameters so they can be merged with the calibrated parameters

# The CSV includes all the data; only the last 6 columns (the nondimensional
# parameters) plus the 'name' column are needed.
nondim_params = pd.read_csv('data_spiral_wnd.csv')
names = nondim_params['name']

# .assign returns a new frame, so 'name' is not written into an iloc slice
# (which can trigger pandas' SettingWithCopyWarning).
nondim_params = nondim_params.iloc[:, -6:].assign(name=names)
nondim_params

Unnamed: 0,ar,lrr,srr,alr,sdr,smr,name
0,0.181818,0.304296,0.010831,0.058293,0.600000,0.429439,"Davey 1975, No. 1"
1,0.285714,0.288749,0.010579,0.055613,0.600000,0.528389,"Davey 1975, No. 2"
2,0.153846,0.298894,0.011766,0.057258,0.600000,0.369010,"Davey 1975, No. 3"
3,0.183150,0.206522,0.018635,0.003361,1.411765,0.115615,"Munro et al. 1976, No. 1"
4,0.186567,0.234963,0.016475,0.009809,1.885714,0.123810,"Ng et al. 1978, No. 2"
...,...,...,...,...,...,...,...
158,0.387860,0.151781,0.015829,0.000000,0.425197,0.243059,"Hamilton, 2002, UCI5"
159,0.219178,0.150928,0.016565,0.000000,0.849057,0.109021,"Hamilton, 2002, UCI6"
160,0.500000,0.207288,0.002685,0.002162,0.289370,0.535311,"McDaniel, 1997, S1"
161,0.500000,0.230492,0.002831,0.002403,0.289370,0.561034,"McDaniel, 1997, S1-2"


In [214]:
# Join calibrated and nondimensional parameters, matching rows by 'name'.
# This is an inner join: only names present in both tables are kept.
merged_data = cal_data_df.merge(nondim_params, how='inner', on='name')

# Write the merged table to CSV without the row index
merged_data.to_csv('merged_data.csv', index=False)

merged_data

Unnamed: 0,eta1,kappa_k,kappa,sig,lam,mup,sigp,rsmax,n,alpha,...,gamma,name,PeakDrift,FailureType,ar,lrr,srr,alr,sdr,smr
0,1.148932,1.780301,1.020907,0.348714,0.518051,1.490134,2.723279,0.948992,2.622910,0.002713,...,1.729762,"Davey 1975, No. 1",0.043364,Flexure,0.181818,0.304296,0.010831,0.058293,0.600000,0.429439
1,0.630606,0.841674,0.987722,0.227504,0.507170,1.211439,2.126357,0.762523,2.015740,0.014184,...,0.921419,"Davey 1975, No. 2",0.057183,Flexure,0.285714,0.288749,0.010579,0.055613,0.600000,0.528389
2,1.478835,5.314510,0.976617,0.386420,0.748542,2.441889,0.314088,0.637786,1.198469,0.007345,...,1.376716,"Davey 1975, No. 3",0.035757,Flexure,0.153846,0.298894,0.011766,0.057258,0.600000,0.369010
3,1.617294,0.551926,1.017993,0.142420,0.649311,1.039210,0.494608,0.532812,3.299630,0.017042,...,1.478985,"Munro et al. 1976, No. 1",0.055538,Flexure,0.183150,0.206522,0.018635,0.003361,1.411765,0.115615
4,1.138895,0.615178,0.990485,0.127393,0.607689,0.419765,1.385741,0.966630,1.303237,0.005371,...,0.597973,"Ng et al. 1978, No. 2",0.078142,Flexure,0.186567,0.234963,0.016475,0.009809,1.885714,0.123810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,0.801189,1.986198,1.000156,0.275466,0.329429,2.668027,1.747025,0.714904,1.013788,0.002320,...,0.190341,"Hamilton, 2002, UCI5",0.040782,Flexure-Shear,0.387860,0.151781,0.015829,0.000000,0.425197,0.243059
140,1.697304,0.504133,0.985804,0.131875,0.192101,4.980451,4.926214,0.574142,1.005974,0.001677,...,0.894524,"Hamilton, 2002, UCI6",0.129975,Flexure,0.219178,0.150928,0.016565,0.000000,0.849057,0.109021
141,1.184817,0.908320,0.959475,0.396224,0.256509,0.531340,0.317291,0.830325,1.341692,0.022070,...,0.278082,"McDaniel, 1997, S1",0.024549,Shear,0.500000,0.207288,0.002685,0.002162,0.289370,0.535311
142,1.267335,2.678181,1.044735,0.389762,0.325598,0.386566,0.517464,0.993302,6.535933,0.002377,...,0.206365,"McDaniel, 1997, S1-2",0.010622,Shear,0.500000,0.230492,0.002831,0.002403,0.289370,0.561034


In [215]:
# Generate filters
# Filter 1: peak drift strictly below the threshold.
# NOTE(review): 1.0 presumably corresponds to the 'pd_100' part of the output
# folder tag ('flexure_pd_100') - confirm before changing either one.
max_peak_drift = 1.0
filter1 = merged_data['PeakDrift'] < max_peak_drift

# Filter 2: Failure type is flexure
filter2 = merged_data['FailureType'] == 'Flexure'

# Apply the filters
filtered_data = merged_data[filter1 & filter2]

# Randomly shuffle the data (fixed seed so the split is repeatable)
merged_data_shuffle = filtered_data.sample(frac=1, random_state=1).reset_index(drop=True)

# Generate a split for training and testing
split = 0.75
n_train = int(split * len(merged_data_shuffle))  # first n_train shuffled rows are used for training
train_data = merged_data_shuffle.iloc[:n_train, :]
test_data = merged_data_shuffle.iloc[n_train:, :]

# These are the (positional) column indices for the calibrated parameters and
# the nondimensional parameters. They rely on the column order of merged_data:
# calibrated parameters first, then name / PeakDrift / FailureType, then the
# nondimensional parameters.
# DO NOT MODIFY THESE
cal_params_index = np.arange(0, 14)
nd_params_index = np.arange(17, 23)

# Extract the calibrated and nondimensional parameters from the full
# (unfiltered) merged table
cal_params_all = merged_data.iloc[:, cal_params_index]
nondim_params_all = merged_data.iloc[:, nd_params_index]

# Extract just the training data
cal_params_train = train_data.iloc[:, cal_params_index]
nondim_params_train = train_data.iloc[:, nd_params_index]

# Extract just the testing data
cal_params_test = test_data.iloc[:, cal_params_index]
nondim_params_test = test_data.iloc[:, nd_params_index]

print('We have {} training samples and {} testing samples'.format(len(train_data), len(test_data)))

We have 43 training samples and 15 testing samples


In [216]:
# Save to separate txt files with 5 decimal places

# Create folder to save files with the configuration code
config_code = 'flexure_pd_100'
output_dir = os.path.join('quoFEM_Surrogate', config_code)

# exist_ok=True creates the folder when missing and is a no-op when it already
# exists, so the cell can be re-run without a separate existence check.
os.makedirs(output_dir, exist_ok=True)

# File name -> table to write. Inputs are the nondimensional parameters and
# outputs are the calibrated parameters, for the training and testing sets.
datasets = {
    'input_train.txt': nondim_params_train,
    'output_train.txt': cal_params_train,
    'input_test.txt': nondim_params_test,
    'output_test.txt': cal_params_test,
}

# Every file uses the same format: tab-separated, no index column, 5 decimals
for filename, frame in datasets.items():
    frame.to_csv(os.path.join(output_dir, filename),
                 sep='\t',
                 index=False,
                 float_format='%.5f'
                 )