In [46]:
# Prepare files to train surrogate
import os
import numpy as np
import pandas as pd
import json


def read_json(filename):
    """Read a JSON file and return its content as a dictionary."""
    with open(filename, 'r') as f:
        return json.load(f)

In [47]:
# Current working directory
cwd = os.getcwd()

# Path to calibrated curves
path_to_calibrations = os.path.join(cwd, 'cals_dr_005')
all_cals = os.listdir(path_to_calibrations)

# Read the first calibration file
data_json = read_json(os.path.join(path_to_calibrations, all_cals[0]))
cal_data = pd.DataFrame(data_json['best_fit'], index=[0])

# Set id to be the last 3 digits of the calibration file name begore the ".json"
uniqueId = all_cals[0].split('_')[-1].split('.')[0]

# Get name of test in the json file
name = data_json['Name']

# Add properties of test to the cal_data dataframe
cal_data['UniqueId'] = uniqueId
cal_data['Name'] = name
cal_data['PeakDrift'] = max(abs(np.array(data_json['data']['disp'])))/data_json['L_Inflection']
cal_data['FailureType'] = data_json['FailureType']

# Add the first calibration data to the dataframe
cal_data_df = pd.DataFrame(cal_data)

# Loop through the rest of the calibration files
for cal in all_cals[1:]:
    # Read the calibration file
    data_json = read_json(os.path.join(path_to_calibrations, cal))
    cal_data = pd.DataFrame(data_json['best_fit'], index=[0])

    # Set id to be the last 3 digits of the calibration file name before the ".json"
    uniqueId = cal.split('_')[-1].split('.')[0]

    # Get name of test in the json file
    name = data_json['Name']
    
    # Add properties of test to the cal_data dataframe
    cal_data['UniqueId'] = uniqueId
    cal_data['Name'] = name
    cal_data['PeakDrift'] = max(abs(np.array(data_json['data']['disp'])))/data_json['L_Inflection']
    cal_data['FailureType'] = data_json['FailureType']

    # Just store the cases where the failure type is flexure
    failuretype = data_json['FailureType']
    #if failuretype != 'Flexure':
    cal_data_df = pd.concat([cal_data_df, cal_data], ignore_index=True)

# Explore the dataframe... So far it only has the calibrated parameters
cal_data_df.head()

# Capitals are used for the identification parameters. Non-capital for the calibration parameters.

Unnamed: 0,eta1,kappa_k,kappa,sig,lam,mup,sigp,rsmax,n,alpha,alpha1,alpha2,betam1,gamma,UniqueId,Name,PeakDrift,FailureType
0,1.389757,0.977672,1.000629,0.526346,0.623168,2.668564,1.396805,0.734672,7.145517,0.010004,1.159895,1.691475,0.008341,0.839781,222,"Paultre et al., 2001, No. 1206040",0.081175,Flexure
1,1.504931,2.83592,0.945469,0.277903,0.856977,6.966566,4.094144,0.301638,1.752142,0.008887,4.569456,0.086489,0.001917,0.552348,367,"Kowalsky et al. 1996, FL2",0.058447,Flexure
2,1.091759,0.104649,0.938796,0.468762,0.483361,1.814832,1.840491,0.555405,1.439374,0.018067,4.269103,0.79368,0.00225,0.330463,275,"Ang et al. 1985, No. 9",0.065567,Flexure
3,1.400475,0.862037,1.085792,0.394508,0.54297,8.616623,8.545101,0.927175,6.54685,0.000499,1.260977,1.550322,0.002756,0.456066,330,"Arakawa et al. 1987, No. 13",0.033528,Shear
4,1.89728,1.027857,0.985971,0.501203,0.646731,7.290701,0.413791,0.401243,7.526214,0.002171,2.814609,0.587205,0.001491,0.552913,19,"Tanaka and Park 1990, No. 2",0.080831,Flexure


In [48]:
# Load the nondimensional parameters and merge with with the calibrated parameters

# load csv file with the nondimensional parameters
nondim_params_spiral = pd.read_csv('data_spiral_wnd.csv')  # This includes all the data... we want just the last 6 columns (nondimensional parameters)
nondim_params_rect = pd.read_csv('data_rect_wnd.csv') 

# loop over ids and turn them into strings with zfill(3)
for i in range(len(nondim_params_spiral)):
    nondim_params_spiral['id'][i] = str(int(nondim_params_spiral['id'][i])).zfill(3)

for i in range(len(nondim_params_rect)):
    nondim_params_rect['id'][i] = str(int(nondim_params_rect['id'][i])).zfill(3)

# Add columns to spiral dataframe
uniqueIds_spiral = nondim_params_spiral['id']
nondim_params_spiral = nondim_params_spiral.iloc[:, -6::]
nondim_params_spiral['UniqueId'] = uniqueIds_spiral
nondim_params_spiral['Type'] = 'Spiral'

# Add columns to rect dataframe
uniqueIds_rect = nondim_params_rect['id']
nondim_params_rect = nondim_params_rect.iloc[:, -6::]
nondim_params_rect['UniqueId'] = uniqueIds_rect
nondim_params_rect['Type'] = 'Rectangular'

# Concatenate the two dataframes
nondim_params = pd.concat([nondim_params_spiral, nondim_params_rect], ignore_index=True)
nondim_params


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nondim_params_spiral['id'][i] = str(int(nondim_params_spiral['id'][i])).zfill(3)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nondim_params_rect['id'][i] = str(int(nondim_params_rect['id'][i])).zfill(3)


Unnamed: 0,ar,lrr,srr,alr,sdr,smr,UniqueId,Type
0,2.00,0.241822,0.104983,0.368369,0.572917,0.484365,266,Spiral
1,2.00,0.254624,0.020775,0.000000,1.666667,0.939881,269,Spiral
2,2.50,0.387556,0.021468,0.000000,1.666667,1.263461,270,Spiral
3,2.00,0.455948,0.024578,0.000000,2.750000,1.480977,271,Spiral
4,2.00,0.448617,0.037275,0.000000,1.111111,1.192760,272,Spiral
...,...,...,...,...,...,...,...,...
286,2.25,0.303014,0.080512,0.078444,1.513228,0.670662,242,Rectangular
287,2.50,0.298203,0.064837,0.300000,1.666667,0.799331,243,Rectangular
288,2.50,0.298203,0.064837,0.600000,1.666667,0.770925,244,Rectangular
289,2.00,0.167179,0.112889,0.566893,1.312336,0.528761,245,Rectangular


In [49]:
# Merge the two dataframes on the 'name' column
merged_data = pd.merge(cal_data_df, nondim_params, on='UniqueId')

# Save the merged data to a new CSV file
merged_data.to_csv('merged_data.csv', index=False)

merged_data.columns

Index(['eta1', 'kappa_k', 'kappa', 'sig', 'lam', 'mup', 'sigp', 'rsmax', 'n',
       'alpha', 'alpha1', 'alpha2', 'betam1', 'gamma', 'UniqueId', 'Name',
       'PeakDrift', 'FailureType', 'ar', 'lrr', 'srr', 'alr', 'sdr', 'smr',
       'Type'],
      dtype='object')

In [50]:
'''# Load spiral_data_use.csv which contains the indices for the test data that we should use
spiral_data_use = pd.read_csv('spiral_data_use.csv')

# Get rid of instances where use == 0 

spiral_data_use = spiral_data_use[spiral_data_use['use'] == 1]
spiral_data_use'''

"# Load spiral_data_use.csv which contains the indices for the test data that we should use\nspiral_data_use = pd.read_csv('spiral_data_use.csv')\n\n# Get rid of instances where use == 0 \n\nspiral_data_use = spiral_data_use[spiral_data_use['use'] == 1]\nspiral_data_use"

In [51]:
# Create folder to save files with the configuration code
config_code = 'Shear-AllTypes'

# Generate filters

# Filter 1: Peak drift < 0.04
# filter1 = merged_data['PeakDrift'] < 0.1

# Filter 2: Failure type is flexure
filter2 = merged_data['FailureType'] != 'Flexure'

# Filter 3: Failure type is spiral and rectangular
filter3 = (merged_data['Type'] == 'Spiral') | (merged_data['Type'] == 'Rectangular')

# Apply the filters
filtered_data = merged_data[filter2 & filter3]

# Randomly shuffle the data
merged_data_shuffle = filtered_data.sample(frac=1, random_state=1).reset_index(drop=True)

# Generate a split for training and testing
split = 0.75
train_data = merged_data_shuffle.iloc[:int(split*len(merged_data_shuffle)), :]
test_data = merged_data_shuffle.iloc[int(split*len(merged_data_shuffle)):, :]

# These are the indices for the calibrated parameters and the nondimensional parameters (column)
# DO NOT MODIFY THESE
cal_params_index = np.arange(0, 14)
nd_params_index = np.arange(18, 24)

# Extract the calibrated and nondimensional parameters for both training and testing
cal_params_all = merged_data.iloc[:, cal_params_index]
nondim_params_all = merged_data.iloc[:, nd_params_index]

# Extract just the training data
cal_params_train = train_data.iloc[:, cal_params_index]
nondim_params_train = train_data.iloc[:, nd_params_index]

# Extract just the testing data
cal_params_test = test_data.iloc[:, cal_params_index]
nondim_params_test = test_data.iloc[:, nd_params_index]

print('We have {} training samples and {} testing samples'.format(len(train_data), len(test_data)))

We have 63 training samples and 21 testing samples


In [None]:
# Save to separate txt files with 5 decimal places

if not os.path.exists(os.path.join('quoFEM_Surrogate',config_code)):
    os.makedirs(os.path.join('quoFEM_Surrogate',config_code))

# Save training data
nondim_params_train.to_csv(os.path.join('quoFEM_Surrogate', config_code, 'input_train.txt'), 
                     sep='\t', 
                     index=False, 
                     float_format='%.5f'
                     )

cal_params_train.to_csv(os.path.join('quoFEM_Surrogate', config_code, 'output_train.txt'), 
                  sep='\t', 
                  index=False, 
                  float_format='%.5f'
                  )

# Save testing data
nondim_params_test.to_csv(os.path.join('quoFEM_Surrogate', config_code, 'input_test.txt'), 
                     sep='\t', 
                     index=False, 
                     float_format='%.5f'
                     )

cal_params_test.to_csv(os.path.join('quoFEM_Surrogate', config_code, 'output_test.txt'),
                    sep='\t', 
                    index=False, 
                    float_format='%.5f'
                    )

# Save train and test data into CSV files
train_data.to_csv(os.path.join('quoFEM_Surrogate', config_code, 'train_data.csv'), index=False)
test_data.to_csv(os.path.join('quoFEM_Surrogate', config_code, 'test_data.csv'), index=False)
