# Dependencies

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from joblib import dump, load
import time
from scipy.interpolate import PchipInterpolator
import scipy
from collections import defaultdict

from tqdm import tqdm

from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

from itertools import chain

import os

# Data

In [2]:
# Directory that holds the cleaned source CSV files.
dir_path = "../files/data/TWIN/CS/CLEANED"

# Collect every .csv file name in that directory. os.listdir returns entries
# in arbitrary, platform-dependent order, so the names are sorted to make the
# grouping of files into parts reproducible across runs and machines.
csv_files = sorted(f for f in os.listdir(dir_path) if f.endswith(".csv"))

# Number of source files per merged part: the integer fifth of the file
# count. When the count is not a multiple of five, the leftover files form
# one additional, smaller part.
one_fifth = len(csv_files) // 5

# Display the chunk size and the (possibly fractional) number of chunks.
one_fifth, len(csv_files)/one_fifth

(22, 5.181818181818182)

In [3]:
# Merge the cleaned CSV files into "part" files of `one_fifth` source files
# each, keeping only rows whose 'a1/t' and 'a2/t' are both <= 0.5.
out_dir = "../files/data/FINAL_CSV/TWIN"
os.makedirs(out_dir, exist_ok=True)  # a fresh checkout may lack the folder

# One chunk start per part. Deriving the starts from the file count (instead
# of a hard-coded number of parts) also covers a trailing partial chunk and
# never indexes past the end of `csv_files`.
chunk_starts = range(0, len(csv_files), one_fifth)

for part_no, start in enumerate(tqdm(chunk_starts), start=1):
    # Gather the filtered frames first and concatenate once; concatenating
    # inside the loop re-copies all accumulated rows for every file.
    frames = []
    for csv_file in csv_files[start:start + one_fifth]:
        # Read from `dir_path`, the same directory `csv_files` was listed from.
        df_ = pd.read_csv(os.path.join(dir_path, csv_file), index_col=False)
        frames.append(df_[(df_['a1/t'] <= 0.5) & (df_['a2/t'] <= 0.5)])

    df = pd.concat(frames, axis=0, ignore_index=True)

    df.to_csv(
        os.path.join(out_dir, "TWIN_CORNER_CRACK_CS2_QUARTER_ELLIPSE_PART_{}.csv".format(part_no)),
        index=False,
    )

100%|██████████| 6/6 [18:35<00:00, 185.87s/it]


# Train/Test Split and Data Viz

In [4]:
# Split every merged part file into train and test sets. The split is done on
# the unique 'c_index' values, so all rows sharing a 'c_index' end up in the
# same set.
NUM_PARTS = 6  # number of PART_<n>.csv files expected in `part_dir`
part_dir = "../files/data/FINAL_CSV/TWIN"
train_dir = os.path.join(part_dir, "TRAIN")
test_dir = os.path.join(part_dir, "TEST")

# Create the output folders up front so to_csv does not fail on a fresh checkout.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

for idx in tqdm(range(NUM_PARTS)):
    part_name = "TWIN_CORNER_CRACK_CS2_QUARTER_ELLIPSE_PART_{}".format(idx+1)
    df = pd.read_csv(os.path.join(part_dir, part_name + ".csv"))

    crack_idx = df['c_index'].unique()

    # Fixed random_state keeps the 75/25 split of ids reproducible.
    crack_idx_train, crack_idx_test = train_test_split(crack_idx, test_size=0.25, random_state=10)

    # Boolean masks: rows whose 'c_index' belongs to the train / test ids.
    train_list_mask = df["c_index"].isin(crack_idx_train)
    test_list_mask = df["c_index"].isin(crack_idx_test)

    # Use the masks to create the two new DataFrames
    df_train = df[train_list_mask].copy() # Using .copy() to avoid SettingWithCopyWarning
    df_test = df[test_list_mask].copy() # Using .copy() to avoid SettingWithCopyWarning

    # Sanity checks. np.array_equal returns False on a length mismatch, so a
    # violation surfaces as an AssertionError rather than a broadcasting error.
    assert np.array_equal(np.sort(crack_idx_train), np.sort(df_train['c_index'].unique()))
    assert np.array_equal(np.sort(crack_idx_test), np.sort(df_test['c_index'].unique()))
    # Every row must fall into exactly one of the two sets.
    assert (train_list_mask ^ test_list_mask).all()

    # Save the two splits next to the part files.
    df_train.to_csv(os.path.join(train_dir, part_name + "_TRAIN.csv"), index=False)
    df_test.to_csv(os.path.join(test_dir, part_name + "_TEST.csv"), index=False)

100%|██████████| 6/6 [17:31<00:00, 175.20s/it]


In [None]:
# Plot the three trailing value columns against the fourth-from-last column
# for ten randomly picked 128-row blocks of `df`.
# NOTE(review): `df` is whatever frame is currently bound in the kernel;
# confirm it is the frame you intend to inspect.
d = df.to_numpy()

num_crack = np.unique(d[:,0])

print("Total cracks: ", len(num_crack))

# Rows taken per block by the positional slice below.
# NOTE(review): presumably every crack owns exactly this many consecutive
# rows - confirm against the data files.
ROWS_PER_CRACK = 128

# (column offset from the end, legend label, colour) for each plotted curve.
CURVES = [
    (-3, "K-T", 'purple'),
    (-2, "K-B", 'green'),
    (-1, "K-P", 'red'),
]

np.random.seed(0)
fig, axs = plt.subplots(2, 5, figsize=(30,12))

# axs.flat walks the grid row by row, i.e. in the same order as nested
# row/column loops, so the random draws map to the same panels.
for panel_no, ax in enumerate(axs.flat):
    idx = np.random.randint(0, len(num_crack))
    block_start = idx * ROWS_PER_CRACK
    data = d[block_start:block_start + ROWS_PER_CRACK]

    # Columns 1-5 feed the panel title; each must hold a single distinct
    # value within the selected block.
    params = [np.unique(data[:, col]) for col in range(1, 6)]
    for values in params:
        assert len(values) == 1

    phi = data[:, -4]
    for col, label, colour in CURVES:
        ax.scatter(phi, data[:, col], color=colour, s=10)
        ax.plot(phi, data[:, col], label=label, color=colour, linestyle=":")

    ax.set_title("W/R:{} a/c:{} a/t:{} r/t:{} b/t:{}".format(*(values[0] for values in params)))
    ax.set_ylabel("SIFs")
    ax.set_xlabel(r"$\phi$")

    # One legend is enough; every panel uses the same three curves.
    if panel_no == 0:
        ax.legend()

plt.show()