## Generating Training Sets for Stage 2

# Assigning CUDA and Importing CPTu Values

In [1]:
import torch
from Functions_CPTu_Cap_Copy import *

# Check if CUDA is available
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")

# If CUDA is available, print out the number of CUDA devices and their names
if cuda_available:
    print(f"Number of GPUs available: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
else:
    print("No GPUs detected.")
    
    
import pandas as pd

# CPTu_data = pd.read_csv('CPTu_train_set.csv', header=None, skiprows=1)
# CPTu_data = pd.read_csv('CPTu_val_set.csv', header=None, skiprows=1)
CPTu_data = pd.read_csv('CPTu_test_set.csv', header=None, skiprows=1)

print(CPTu_data.shape[1])

CUDA available: True
Number of GPUs available: 1
GPU 0: NVIDIA GeForce RTX 2060
12


## Creating Range of Inputs

In [2]:
import numpy as np

l = np.arange(0,30.81,0.1) # 309 values
diam = np.arange(0.2, 1.7, 0.1) # 15 values

print(len(diam))
print(len(l))

print(diam[2])

15
309
0.4000000000000001


In [3]:
## Checking input parameters

In [4]:
def validate_cptu_data_frame(CPTu):
    expected_columns_count = CPTu_data.shape[1]   # Adjust based on your specific data structure
    if CPTu.shape[1] != expected_columns_count:
        raise ValueError(f"Unexpected number of columns in CPTu DataFrame. Expected {expected_columns_count}, got {CPTu.shape[1]}")
    if not isinstance(CPTu, pd.DataFrame):
        raise TypeError("CPTu data should be a pandas DataFrame.")
    print("CPTu DataFrame validation passed.")

def check_for_nans_and_infs_df(CPTu):
    if CPTu.isnull().any().any():
        raise ValueError("NaN values found in CPTu DataFrame.")
    if np.isinf(CPTu.select_dtypes(include=[np.number])).any().any():
        raise ValueError("Infinite values found in CPTu DataFrame.")
    print("No NaN or infinite values in CPTu DataFrame.")

def validate_lengths_and_diameters(L, b):
    # Convert lists to numpy arrays if they aren't already
    L = np.array(L) if not isinstance(L, np.ndarray) else L
    b = np.array(b) if not isinstance(b, np.ndarray) else b
    
    # Check for NaN values
    if np.isnan(L).any():
        raise ValueError("NaN values found in Lengths array.")
    if np.isnan(b).any():
        raise ValueError("NaN values found in Diameters array.")

    # Check for infinite values
    if np.isinf(L).any():
        raise ValueError("Infinite values found in Lengths array.")
    if np.isinf(b).any():
        raise ValueError("Infinite values found in Diameters array.")

    # Optionally, you can check for specific expected ranges or conditions
    if (L < 0).any() or (b <= 0).any():
        raise ValueError("Invalid values in Lengths or Diameters: Lengths should be non-negative and Diameters should be positive.")

    print("Lengths and Diameters validation passed.")

print(validate_cptu_data_frame(CPTu_data))
print(check_for_nans_and_infs_df(CPTu_data))
print(validate_lengths_and_diameters(l, diam))

CPTu DataFrame validation passed.
None
No NaN or infinite values in CPTu DataFrame.
None
Lengths and Diameters validation passed.
None


## Implementing Bearing Cap Equations

In [5]:
outputs = []
print(type(CPTu_data))
print(np.shape(CPTu_data))



CPTu_data_np = CPTu_data.to_numpy()

outputs= S2_bearing_cap(l, diam, CPTu_data_np)
print(np.shape(outputs))


   

# print(outputs[3])

<class 'pandas.core.frame.DataFrame'>
(3081, 12)
(3, 4635)


## checking values

In [6]:
# print(outputs[1][:400])

## Coverting to CSV File

In [7]:
print(outputs[0])
print(outputs[1])
print(np.shape(outputs[0]))
columns_to_drop = [4 * i + j for i in range((CPTu_data.shape[1] // 4)) for j in range(2)]

# Drop the columns
CPTu_data_dropped = CPTu_data.drop(CPTu_data.columns[columns_to_drop], axis=1)

# Flattening the remaining columns for each borehole group
flattened_CPTu_groups = []
for i in range(0, CPTu_data_dropped.shape[1], 2):  # Iterate over each pair of remaining columns
    group = CPTu_data_dropped.iloc[:, i:i+2].values
    flattened_group = np.concatenate(group.T)  # Flatten the group transposed to concatenate by columns
    flattened_CPTu_groups.append(flattened_group)

# Output the shape and type for confirmation
print(np.shape(flattened_CPTu_groups))  # Expected shape: (no_BH, 20) if original was 10 samples per column
print(type(flattened_CPTu_groups))
# print(flattened_CPTu_groups[0])

[-0.0062831853071795875, 12.393132913864275, 22.205827663515457, 43.52959324350803, 59.5980649414675, 76.77624820976354, 86.53296424508804, 81.70773239817902, 73.31025807022934, 68.85206163463191, 68.24798148735114, 70.1134962145467, 74.20565940688907, 77.63525772024512, 81.02938293171368, 84.6924156014621, 88.8603251979057, 91.77169251706248, 94.0716027696426, 98.25003624173046, 99.86078627115475, 101.99304014470547, 104.1189244774632, 103.62628505026846, 104.60109611793618, 106.26247000125306, 108.64499870030359, 110.38300261812371, 113.06641597433597, 116.39502135540869, 124.30157654632629, 131.61743002396628, 134.09787396723863, 136.4800934109733, 135.07974105200626, 134.8144520188455, 137.53400403484255, 138.49486903635147, 138.72925383945784, 138.34793988953572, 137.09620149053728, 137.68462715824944, 138.31871051148673, 139.5863590084219, 140.86144043695126, 141.38855922664018, 141.76677081550477, 141.76651519713667, 143.15358321663172, 144.87558117962703, 146.14438800476083, 14

## Padding Outputs and Exporting Results to CSV

In [8]:
# Setup the columns for DataFrame
columns = ['Length'] + ['Diameter'] + [f'CPTu_{i}' for i in range(2, 6164)] + ['Bearing_Cap']

# Calculate the total number of rows in the final DataFrame
total_rows = len(diam) * len(l) * (CPTu_data.shape[1] //4)

# Pre-allocate a numpy array for all data
data_array = np.empty((total_rows, len(columns)))

sets = (CPTu_data.shape[1] //4)

outputs = np.array(outputs)

# Calculate the total number of rows in the final DataFrame
total_rows = len(diam) * len(l) * (CPTu_data.shape[1] //4)


current_row = 0
for cptu_index in range(CPTu_data.shape[1] //4):  # Iterate over each CPTu set
    print(f"CPTu Set Index: {cptu_index}")
    for i in range(len(diam)):  # i indexes the diameters
        for k in range(len(l)):  # k indexes the lengths
            output_index = i * len(l) + k  # Flatten the 2D structure of diam and length into a single dimension
            # print(f"Length index {k}, Diameter index {i}, Output Index: {output_index}")
            
            # Assign data to the current row of the data_array
            data_array[current_row, 0] = l[k]
            data_array[current_row, 1] = diam[i]
            data_array[current_row, 2:6164] = flattened_CPTu_groups[cptu_index]  # Assuming each set can be directly assigned
            data_array[current_row, 6164] = outputs[cptu_index, output_index]  # Access the correct output from reshaped array
            current_row += 1




# Create a DataFrame from the numpy array
df = pd.DataFrame(data_array, columns=columns)

# Save the DataFrame to a CSV file
df.to_csv('Stage_2_testd_merged_reduced3.csv', index=False)
print("CSV file has been created with the shape:", df.shape)


CPTu Set Index: 0
CPTu Set Index: 1
CPTu Set Index: 2
CSV file has been created with the shape: (13905, 6165)
