# Data Matrix Preprocessing

This notebook preprocesses the `compliance_and_engineering_constants` dataset by reconstructing the 11x11 node connectivity matrix and the 6x6 compliance matrix. In the original data, the entries of these matrices are stored in individual columns, denoted by "NxN" for connectivity (e.g. `1x2`) and "SNN" for compliance (e.g. `S12`). After reconstructing each matrix, these columns are dropped from the dataset and replaced by a single column holding the corresponding matrix.

In [1]:
import pandas as pd
import numpy as np
import re
import pickle

In [2]:
# Read the raw dataset from its CSV file and preview the first rows
csv_path = '../data/compliance_and_engineering_constants.csv'
data = pd.read_csv(csv_path)
display(data.head())
print(f"Data shape: {data.shape}")

Unnamed: 0,ρ,1x1,1x2,2x2,1x3,2x3,3x3,1x4,2x4,3x4,...,nu32,BV,GV,BR,GR,AU,cond_C_debug,sym_resid_debug,invert_warn_debug,pinv_debug
0,0.3,0,1,0,0,0,0,0,0,1,...,0.27012,0.044626,0.025066,0.044626,0.025047,0.003852,5.231462,0,False,
1,0.3,0,0,0,0,0,0,0,0,0,...,0.322746,0.042532,0.022705,0.042532,0.021679,0.236657,4.823461,0,False,
2,0.3,0,0,0,0,0,0,0,0,1,...,0.301303,0.039092,0.021717,0.039092,0.021247,0.110572,4.842287,0,False,
3,0.3,0,0,0,0,0,0,0,0,1,...,0.25682,0.045055,0.026767,0.045055,0.026755,0.002277,4.971338,0,False,
4,0.3,0,0,0,0,0,0,0,0,0,...,0.240568,0.027819,0.018944,0.027818,0.01886,0.022473,4.185499,0,False,


Data shape: (2624, 124)


In [3]:
# Remove every column whose name contains the substring 'debug'
debug_cols = [name for name in data.columns if 'debug' in name]
data = data.drop(columns=debug_cols)
display(data.head())
print(f"Data shape after dropping debug columns: {data.shape}")

Unnamed: 0,ρ,1x1,1x2,2x2,1x3,2x3,3x3,1x4,2x4,3x4,...,nu13,nu23,nu21,nu31,nu32,BV,GV,BR,GR,AU
0,0.3,0,1,0,0,0,0,0,0,1,...,0.270381,0.270225,0.270126,0.270115,0.27012,0.044626,0.025066,0.044626,0.025047,0.003852
1,0.3,0,0,0,0,0,0,0,0,0,...,0.322935,0.322292,0.324034,0.323716,0.322746,0.042532,0.022705,0.042532,0.021679,0.236657
2,0.3,0,0,0,0,0,0,0,0,1,...,0.300551,0.30127,0.301656,0.300691,0.301303,0.039092,0.021717,0.039092,0.021247,0.110572
3,0.3,0,0,0,0,0,0,0,0,1,...,0.25777,0.256933,0.257779,0.257614,0.25682,0.045055,0.026767,0.045055,0.026755,0.002277
4,0.3,0,0,0,0,0,0,0,0,0,...,0.244505,0.239779,0.237998,0.245358,0.240568,0.027819,0.018944,0.027818,0.01886,0.022473


Data shape after dropping debug columns: (2624, 120)


In [4]:
# Reconstruct node group connectivity matrix from columns "ixj"
# Use fullmatch (not match) so that only names that are exactly "<i>x<j>" are
# selected; match() would also accept names that merely start with that pattern.
upper_triangular_cols = [col for col in data.columns if re.fullmatch(r'\d{1,2}x\d{1,2}', col)]

def construct_connectivity_matrix(row, cols, split_by='x', size=11):
    """Build a symmetric connectivity matrix from flattened "i<sep>j" columns.

    Parameters
    ----------
    row : mapping-like (e.g. pandas.Series or dict)
        Object indexed by column name that supplies the matrix entries.
    cols : iterable of str
        Column names of the form "<i><split_by><j>" with 1-based indices.
    split_by : str, default 'x'
        Separator between the two indices in each column name.
    size : int, default 11
        Number of rows/columns of the square output matrix.

    Returns
    -------
    numpy.ndarray
        Float array of shape (size, size). For every column, both entry
        (i, j) and its transpose (j, i) are set to ``row[col]``; entries not
        named by any column stay 0.

    Raises
    ------
    ValueError
        If a column name does not split into exactly two integers.
    IndexError
        If an index is outside 1..size.
    """
    matrix = np.zeros((size, size))
    for col in cols:
        i, j = map(int, col.split(split_by))
        # Without this check an index of 0 (or a negative one) would wrap
        # around via negative indexing and silently fill the last row/column.
        if not (1 <= i <= size and 1 <= j <= size):
            raise IndexError(
                f"Column {col!r} refers to entry ({i}, {j}), outside a {size}x{size} matrix"
            )
        value = row[col]
        matrix[i - 1, j - 1] = value
        matrix[j - 1, i - 1] = value
    return matrix

# Apply to data, reorder columns, and drop original "ixj" columns
# Guard against re-running this cell: once the "ixj" columns have been dropped,
# the column list is empty and every matrix would be silently replaced by zeros.
if not upper_triangular_cols:
    raise ValueError(
        'No "ixj" connectivity columns found in data; reload the CSV before re-running this cell.'
    )
data['connectivity_matrix'] = data.apply(lambda row: construct_connectivity_matrix(row, upper_triangular_cols, 'x'), axis=1)
# Move ρ and the new matrix column to the front; remaining columns keep their order
cols = ['ρ', 'connectivity_matrix'] + [col for col in data.columns if col not in ['ρ', 'connectivity_matrix']]
data = data[cols]
data = data.drop(columns=upper_triangular_cols)
display(data.iloc[0]['connectivity_matrix'])
display(data.head())
print(f"Data shape after constructing connectivity matrices: {data.shape}")

array([[0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.]])

Unnamed: 0,ρ,connectivity_matrix,S11,S12,S13,S14,S15,S16,S21,S22,...,nu13,nu23,nu21,nu31,nu32,BV,GV,BR,GR,AU
0,0.3,"[[0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0,...",16.244217,-4.39059,-4.392122,0,0,0,-4.39059,16.253853,...,0.270381,0.270225,0.270126,0.270115,0.27012,0.044626,0.025066,0.044626,0.025047,0.003852
1,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",22.194444,-7.184493,-7.167363,0,0,0,-7.184493,22.172022,...,0.322935,0.322292,0.324034,0.323716,0.322746,0.042532,0.022705,0.042532,0.021679,0.236657
2,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",21.448658,-6.467827,-6.446426,0,0,0,-6.467827,21.441085,...,0.300551,0.30127,0.301656,0.300691,0.301303,0.039092,0.021717,0.039092,0.021247,0.110572
3,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",15.247635,-3.931171,-3.930385,0,0,0,-3.931171,15.250143,...,0.25777,0.256933,0.257779,0.257614,0.25682,0.045055,0.026767,0.045055,0.026755,0.002277
4,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",23.162733,-5.511611,-5.66341,0,0,0,-5.511611,23.158215,...,0.244505,0.239779,0.237998,0.245358,0.240568,0.027819,0.018944,0.027818,0.01886,0.022473


Data shape after constructing connectivity matrices: (2624, 55)


In [5]:
# Reconstruct compliance matrix from columns Sij
# fullmatch with exactly two digits: only names of the form "S<i><j>" are
# selected, not names that merely start with "S" followed by a digit.
compliance_cols = [col for col in data.columns if re.fullmatch(r'S\d{2}', col)]

def construct_compliance_matrix(row, cols):
    """Build a 6x6 compliance matrix from flattened "Sij" columns.

    Parameters
    ----------
    row : mapping-like (e.g. pandas.Series or dict)
        Object indexed by column name that supplies the matrix entries.
    cols : iterable of str
        Column names made of one leading character followed by two
        single-digit, 1-based indices (e.g. "S12" -> row 1, column 2).

    Returns
    -------
    numpy.ndarray
        Float array of shape (6, 6). For every column, both entry (i, j) and
        its transpose (j, i) are set to ``row[col]``, so when both "Sij" and
        "Sji" are listed, the one that comes later in ``cols`` determines
        both positions.

    Raises
    ------
    ValueError
        If the part after the first character is not exactly two digits.
    IndexError
        If an index is outside 1..6.
    """
    size = 6
    matrix = np.zeros((size, size))
    for col in cols:
        # Each character after the leading letter is one index digit
        i, j = map(int, col[1:])
        # Without this check a 0 digit would wrap around via negative
        # indexing and silently fill the last row/column.
        if not (1 <= i <= size and 1 <= j <= size):
            raise IndexError(
                f"Column {col!r} refers to entry ({i}, {j}), outside a {size}x{size} matrix"
            )
        value = row[col]
        matrix[i - 1, j - 1] = value
        matrix[j - 1, i - 1] = value
    return matrix

# Apply to data and drop original "Sij" columns
# Guard against re-running this cell: once the "Sij" columns have been dropped,
# the column list is empty and every matrix would be silently replaced by zeros.
if not compliance_cols:
    raise ValueError(
        'No "Sij" compliance columns found in data; reload the CSV before re-running this cell.'
    )
data['compliance_matrix'] = data.apply(lambda row: construct_compliance_matrix(row, compliance_cols), axis=1)
data = data.drop(columns=compliance_cols)
display(data.head())
print(f"Data shape after constructing compliance matrices: {data.shape}")

Unnamed: 0,ρ,connectivity_matrix,E1,E2,E3,G23,G13,G12,nu12,nu13,nu23,nu21,nu31,nu32,BV,GV,BR,GR,AU,compliance_matrix
0,0.3,"[[0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0,...",0.06156,0.061524,0.0615,0.025668,0.025591,0.025632,0.270286,0.270381,0.270225,0.270126,0.270115,0.27012,0.044626,0.025066,0.044626,0.025047,0.003852,"[[16.2442166588418, -4.390589663678543, -4.392..."
1,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.045056,0.045102,0.045165,0.026453,0.026486,0.026497,0.323707,0.322935,0.322292,0.324034,0.323716,0.322746,0.042532,0.022705,0.042532,0.021679,0.236657,"[[22.194443522542244, -7.184492662134119, -7.1..."
2,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.046623,0.046639,0.046645,0.024257,0.024269,0.024219,0.301549,0.300551,0.30127,0.301656,0.300691,0.301303,0.039092,0.021717,0.039092,0.021247,0.110572,"[[21.44865815819801, -6.467827160497426, -6.44..."
3,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.065584,0.065573,0.065544,0.027258,0.027189,0.027245,0.257822,0.25777,0.256933,0.257779,0.257614,0.25682,0.045055,0.026767,0.045055,0.026755,0.002277,"[[15.247634909853652, -3.931170890685161, -3.9..."
4,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.043173,0.043181,0.043323,0.01994,0.019963,0.019987,0.237952,0.244505,0.239779,0.237998,0.245358,0.240568,0.027819,0.018944,0.027818,0.01886,0.022473,"[[23.16273271145176, -5.511611078765939, -5.66..."


Data shape after constructing compliance matrices: (2624, 20)


In [6]:
# Place ρ and the two matrix columns first; all other columns follow in their current order
leading_cols = ['ρ', 'connectivity_matrix', 'compliance_matrix']
engineering_cols = [name for name in data.columns if name not in leading_cols]

final_cols = leading_cols + engineering_cols
data = data[final_cols]
display(data.head())

Unnamed: 0,ρ,connectivity_matrix,compliance_matrix,E1,E2,E3,G23,G13,G12,nu12,nu13,nu23,nu21,nu31,nu32,BV,GV,BR,GR,AU
0,0.3,"[[0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0,...","[[16.2442166588418, -4.390589663678543, -4.392...",0.06156,0.061524,0.0615,0.025668,0.025591,0.025632,0.270286,0.270381,0.270225,0.270126,0.270115,0.27012,0.044626,0.025066,0.044626,0.025047,0.003852
1,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[22.194443522542244, -7.184492662134119, -7.1...",0.045056,0.045102,0.045165,0.026453,0.026486,0.026497,0.323707,0.322935,0.322292,0.324034,0.323716,0.322746,0.042532,0.022705,0.042532,0.021679,0.236657
2,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[21.44865815819801, -6.467827160497426, -6.44...",0.046623,0.046639,0.046645,0.024257,0.024269,0.024219,0.301549,0.300551,0.30127,0.301656,0.300691,0.301303,0.039092,0.021717,0.039092,0.021247,0.110572
3,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[15.247634909853652, -3.931170890685161, -3.9...",0.065584,0.065573,0.065544,0.027258,0.027189,0.027245,0.257822,0.25777,0.256933,0.257779,0.257614,0.25682,0.045055,0.026767,0.045055,0.026755,0.002277
4,0.3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[23.16273271145176, -5.511611078765939, -5.66...",0.043173,0.043181,0.043323,0.01994,0.019963,0.019987,0.237952,0.244505,0.239779,0.237998,0.245358,0.240568,0.027819,0.018944,0.027818,0.01886,0.022473


In [7]:
# Persist the DataFrame, including its matrix-valued columns, as a pickle file
filename = '../data/connectivity_compliance_matrices.pkl'
data.to_pickle(path=filename)

print(f"Saved data with shape {data.shape} to {filename}")

Saved data with shape (2624, 20) to ../data/connectivity_compliance_matrices.pkl
