In [12]:
import os
import pandas as pd
import cmath
import numpy as np
from scipy.interpolate import interp1d
import re
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import joblib

# 1. Identify relevant files
file_list = os.listdir()
sweep_files = [f for f in file_list if f.endswith('.csv') and 'sweep' in f]
print(f"Identified {len(sweep_files)} sweep files: {sweep_files}")

preprocessed_dfs = []

for file in sweep_files:
    df = pd.read_csv(file)
    # Assuming the columns are 'frequency_Hz', 'S11_real', 'S11_imag', 'S21_real', 'S21_imag'
    # Based on previous inspection, only S11 data is present with columns 'S11_real', 'S11_imag'
    df['S11 Magnitude'] = df.apply(lambda row: cmath.polar(complex(row['S11_real'], row['S11_imag']))[0], axis=1)
    df['S11 Phase'] = df.apply(lambda row: cmath.polar(complex(row['S11_real'], row['S11_imag']))[1], axis=1)

    # Check if S21 data exists before processing
    if 'S21_real' in df.columns and 'S21_imag' in df.columns:
        df['S21 Magnitude'] = df.apply(lambda row: cmath.polar(complex(row['S21_real'], row['S21_imag']))[0], axis=1)
        df['S21 Phase'] = df.apply(lambda row: cmath.polar(complex(row['S21_real'], row['S21_imag']))[1], axis=1)
        preprocessed_dfs.append(df[['frequency_Hz', 'S11 Magnitude', 'S11 Phase', 'S21 Magnitude', 'S21 Phase']])
    else:
        preprocessed_dfs.append(df[['frequency_Hz', 'S11 Magnitude', 'S11 Phase']])

print(f"\nSuccessfully preprocessed {len(preprocessed_dfs)} dataframes.")
display(preprocessed_dfs[0].head())

# 3. Standardize frequency points
min_freq = min(df['frequency_Hz'].min() for df in preprocessed_dfs)
max_freq = max(df['frequency_Hz'].max() for df in preprocessed_dfs)
num_points = 1000  # Define a fixed number of points for the common axis
common_freq_axis = np.linspace(min_freq, max_freq, num_points)

interpolated_dfs = []

for df in preprocessed_dfs:
    freq = df['frequency_Hz']
    s11_mag = df['S11 Magnitude']
    s11_phase = df['S11 Phase']

    interp_mag = interp1d(freq, s11_mag, kind='linear', fill_value='extrapolate')
    interp_phase = interp1d(freq, s11_phase, kind='linear', fill_value='extrapolate')

    interpolated_df = pd.DataFrame({
        'frequency_Hz': common_freq_axis,
        'S11 Magnitude': interpolated_mag,
        'S11 Phase': interpolated_phase
    })

    # Include S21 data if it exists
    if 'S21 Magnitude' in df.columns and 'S21 Phase' in df.columns:
        s21_mag = df['S21 Magnitude']
        s21_phase = df['S21 Phase']
        interp_s21_mag = interp1d(freq, s21_mag, kind='linear', fill_value='extrapolate')
        interp_s21_phase = interp1d(freq, s21_phase, kind='linear', fill_value='extrapolate')
        interpolated_df['S21 Magnitude'] = interp_s21_mag(common_freq_axis)
        interpolated_df['S21 Phase'] = interp_s21_phase(common_freq_axis)

    interpolated_dfs.append(interpolated_df)

print(f"\nSuccessfully interpolated {len(interpolated_dfs)} dataframes to a common frequency axis.")
display(interpolated_dfs[0].head())

# 4. Combine data
combined_dfs_list = []

for i, df in enumerate(interpolated_dfs):
    # Extract circuit type from the filename
    filename = sweep_files[i]
    match = re.search(r'Circuit_\d+_(.+?)_sweep\.csv', filename)
    if match:
        circuit_type = match.group(1).replace('_', ' ')
    else:
        circuit_type = 'Unknown' # Default to 'Unknown' if pattern doesn't match

    df_with_type = df.copy()
    df_with_type['Circuit Type'] = circuit_type
    combined_dfs_list.append(df_with_type)

# Concatenate all dataframes in the list
combined_df = pd.concat(combined_dfs_list, ignore_index=True)

print("\nSuccessfully combined all dataframes.")
display(combined_df.head())

# 5. Handle missing S21 data with zero imputation
# For circuits without S21 data, the S21 Magnitude and S21 Phase columns will contain NaNs.
# We will impute these NaN values with 0. This is a common practice when the absence of a
# feature (like an S21 port) can be meaningfully represented by a zero value in the context
# of the data (no transmission).
s21_cols = ['S21 Magnitude', 'S21 Phase']
for col in s21_cols:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].fillna(0)
        print(f"Filled NaN values in '{col}' with 0.")
    else:
        # This case should not happen if S11 is always present, but good for robustness
        print(f"Column '{col}' not found in combined_df.")


# 6. Normalize data
# Identify S-parameter columns for normalization, now including S21 if present after imputation
s_param_cols = [col for col in combined_df.columns if 'S11' in col or 'S21' in col]
s_params = combined_df[s_param_cols]

scaler = StandardScaler()
normalized_s_params = scaler.fit_transform(s_params)

normalized_col_names = [col + ' Normalized' for col in s_param_cols]
normalized_df = pd.DataFrame(normalized_s_params, columns=normalized_col_names)

# Combine the normalized S-parameters with the Circuit Type and frequency_Hz columns
combined_df = combined_df.reset_index(drop=True)
normalized_df = normalized_df.reset_index(drop=True)

columns_to_keep = ['frequency_Hz', 'Circuit Type']
combined_df_normalized = pd.concat([combined_df[columns_to_keep], normalized_df], axis=1)

print("\nSuccessfully normalized S-parameter data (including zero-imputed S21).")
display(combined_df_normalized.head())

# 7. Prepare for DL
# Separate features and labels
feature_cols_normalized = [col for col in combined_df_normalized.columns if 'Normalized' in col]
features = combined_df_normalized[feature_cols_normalized]
labels = combined_df_normalized['Circuit Type']

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Reshape features for CNN input (assuming a 1D CNN)
# Input shape: (number of samples, number of time steps, number of features)
# Our data has 'num_points' time steps (frequency points) per sample and 'len(feature_cols_normalized)' features (S11 and potentially S21).
# We need to group by circuit type and then reshape.

# First, group features by circuit type to treat each circuit's sweep as a single sample
grouped_features = features.values.reshape(-1, num_points, len(feature_cols_normalized))

# The number of samples is the number of unique circuit types
num_samples = len(label_encoder.classes_)

# The labels also need to be aligned with the grouped features, one label per sample (circuit type)
# Since the original data is already ordered by circuit type due to concatenation,
# we can take the first label for each block of `num_points`
encoded_labels_reshaped = encoded_labels[::num_points]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(grouped_features, encoded_labels_reshaped, test_size=0.2, random_state=42)

print("\nData prepared for DL.")
print("Original features shape:", features.shape)
print("Encoded labels shape:", encoded_labels.shape)
print("Reshaped features shape:", grouped_features.shape)
print("Reshaped encoded labels shape:", encoded_labels_reshaped.shape)
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# 8. Save standardized data
# Save NumPy arrays
np.save('X_train.npy', X_train)
np.save('X_test.npy', X_test)
np.save('y_train.npy', y_train)
np.save('y_test.npy', y_test)

# Save the label encoder
joblib.dump(label_encoder, 'label_encoder.joblib')

print("\nData and label encoder saved successfully.")

# 9. Finish task
print("\nData standardization and preparation for Deep Learning are complete.")
print("Zero imputation was used for circuits without S21 data.")
print("The standardized data (X_train.npy, X_test.npy, y_train.npy, y_test.npy) and the label encoder (label_encoder.joblib) have been saved.")
print("You can now use these files to train your Deep Learning model.")

Identified 18 sweep files: ['Circuit_1_RLC_Series_Parallel_1_sweep.csv', 'Circuit_13_Short_Circuit_sweep.csv', 'Circuit_11_400MHz_Low_Pass_Filter_c11_sweep.csv', 'Circuit_4_75_Ohm_Resistor_sweep.csv', 'Circuit_3_33_Ohm_Resistor_sweep.csv', 'Circuit_18_3dB_Attenuation_Circuit_sweep.csv', 'Circuit_17_10dB_Attenuation_Circuit_sweep.csv', 'Circuit_6_10_7MHz_Ceramic_Notch_Filter_low_sweep.csv', 'Circuit_14_Open_Circuit_sweep.csv', 'Circuit_12_500MHz_High_Pass_Filter_c12_sweep.csv', 'Circuit_7_RC_Series_Circuit_sweep.csv', 'Circuit_8_LC_Series_Circuit_sweep.csv', 'Circuit_9_Capacitor_sweep.csv', 'Circuit_5_6_5MHz_Ceramic_Notch_Filter_low_sweep.csv', 'Circuit_16_Thru_Circuit_sweep.csv', 'Circuit_10_Inductor_sweep.csv', 'Circuit_2_RLC_Series_Parallel_2_sweep.csv', 'Circuit_15_50_Ohm_Load_sweep.csv']

Successfully preprocessed 18 dataframes.


Unnamed: 0,frequency_Hz,S11 Magnitude,S11 Phase
0,1000000,0.426606,0.807827
1,1290000,0.423355,0.791333
2,1580000,0.420244,0.774825
3,1870000,0.41715,0.758311
4,2160000,0.414214,0.742006



Successfully interpolated 18 dataframes to a common frequency axis.


Unnamed: 0,frequency_Hz,S11 Magnitude,S11 Phase
0,1000000.0,0.011568,-2.335886
1,1029029.0,0.011568,-2.337306
2,1058058.0,0.011568,-2.338726
3,1087087.0,0.011568,-2.340146
4,1116116.0,0.011567,-2.341566



Successfully combined all dataframes.


Unnamed: 0,frequency_Hz,S11 Magnitude,S11 Phase,Circuit Type,S21 Magnitude,S21 Phase
0,1000000.0,0.011568,-2.335886,RLC Series Parallel 1,,
1,1029029.0,0.011568,-2.337306,RLC Series Parallel 1,,
2,1058058.0,0.011568,-2.338726,RLC Series Parallel 1,,
3,1087087.0,0.011568,-2.340146,RLC Series Parallel 1,,
4,1116116.0,0.011567,-2.341566,RLC Series Parallel 1,,


Filled NaN values in 'S21 Magnitude' with 0.
Filled NaN values in 'S21 Phase' with 0.

Successfully normalized S-parameter data (including zero-imputed S21).


Unnamed: 0,frequency_Hz,Circuit Type,S11 Magnitude Normalized,S11 Phase Normalized,S21 Magnitude Normalized,S21 Phase Normalized
0,1000000.0,RLC Series Parallel 1,1.34292,2.162537,-0.507347,-0.390618
1,1029029.0,RLC Series Parallel 1,1.342744,2.150832,-0.507347,-0.390618
2,1058058.0,RLC Series Parallel 1,1.342568,2.139127,-0.507347,-0.390618
3,1087087.0,RLC Series Parallel 1,1.342392,2.127422,-0.507347,-0.390618
4,1116116.0,RLC Series Parallel 1,1.342216,2.115717,-0.507347,-0.390618



Data prepared for DL.
Original features shape: (18000, 4)
Encoded labels shape: (18000,)
Reshaped features shape: (18, 1000, 4)
Reshaped encoded labels shape: (18,)
X_train shape: (14, 1000, 4)
X_test shape: (4, 1000, 4)
y_train shape: (14,)
y_test shape: (4,)

Data and label encoder saved successfully.

Data standardization and preparation for Deep Learning are complete.
Zero imputation was used for circuits without S21 data.
The standardized data (X_train.npy, X_test.npy, y_train.npy, y_test.npy) and the label encoder (label_encoder.joblib) have been saved.
You can now use these files to train your Deep Learning model.


In [11]:
# Check for circuits that have S21 data
circuits_with_s21 = combined_df_normalized[combined_df_normalized['S21 Magnitude Normalized'].notna()]['Circuit Type'].unique()
print(f"Circuits with S21 data: {circuits_with_s21}")

# Check for circuits that do not have S21 data
circuits_without_s21 = combined_df_normalized[combined_df_normalized['S21 Magnitude Normalized'].isna()]['Circuit Type'].unique()
print(f"Circuits without S21 data: {circuits_without_s21}")

# Display head of combined_df_normalized for a circuit with S21 data (if any)
if len(circuits_with_s21) > 0:
    print("\nSample data for a circuit WITH S21 data:")
    display(combined_df_normalized[combined_df_normalized['Circuit Type'] == circuits_with_s21[0]].head())

# Display head of combined_df_normalized for a circuit without S21 data (if any)
if len(circuits_without_s21) > 0:
    print("\nSample data for a circuit WITHOUT S21 data:")
    display(combined_df_normalized[combined_df_normalized['Circuit Type'] == circuits_without_s21[0]].head())

# Check for NaN values in the S21 columns
print("\nChecking for NaN values in S21 columns:")
print(combined_df_normalized[['S21 Magnitude Normalized', 'S21 Phase Normalized']].isna().sum())

Circuits with S21 data: ['400MHz Low Pass Filter c11' '3dB Attenuation Circuit'
 '10dB Attenuation Circuit' '10 7MHz Ceramic Notch Filter low'
 '500MHz High Pass Filter c12' '6 5MHz Ceramic Notch Filter low'
 'Thru Circuit']
Circuits without S21 data: ['RLC Series Parallel 1' 'Short Circuit' '75 Ohm Resistor'
 '33 Ohm Resistor' 'Open Circuit' 'RC Series Circuit' 'LC Series Circuit'
 'Capacitor' 'Inductor' 'RLC Series Parallel 2' '50 Ohm Load']

Sample data for a circuit WITH S21 data:


Unnamed: 0,frequency_Hz,Circuit Type,S11 Magnitude Normalized,S11 Phase Normalized,S21 Magnitude Normalized,S21 Phase Normalized
2000,1000000.0,400MHz Low Pass Filter c11,1.34292,2.162537,1.355735,0.281433
2001,1029029.0,400MHz Low Pass Filter c11,1.342744,2.150832,1.355652,0.280969
2002,1058058.0,400MHz Low Pass Filter c11,1.342568,2.139127,1.355568,0.280505
2003,1087087.0,400MHz Low Pass Filter c11,1.342392,2.127422,1.355484,0.280041
2004,1116116.0,400MHz Low Pass Filter c11,1.342216,2.115717,1.355401,0.279577



Sample data for a circuit WITHOUT S21 data:


Unnamed: 0,frequency_Hz,Circuit Type,S11 Magnitude Normalized,S11 Phase Normalized,S21 Magnitude Normalized,S21 Phase Normalized
0,1000000.0,RLC Series Parallel 1,1.34292,2.162537,,
1,1029029.0,RLC Series Parallel 1,1.342744,2.150832,,
2,1058058.0,RLC Series Parallel 1,1.342568,2.139127,,
3,1087087.0,RLC Series Parallel 1,1.342392,2.127422,,
4,1116116.0,RLC Series Parallel 1,1.342216,2.115717,,



Checking for NaN values in S21 columns:
S21 Magnitude Normalized    11000
S21 Phase Normalized        11000
dtype: int64
