In [1]:
import math
import sqlite3

import joblib
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Path of the SQLite database file, relative to the notebook's working directory.
DB_PATH = '../test_data/lw.db'

# Connection handed to DataFrame.to_sql when the generated rows are appended.
conn = sqlite3.connect(DB_PATH)

In [3]:
# Raw data: the input columns plus the 'dB(S(1,1)) []' column.
df = pd.read_csv("../test_data/new leaky wave/S11_Data_combined_w_extra.csv")

# Everything except the S11 column is treated as a model input.
X = df.drop('dB(S(1,1)) []', axis=1)

# Empty frame that only carries the input column names.
complete_df = pd.DataFrame(columns=X.columns)

In [4]:
# For every input column, collect the distinct values that occur in the data.
# These per-column value lists are the axes of the grid built in the next cell.
data_cols = list(X.columns)
ranges = {column: X[column].unique().tolist() for column in data_cols}

# Custom overrides
# NOTE(review): the column names below look like the patch-antenna dataset's columns,
# not the columns of the CSV loaded above; uncommenting them as-is would presumably
# raise KeyError - confirm before re-enabling.
# ranges['inset_dist [mm]'] += [0.8, 1.2]
# ranges['L [mm]'] += [11.75, 12.25]
# ranges['W [mm]'] += [14.2, 14.4, 14.6, 15.0, 15.2, 15.4]
# ranges['W0 [mm]'] += [2.75, 3.25]
# ranges['y0 [mm]'] += [3.25, 3.75, 4.25, 4.75]

In [5]:
# Cartesian product of all per-column value lists, built with pandas cross joins
# (see https://stackoverflow.com/a/13270110/3675086).
# Start from a one-column frame for the first input column, then cross-join one
# single-column frame per remaining input column.
seed_col = X.columns[0]
ranges_df = pd.DataFrame(ranges[seed_col], columns=[seed_col])
for col in complete_df.columns[1:]:
    ranges_df = ranges_df.merge(
        pd.DataFrame(ranges[col], columns=[col]),
        how='cross',
    )

In [6]:
# Chunking parameters for the batch run.
# About 10 million rows is the most that can be handled per pass; more than that
# runs out of RAM on a 64 GB machine. The process was originally run iteratively,
# appending to the sqlite db every time.
start = 0
step = 10_000_000
# ranges_df = ranges_df[start:end]

In [7]:
# Serialized model file, relative to the notebook's working directory.
MODEL_PATH = '../antenna_model_w_extra.pkl'

# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load model files that come from a trusted source.
model = joblib.load(MODEL_PATH)


In [8]:
# Predict S11 for every generated geometry in chunks of `step` rows and append each
# chunk to the 'geometries' table. Geometries that were actually simulated keep their
# simulated S11 value instead of the model prediction.

# Name of the simulated S11 column in the source data.
target_col = 'dB(S(1,1)) []'
feature_cols = X.columns.tolist()

# One simulated S11 value per geometry. Somewhere in the dataset there are multiple
# S11 simulations for the same geometry; drop_duplicates keeps the first of each.
# This does not depend on the chunk, so it is computed once, outside the loop.
simulated = df.drop_duplicates(subset=feature_cols)[feature_cols + [target_col]]

total_rows = ranges_df.shape[0]
count = math.ceil(total_rows / step)
for i in range(count):
    print(i)

    start = step * i
    # The final chunk is usually shorter than `step`.
    end = min(step * (i + 1), total_rows)
    if i + 1 == count:
        print("last)")

    print(f"cropping between {start} and {end}")

    # Left-merge the chunk with the simulated data on the geometry columns
    # (https://stackoverflow.com/a/47107164). The merge both flags which rows were
    # simulated (`_merge`) and brings each row's own simulated S11 value along.
    # Matching by key is essential: slicing a globally sorted list of simulated
    # values by position would give every chunk the first N values regardless of
    # which geometries the chunk actually contains.
    df_all = ranges_df.iloc[start:end].merge(
        simulated, on=feature_cols, how='left', indicator=True
    )

    # Assign source col to predicted or simulated based on where it came from
    # 0 = predicted, 1 = simulated
    df_all['source_simulated'] = (df_all['_merge'] == 'both').astype(int)

    # Predict from the geometry columns only, in the same column order as X.
    df_all['prediction'] = model.predict(df_all[feature_cols])

    # Replace predictions for geometries that already have simulated values.
    is_simulated = df_all['source_simulated'] == 1
    df_all.loc[is_simulated, 'prediction'] = df_all.loc[is_simulated, target_col]

    # Drop the helper columns so the stored columns are: inputs, source_simulated, prediction.
    df_all = df_all.drop(columns=['_merge', target_col])

    how_many = df_all.to_sql(name='geometries', con=conn, index=False, if_exists='append')
    print(f"Appended {how_many}")

0
cropping between 0 and 10000000
Appended 10000000
1
cropping between 10000000 and 20000000
Appended 10000000
2
cropping between 20000000 and 30000000
Appended 10000000
3
cropping between 30000000 and 40000000
Appended 10000000
4
cropping between 40000000 and 50000000
Appended 10000000
5
cropping between 50000000 and 60000000
Appended 10000000
6
cropping between 60000000 and 70000000
Appended 10000000
7
cropping between 70000000 and 80000000
Appended 10000000
8
cropping between 80000000 and 90000000
Appended 10000000
9
cropping between 90000000 and 100000000
Appended 10000000
10
cropping between 100000000 and 110000000
Appended 10000000
11
cropping between 110000000 and 120000000
Appended 10000000
12
cropping between 120000000 and 130000000
Appended 10000000
13
cropping between 130000000 and 140000000
Appended 10000000
14
cropping between 140000000 and 150000000
Appended 10000000
15
cropping between 150000000 and 160000000
Appended 10000000
16
cropping between 160000000 and 170000000


In [9]:
# Rebuild the merged frame for a single slice of the grid, labelling each row as
# 'Predicted' or 'Simulated' (https://stackoverflow.com/a/47107164).
# Note: `start` and `end` are whatever values the chunk loop last assigned.
geometry_cols = X.columns.tolist()
known_geometries = X.drop_duplicates()
grid_slice = ranges_df[start:end]

df_all = grid_slice.merge(known_geometries, on=geometry_cols, how='left', indicator=True)

# Rows found only in the generated grid are predictions; all others were simulated.
df_all['source_simulated'] = [
    'Predicted' if flag == 'left_only' else 'Simulated'
    for flag in df_all['_merge']
]

# The merge indicator has served its purpose.
df_all = df_all.drop(columns=['_merge'])

In [10]:
# Display the dtype of every column of the merged frame.
df_all.dtypes

cpw_in [mm]            float64
feed_l [mm]            float64
patch_l [mm]           float64
cpw_g [mm]             float64
Feed_W [mm]            float64
ground_w [mm]          float64
patch_ground_w [mm]    float64
patch_w [mm]           float64
Freq [GHz]             float64
source_simulated        object
dtype: object

### Investigate min, max, and step

In [None]:
# Load the patch-antenna CSV and separate the inputs from the S11 column.
# Note: this rebinds `df` and `X`, which the earlier cells also use.
df = pd.read_csv("../test_data/patch_antenna/Patch Antenna S11 Data.csv")
X = df.drop('dB(S(1,1)) []', axis=1)

In [None]:
# For each input column, print its min, max and mean, followed by the list of
# distinct values, so the sampling step of each parameter can be read off.
for col in X.columns:
    values = X[col]
    print(f"{col}: Range: {values.min()} - {values.max()}, avg: {values.mean()}")
    print(values.unique().tolist())
    print('')

inset_dist [mm]: Range: 0.6 - 1.4, avg: 0.9999999999999998
[0.6, 1.0, 1.4]

L [mm]: Range: 11.5 - 12.5, avg: 12.0
[11.5, 12.0, 12.5]

sub_thick [mm]: Range: 2 - 2, avg: 2.0
[2]

W [mm]: Range: 14.0 - 15.6, avg: 14.8
[14.0, 14.8, 15.6]

W0 [mm]: Range: 2.5 - 3.5, avg: 3.0
[2.5, 3.0, 3.5]

y0 [mm]: Range: 3.0 - 5.0, avg: 4.0
[3.0, 3.5, 4.0, 4.5, 5.0]

Freq [GHz]: Range: 4.0 - 12.0, avg: 8.0
[4.0, 4.08, 4.16, 4.24, 4.32, 4.4, 4.48, 4.56, 4.64, 4.72, 4.8, 4.88, 4.96, 5.04, 5.12, 5.2, 5.28, 5.36, 5.44, 5.52, 5.6, 5.68, 5.76, 5.84, 5.92, 6.0, 6.08, 6.16, 6.24, 6.32, 6.4, 6.48, 6.56, 6.64, 6.72, 6.8, 6.88, 6.96, 7.04, 7.12, 7.2, 7.28, 7.36, 7.44, 7.52, 7.6, 7.68, 7.76, 7.84, 7.92, 8.0, 8.08, 8.16, 8.24, 8.32, 8.4, 8.48, 8.56, 8.64, 8.72, 8.8, 8.88, 8.96, 9.04, 9.12, 9.2, 9.28, 9.36, 9.44, 9.52, 9.6, 9.68, 9.76, 9.84, 9.92, 10.0, 10.08, 10.16, 10.24, 10.32, 10.4, 10.48, 10.56, 10.64, 10.72, 10.8, 10.88, 10.96, 11.04, 11.12, 11.2, 11.28, 11.36, 11.44, 11.52, 11.6, 11.68, 11.76, 11.84, 11.92, 12