In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import mean_squared_error, r2_score

In [10]:
# Shared notebook settings
threshold = 1      # tolerance (same units as the target column) for counting a prediction as a hit
random_state = 42  # seed passed to the train/test split and to the XGBoost model

In [11]:
# Load the combined S11 CSV and keep only the rows whose S11 value is not positive
df = (
    pd.read_csv("../test_data/new leaky wave/S11_Data_combined_w_extra.csv")
    .loc[lambda frame: ~(frame['dB(S(1,1)) []'] > 0)]
)

In [12]:
# Preview the filtered frame (the notebook display elides the middle rows)
df

Unnamed: 0,cpw_in [mm],feed_l [mm],patch_l [mm],cpw_g [mm],Feed_W [mm],ground_w [mm],patch_ground_w [mm],patch_w [mm],Freq [GHz],"dB(S(1,1)) []"
0,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.00,-1.455338
1,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.09,-1.495640
2,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.18,-1.566487
3,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.27,-1.667701
4,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.36,-1.799854
...,...,...,...,...,...,...,...,...,...,...
31507,3.6,3.75,3.50,0.72,1.0,1.0,1.0,4.75,19.64,-5.836283
31508,3.6,3.75,3.50,0.72,1.0,1.0,1.0,4.75,19.73,-3.323590
31509,3.6,3.75,3.50,0.72,1.0,1.0,1.0,4.75,19.82,-1.998895
31510,3.6,3.75,3.50,0.72,1.0,1.0,1.0,4.75,19.91,-1.476707


In [13]:
# Separate the target column (S11 in dB) from the feature columns
input_y = df[['dB(S(1,1)) []']]             # kept two-dimensional: a one-column DataFrame
input_x = df.drop(columns=input_y.columns)  # every remaining column is used as a feature

In [14]:
# Hold out part of the data for evaluation.
# test_size is not given, so scikit-learn's default split proportion applies;
# the fixed seed makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    input_x,
    input_y,
    random_state=random_state,
)

In [15]:
# Two-stage estimator: standardise the features, then fit the XGBoost regressor on them
scaler = StandardScaler()
model = XGBRegressor(
    n_estimators=900,
    max_depth=15,
    min_child_weight=1,
    learning_rate=0.1,
    random_state=random_state,
)
pipeline = Pipeline(steps=[('normalize', scaler), ('model', model)])

In [16]:
%%time
# Fit the pipeline (scaler followed by the XGBoost model) on the training split.
# No prediction happens in this cell; the result of fit() is kept as pipeline_fit.
pipeline_fit = pipeline.fit(X_train, y_train)

CPU times: user 2min 8s, sys: 726 ms, total: 2min 9s
Wall time: 6.93 s


In [17]:
%%time
# Predict the target for the held-out test rows with the fitted pipeline
predictions = pipeline_fit.predict(X_test)

CPU times: user 1.13 s, sys: 0 ns, total: 1.13 s
Wall time: 65.5 ms


In [18]:
# Check if predicted value is within the tolerance above or below the actual value
def is_in_threshold(actual, pred, tolerance=None):
    """Return whether ``pred`` lies in ``[actual - tolerance, actual + tolerance]``.

    Parameters
    ----------
    actual : scalar
        Reference (true) value.
    pred : scalar
        Predicted value to check.
    tolerance : scalar, optional
        Half-width of the accepted band. When omitted, the notebook-level
        ``threshold`` setting is used, which is the original behaviour.

    Returns
    -------
    bool
        True when the prediction is inside the band (both bounds inclusive).
    """
    if tolerance is None:
        # Fall back to the notebook-wide setting so existing two-argument calls are unchanged.
        tolerance = threshold
    return actual - tolerance <= pred <= actual + tolerance

In [19]:
# Copy of the test features; note that the function parameter below shadows this name
results = X_test.copy()


def create_tf_column(results):
    """Build a per-row flag telling whether the prediction is within the threshold.

    ``results`` must contain a 'y_test' column (true values) and a
    'predictions' column (model outputs). Each row is passed to
    ``is_in_threshold`` and the row-wise outcome is returned.
    """
    def _row_within_threshold(row):
        return is_in_threshold(row['y_test'], row['predictions'])

    return results.apply(_row_within_threshold, axis=1)

In [20]:
# Calculate accuracy of the model as the fraction of test rows whose prediction
# is within the tolerance above or below the true value
def get_score(X_test, y_test, clf_dt, tolerance=None):
    """Return the fraction of predictions that fall within ``tolerance`` of the truth.

    Parameters
    ----------
    X_test : DataFrame or array-like
        Feature rows handed to ``clf_dt.predict``.
    y_test : DataFrame, Series or array-like
        True target values, one per row of ``X_test`` (matched by position).
    clf_dt : estimator
        Any fitted object exposing ``predict(X)``.
    tolerance : scalar, optional
        Half-width of the accepted band around each true value. When omitted,
        the notebook-level ``threshold`` setting is used (original behaviour).

    Returns
    -------
    float
        Share of rows with ``actual - tolerance <= prediction <= actual + tolerance``.
        ``0.0`` when no row qualifies (the previous implementation raised a
        ``TypeError`` in that case) and ``nan`` when there are no rows at all.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of true values.
    """
    if tolerance is None:
        tolerance = threshold

    # Flatten both sides to 1-D so an (n, 1) target frame lines up element-wise
    # with the (n,) prediction vector instead of broadcasting against it.
    predicted = pd.DataFrame(clf_dt.predict(X_test)).to_numpy().ravel()
    actual = pd.DataFrame(y_test).to_numpy().ravel()

    if len(predicted) != len(actual):
        raise ValueError(
            f"got {len(predicted)} predictions for {len(actual)} true values"
        )
    if len(actual) == 0:
        # A hit rate over zero rows is undefined.
        return float('nan')

    # Same inclusive comparison as is_in_threshold, evaluated for all rows at once.
    within = (predicted <= actual + tolerance) & (predicted >= actual - tolerance)
    return float(within.mean())

In [21]:
# Preview the held-out feature rows (row labels are the shuffled original indices)
X_test

Unnamed: 0,cpw_in [mm],feed_l [mm],patch_l [mm],cpw_g [mm],Feed_W [mm],ground_w [mm],patch_ground_w [mm],patch_w [mm],Freq [GHz]
4256,2.00,4.00,3.50,0.18,1.00,1.0,1.00,4.75,12.26
13625,2.00,3.75,3.50,0.18,1.00,1.0,1.00,4.75,19.19
14937,2.00,3.75,3.50,0.30,0.75,1.0,1.00,4.75,19.10
18304,2.00,3.75,3.50,0.18,1.00,1.0,0.75,4.50,13.07
25557,1.50,3.75,3.50,0.18,1.00,1.0,1.00,4.75,11.36
...,...,...,...,...,...,...,...,...,...
12554,2.50,4.25,4.25,0.18,1.00,1.0,1.00,4.75,13.70
9401,2.25,4.00,4.00,0.18,1.00,1.0,1.00,4.75,11.72
1788,2.00,4.00,3.25,0.18,1.00,1.0,1.00,4.75,17.39
25951,2.25,3.75,3.50,0.18,1.00,1.0,1.00,4.75,19.55


In [22]:
# Report the tolerance-based hit rate alongside the standard regression metrics
print(f"Score within +-{threshold}: {get_score(X_test, y_test, pipeline)}")
# RMSE is computed as the square root of the MSE instead of passing `squared=False`,
# which newer scikit-learn releases no longer accept. With a single target column
# the two forms give the same number.
print("RMSE:", mean_squared_error(y_test, predictions) ** 0.5)
print("R^2:", r2_score(y_test, predictions))

Score within +-1: 0.7343234323432343
RMSE: 2.31878511762776
R^2: 0.8336440607767523


In [23]:
# Persist the fitted pipeline (scaler + model) to a path relative to the working directory.
# NOTE(review): joblib files are pickle-based — only load them from trusted sources.
joblib.dump(pipeline, "antenna_model_w_extra.pkl")

['antenna_model_w_extra.pkl']