In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import mean_squared_error, r2_score

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Half-width of the acceptance band used by the accuracy score: a prediction
# counts as a hit when it lies within +/- this amount of the actual value
threshold = 1
# Seed handed to train_test_split so the train/test partition is repeatable
random_state = 20

In [3]:
# Load the combined S11 sweep, then discard every row whose S11 value is positive
df = pd.read_csv("../test_data/new leaky wave/S11_Data_combined.csv")
df = df.drop(index=df.loc[df['dB(S(1,1)) []'] > 0].index)

In [4]:
# Show the filtered frame as the cell's rich output
df

Unnamed: 0,cpw_in [mm],feed_l [mm],patch_l [mm],cpw_g [mm],Feed_W [mm],ground_w [mm],patch_ground_w [mm],patch_w [mm],Freq [GHz],"dB(S(1,1)) []"
0,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.00,-1.455338
1,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.09,-1.495640
2,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.18,-1.566487
3,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.27,-1.667701
4,1.5,3.25,3.25,0.18,1.0,1.0,1.0,4.75,11.36,-1.799854
...,...,...,...,...,...,...,...,...,...,...
26659,2.0,3.75,3.50,0.18,1.0,1.5,1.5,5.00,19.64,-3.573784
26660,2.0,3.75,3.50,0.18,1.0,1.5,1.5,5.00,19.73,-2.925513
26661,2.0,3.75,3.50,0.18,1.0,1.5,1.5,5.00,19.82,-2.792273
26662,2.0,3.75,3.50,0.18,1.0,1.5,1.5,5.00,19.91,-3.092972


In [5]:
# Separate the regression target from the predictor columns
input_y = df.loc[:, ['dB(S(1,1)) []']]  # list selector keeps this a single-column frame
input_x = df.drop(columns='dB(S(1,1)) []')

In [6]:
# Hold out a test partition using train_test_split's default proportions;
# the shared seed keeps the shuffle identical from run to run
(X_train, X_test, y_train, y_test) = train_test_split(
    input_x,
    input_y,
    random_state=random_state,
)

In [7]:
# Gradient-boosted tree regressor. The notebook-wide seed is passed explicitly so
# that any randomised option enabled later (e.g. row/column subsampling) stays
# reproducible instead of depending on the library's default seed.
model = XGBRegressor(
    n_estimators=1500,
    max_depth=10,
    learning_rate=0.1,
    min_child_weight=1,
    random_state=random_state,
)
# Standardise the features before they reach the regressor
scaler = StandardScaler()
# Chain scaling and regression so both are fitted, applied, and saved as one object
pipeline = Pipeline(steps=[('normalize', scaler), ('model', model)])

In [8]:
%%time
# Fit the scaler and the regressor on the training partition only (no prediction
# happens in this cell). Per scikit-learn's estimator API, fit() returns the
# estimator itself, so `pipeline_fit` and `pipeline` name the same fitted object.
pipeline_fit = pipeline.fit(X_train, y=y_train)

CPU times: user 1min 2s, sys: 843 ms, total: 1min 3s
Wall time: 3.43 s


In [9]:
%%time
# Predict the target for the held-out rows with the fitted pipeline
predictions = pipeline_fit.predict(X=X_test)

CPU times: user 699 ms, sys: 0 ns, total: 699 ms
Wall time: 40.2 ms


In [10]:
def is_in_threshold(actual, pred, tolerance=None):
    """Tell whether a prediction lies within a tolerance band around the actual value.

    Args:
        actual: Reference value the prediction is compared against.
        pred: Predicted value.
        tolerance: Half-width of the accepted band. When omitted, the
            notebook-level ``threshold`` setting is used, which is how this
            helper behaved before the parameter existed.

    Returns:
        True when ``actual - tolerance <= pred <= actual + tolerance`` (both
        bounds inclusive), otherwise False.
    """
    if tolerance is None:
        # Looked up at call time so edits to the config cell take effect
        # without redefining this function
        tolerance = threshold
    return actual - tolerance <= pred <= actual + tolerance

In [11]:
# Independent copy of the held-out feature rows.
# NOTE(review): no cell visible in this notebook reads this module-level
# `results`; the parameter of create_tf_column (defined next) merely shares the
# name and shadows it inside that function.
results = X_test.copy(deep=True)
def create_tf_column(results, tolerance=None):
    """Flag, row by row, whether each prediction is within tolerance of the actual value.

    Args:
        results: DataFrame holding a ``'y_test'`` column (actual values) and a
            ``'predictions'`` column (predicted values).
        tolerance: Half-width of the accepted band. When omitted, the
            notebook-level ``threshold`` setting is used.

    Returns:
        Boolean Series indexed like ``results``; True where
        ``y_test - tolerance <= predictions <= y_test + tolerance`` (inclusive
        on both sides, the same rule as ``is_in_threshold``).
    """
    if tolerance is None:
        tolerance = threshold
    actual = results['y_test']
    # Vectorised comparison: avoids a Python-level call per row and, unlike
    # apply(axis=1), still yields an (empty) boolean Series for an empty frame.
    return results['predictions'].between(actual - tolerance, actual + tolerance)

In [12]:
def get_score(X_test, y_test, clf_dt):
    """Return the fraction of test rows predicted within the threshold of the actual value.

    Args:
        X_test: Feature rows to score; wrapped in a DataFrame labelled with the
            columns of the notebook-level ``input_x``.
        y_test: Actual target values, one per row of ``X_test``; read through
            its ``.values`` attribute.
        clf_dt: Fitted estimator (or pipeline) exposing ``predict``.

    Returns:
        Number of in-threshold rows divided by the total number of rows, so
        0.0 when no prediction falls within the threshold.
    """
    predictions = clf_dt.predict(X_test)
    dataframe = pd.DataFrame(X_test.copy(), columns=input_x.columns)
    dataframe['y_test'] = y_test.values
    dataframe['predictions'] = predictions
    within_threshold = create_tf_column(dataframe)
    # Summing the boolean flags counts the hits and gives 0 when there are none;
    # value_counts().get(True) returned None in that case and the division
    # raised TypeError.
    return within_threshold.sum() / dataframe.shape[0]

In [13]:
# Inspect the held-out feature rows that the score below is computed on
X_test

Unnamed: 0,cpw_in [mm],feed_l [mm],patch_l [mm],cpw_g [mm],Feed_W [mm],ground_w [mm],patch_ground_w [mm],patch_w [mm],Freq [GHz]
11357,2.00,3.75,4.25,0.18,1.00,1.00,1.00,4.75,15.05
20975,2.00,3.75,3.50,0.18,1.00,0.75,0.75,4.50,17.12
9687,1.50,4.25,4.00,0.18,1.00,1.00,1.00,4.75,19.28
13688,2.00,3.75,3.50,0.42,1.00,1.00,1.00,4.75,15.77
9717,1.75,4.25,4.00,0.18,1.00,1.00,1.00,4.75,12.89
...,...,...,...,...,...,...,...,...,...
4034,2.50,3.75,3.50,0.18,1.00,1.00,1.00,4.75,19.55
8811,2.00,3.75,4.00,0.18,1.00,1.00,1.00,4.75,13.16
3859,2.25,3.75,3.50,0.18,1.00,1.00,1.00,4.75,12.89
15866,2.00,3.75,3.50,1.32,1.25,1.00,1.00,4.75,11.81


In [14]:
# Share of held-out rows predicted within +/- threshold of the actual value
print(f"Score within +-{threshold}: {get_score(X_test, y_test, pipeline)}")
# RMSE is taken as the square root of the MSE rather than via `squared=False`:
# that keyword is deprecated in scikit-learn 1.4 and removed in 1.6, whereas this
# form runs on every release and is the same quantity for a single target column.
print("RMSE:", mean_squared_error(y_test, predictions) ** 0.5)
print("R^2:", r2_score(y_test, predictions))

Score within +-1: 0.7211221122112211
RMSE: 2.4247640854596395
R^2: 0.8080766757105057


In [16]:
# Persist the fitted scaler + regressor pipeline so it can be reloaded without retraining
joblib.dump(value=pipeline, filename="antenna_model.pkl")

['antenna_model.pkl']