# Multiple Machine Learning Techniques for GitHub Dataset

In [232]:
import numpy as np
import pandas as pd

In [233]:
df = pd.read_csv('../data/radon-data.csv')

In [234]:
df

Unnamed: 0,id,time,radon,temperature,humidity,pressure,tvoc,sensor_id,state,state_time
0,21906,1569405062,202,25,50,1015,0,2,Off,1569404979
1,21907,1569405663,258,25,51,1015,0,2,On,1569405215
2,21908,1569406264,202,24,51,1015,0,2,Off,1569405671
3,21909,1569406865,182,24,51,1015,0,2,Off,1569406848
4,21910,1569407466,189,24,51,1015,0,2,Off,1569406866
...,...,...,...,...,...,...,...,...,...,...
87965,138640,1622736862,1344,22,43,1018,23,2,Off,1622736447
87966,138642,1622737462,1293,22,43,1018,34,2,Off,1622736963
87967,138644,1622738063,1223,22,43,1018,34,2,Off,1622736963
87968,138646,1622738663,1171,22,43,1018,34,2,Off,1622738463


# Linear Regression: Data Cleaning 

In [235]:
df = df.drop(columns=["time", "state_time", "id", "sensor_id", "state"])
df

Unnamed: 0,radon,temperature,humidity,pressure,tvoc
0,202,25,50,1015,0
1,258,25,51,1015,0
2,202,24,51,1015,0
3,182,24,51,1015,0
4,189,24,51,1015,0
...,...,...,...,...,...
87965,1344,22,43,1018,23
87966,1293,22,43,1018,34
87967,1223,22,43,1018,34
87968,1171,22,43,1018,34


In [236]:
##df['state'] = df['state'].replace({'Off': 0, 'On': 1})

In [237]:
df

Unnamed: 0,radon,temperature,humidity,pressure,tvoc
0,202,25,50,1015,0
1,258,25,51,1015,0
2,202,24,51,1015,0
3,182,24,51,1015,0
4,189,24,51,1015,0
...,...,...,...,...,...
87965,1344,22,43,1018,23
87966,1293,22,43,1018,34
87967,1223,22,43,1018,34
87968,1171,22,43,1018,34


In [238]:
df_summer = df.iloc[36000:48000]
df_summer.head()


Unnamed: 0,radon,temperature,humidity,pressure,tvoc
36000,388,24,57,1010,4
36001,383,24,57,1010,2
36002,398,24,57,1010,6
36003,388,24,57,1010,8
36004,388,24,57,1010,10


In [239]:
##df = df.drop(columns=["state"])

In [240]:
df_summer

Unnamed: 0,radon,temperature,humidity,pressure,tvoc
36000,388,24,57,1010,4
36001,383,24,57,1010,2
36002,398,24,57,1010,6
36003,388,24,57,1010,8
36004,388,24,57,1010,10
...,...,...,...,...,...
47995,172,26,52,1011,63
47996,168,26,52,1011,65
47997,202,26,52,1011,62
47998,202,26,52,1011,54


In [241]:
summer_min = df_summer.min() # needed to de-normalize data
summer_max = df_summer.max()

df_summer_normalized = (df_summer - summer_min) / (summer_max - summer_min)
df_summer_normalized.head()

Unnamed: 0,radon,temperature,humidity,pressure,tvoc
36000,0.11964,0.714286,0.666667,0.5,0.003463
36001,0.117911,0.714286,0.666667,0.5,0.001732
36002,0.123098,0.714286,0.666667,0.5,0.005195
36003,0.11964,0.714286,0.666667,0.5,0.006926
36004,0.11964,0.714286,0.666667,0.5,0.008658


In [242]:
df = df_summer_normalized

In [243]:
df

Unnamed: 0,radon,temperature,humidity,pressure,tvoc
36000,0.119640,0.714286,0.666667,0.500000,0.003463
36001,0.117911,0.714286,0.666667,0.500000,0.001732
36002,0.123098,0.714286,0.666667,0.500000,0.005195
36003,0.119640,0.714286,0.666667,0.500000,0.006926
36004,0.119640,0.714286,0.666667,0.500000,0.008658
...,...,...,...,...,...
47995,0.044952,1.000000,0.481481,0.541667,0.054545
47996,0.043568,1.000000,0.481481,0.541667,0.056277
47997,0.055325,1.000000,0.481481,0.541667,0.053680
47998,0.055325,1.000000,0.481481,0.541667,0.046753


# Linear Regression: Model Train/Test

In [244]:
from itertools import chain, combinations
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [245]:
independent_vars = ["temperature", "humidity", "pressure", "tvoc"]

# Function to get all combinations of the independent variables
def all_combinations(variables):
    return list(chain(*map(lambda x: combinations(variables, x), range(1, len(variables) + 1))))

# Get all combinations of independent variables
combinations = all_combinations(independent_vars)

# Dependent variable
dependent_var = "radon"

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(df[independent_vars], df[dependent_var], test_size=0.2, random_state=42)

# Initialize a dictionary to store R-squared scores
r2_scores = {}

# Iterate over each combination of independent variables
for combo in combinations:
    # Train a linear regression model
    model = LinearRegression()
    model.fit(X_train[list(combo)], y_train)

    # Make predictions using the test set
    y_pred = model.predict(X_test[list(combo)])

    # Calculate R-squared score
    r2 = r2_score(y_test, y_pred)

    # Store the R-squared score in the dictionary
    r2_scores[combo] = r2

# Find the combination with the highest R-squared score
best_combo = max(r2_scores, key=r2_scores.get)

print("Best combination of independent variables:", best_combo)
print("Highest R-squared score:", r2_scores[best_combo])

Best combination of independent variables: ('temperature', 'pressure', 'tvoc')
Highest R-squared score: 0.029810351158771287


# Logistic Regression: Data Cleaning

In [246]:
df_logistic_reg = pd.read_csv('../data/radon-data.csv')
df_logistic_reg = df_logistic_reg.drop(columns=["time", "state_time", "id", "sensor_id"])
df_logistic_reg['state'] = df_logistic_reg['state'].replace({'Off': 0, 'On': 1})
df_logistic_reg['radon_binary'] = df_logistic_reg['radon'].apply(lambda x: 1 if x > 300 else 0)
correlation = df_logistic_reg['radon_binary'].corr(df_logistic_reg['state'])
print(correlation)


0.3717580959030975


In [247]:
df_logistic_reg['radon_binary'] = df_logistic_reg['radon'].apply(lambda x: 1 if x > 300 else 0)
df_logistic_reg = df_logistic_reg.drop(columns=["radon"])
df_logistic_reg
##correlation = df_logistic_reg['radon_binary'].corr(df_logistic_reg['state'])
##print(correlation)

Unnamed: 0,temperature,humidity,pressure,tvoc,state,radon_binary
0,25,50,1015,0,0,0
1,25,51,1015,0,1,0
2,24,51,1015,0,0,0
3,24,51,1015,0,0,0
4,24,51,1015,0,0,0
...,...,...,...,...,...,...
87965,22,43,1018,23,0,1
87966,22,43,1018,34,0,1
87967,22,43,1018,34,0,1
87968,22,43,1018,34,0,1


# Logistic Regression: Model Train/Test

In [248]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from itertools import chain, combinations

# Independent variables
independent_vars = ["temperature", "humidity", "pressure", "tvoc", "state"]

# Function to get all combinations of the independent variables
def all_combinations(variables):
    return list(chain(*map(lambda x: combinations(variables, x), range(1, len(variables) + 1))))

# Get all combinations of independent variables
combinations = all_combinations(independent_vars)

# Dependent variable
dependent_var = "radon_binary"

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(df_logistic_reg[independent_vars], df_logistic_reg[dependent_var], test_size=0.2, random_state=42)

# Initialize a dictionary to store accuracy scores
accuracy_scores = {}

# Iterate over each combination of independent variables
for combo in combinations:
    # Train a logistic regression model
    model = LogisticRegression(max_iter=1000) # Increase max_iter if the algorithm does not converge
    model.fit(X_train[list(combo)], y_train)

    # Make predictions using the test set
    y_pred = model.predict(X_test[list(combo)])

    # Calculate accuracy score
    accuracy = accuracy_score(y_test, y_pred)

    # Store the accuracy score in the dictionary
    accuracy_scores[combo] = accuracy

# Find the combination with the highest accuracy score
best_combo = max(accuracy_scores, key=accuracy_scores.get)

print("Best combination of independent variables:", best_combo)
print("Highest accuracy score:", accuracy_scores[best_combo])

Best combination of independent variables: ('temperature', 'state')
Highest accuracy score: 0.7175741730135273


# XGBoost

In [249]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Independent variables
independent_vars = ["temperature", "humidity", "pressure", "tvoc", "state"]

# Function to get all combinations of the independent variables
def all_combinations(variables):
    return list(chain(*map(lambda x: combinations(variables, x), range(1, len(variables) + 1))))

# Get all combinations of independent variables
combinations = all_combinations(independent_vars)

# Dependent variable
dependent_var = "radon_binary"

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(df_logistic_reg[independent_vars], df_logistic_reg[dependent_var], test_size=0.2, random_state=42)

# Initialize a dictionary to store accuracy scores
accuracy_scores = {}

# Iterate over each combination of independent variables
for combo in combinations:
    # Train a logistic regression model
    model = LinearRegression(max_iter=1000) # Increase max_iter if the algorithm does not converge
    model.fit(X_train[list(combo)], y_train)

    # Make predictions using the test set
    y_pred = model.predict(X_test[list(combo)])

    # Calculate accuracy score
    accuracy = accuracy_score(y_test, y_pred)

    # Store the accuracy score in the dictionary
    accuracy_scores[combo] = accuracy

# Find the combination with the highest accuracy score
best_combo = max(accuracy_scores, key=accuracy_scores.get)

print("Best combination of independent variables:", best_combo)
print("Highest accuracy score:", accuracy_scores[best_combo])

TypeError: 'list' object is not callable

# Neural Network: Model Train/Test

In [None]:
from sklearn.neural_network import MLPRegressor
from itertools import chain, combinations


# Independent variables
independent_vars = ["temperature", "humidity", "pressure", "tvoc", "state"]

# Function to get all combinations of the independent variables
def all_combinations(variables):
    return list(chain(*map(lambda x: combinations(variables, x), range(1, len(variables) + 1))))

# Get all combinations of independent variables
combinations = all_combinations(independent_vars)

# Dependent variable
dependent_var = "radon"

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(df[independent_vars], df[dependent_var], test_size=0.2, random_state=42)

# Initialize a dictionary to store R-squared scores
r2_scores = {}

# Iterate over each combination of independent variables
for combo in combinations:
    # Train a neural network regression model
    model = MLPRegressor(hidden_layer_sizes=(50, 50), max_iter=500, random_state=42) # Adjust hyperparameters as needed
    model.fit(X_train[list(combo)], y_train)

    # Make predictions using the test set
    y_pred = model.predict(X_test[list(combo)])

    # Calculate R-squared score
    r2 = r2_score(y_test, y_pred)

    # Store the R-squared score in a dictionary
    r2_scores[combo] = r2
    
    #Find the combination with the highest R-squared score
    best_combo = max(r2_scores, key=r2_scores.get)

print("Best combination of independent variables:", best_combo)
print("Highest R-squared score:", r2_scores[best_combo])

Best combination of independent variables: ('temperature', 'humidity', 'pressure', 'state')
Highest R-squared score: 0.2045970236774428


# Decision Trees

In [None]:
df

Unnamed: 0,radon,temperature,humidity,pressure,tvoc
36000,0.119640,0.714286,0.666667,0.500000,0.003463
36001,0.117911,0.714286,0.666667,0.500000,0.001732
36002,0.123098,0.714286,0.666667,0.500000,0.005195
36003,0.119640,0.714286,0.666667,0.500000,0.006926
36004,0.119640,0.714286,0.666667,0.500000,0.008658
...,...,...,...,...,...
47995,0.044952,1.000000,0.481481,0.541667,0.054545
47996,0.043568,1.000000,0.481481,0.541667,0.056277
47997,0.055325,1.000000,0.481481,0.541667,0.053680
47998,0.055325,1.000000,0.481481,0.541667,0.046753


### Decision Trees: Model Train/Test w/Cross-Validation

In [None]:
import xgboost as xgb
from itertools import chain, combinations
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Independent variables
independent_vars = ["temperature", "humidity", "pressure", "tvoc"]

# Function to get all combinations of the independent variables
def all_combinations(variables):
    return list(chain(*map(lambda x: combinations(variables, x), range(1, len(variables) + 1))))

# Get all combinations of independent variables
combinations = all_combinations(independent_vars)

# Dependent variable
dependent_var = "radon"

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(df[independent_vars], df[dependent_var], test_size=0.2, random_state=42)

# Initialize a dictionary to store mean cross-validation scores
mean_cv_scores = {}

# Number of folds for cross-validation
cv_folds = 5

# Iterate over each combination of independent variables
for combo in combinations:
    # Train an XGBoost regression model
    model = xgb.XGBClassifier(objective='reg:squarederror', random_state=42) # Adjust hyperparameters as needed
    
    # Perform cross-validation and calculate mean score
    cv_scores = cross_val_score(model, X_train[list(combo)], y_train, cv=cv_folds, scoring='r2')
    mean_cv_score = np.mean(cv_scores)
    
    # Store the mean cross-validation score in a dictionary
    mean_cv_scores[combo] = mean_cv_score

# Find the combination with the highest mean cross-validation score
best_combo = max(mean_cv_scores, key=mean_cv_scores.get)

print("Best combination of independent variables:", best_combo)
print("Highest mean cross-validation score:", mean_cv_scores[best_combo])

# Train the final model using the best combination of independent variables and the entire training set
best_model = xgb.XGBClassifier(objective='reg:squarederror', random_state=42) # Adjust hyperparameters as needed
best_model.fit(X_train[list(best_combo)], y_train)

# Make predictions using the test set
y_pred = best_model.predict(X_test[list(best_combo)])

# Calculate R-squared score
r2 = r2_score(y_test, y_pred)

print("R-squared score on the test set:", r2)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)

print("Mean Squared Error on the test set:", mse)

ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/sklearn.py", line 1440, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
 306 307 308 309 310 311 312 313 314 315 316], got [0.         0.00138313 0.00207469 0.00276625 0.00345781 0.00414938
 0.00484094 0.0055325  0.00622407 0.00691563 0.00760719 0.00829876
 0.00899032 0.0093361  0.01037344 0.01106501 0.01141079 0.01210235
 0.01279391 0.01348548 0.01417704 0.0148686  0.01556017 0.01625173
 0.01694329 0.01763485 0.01832642 0.01901798 0.01970954 0.02040111
 0.02109267 0.02178423 0.0224758  0.02316736 0.02385892 0.02455048
 0.02524205 0.02558783 0.02662517 0.02697095 0.02766252 0.02869986
 0.02904564 0.02973721 0.03042877 0.03112033 0.03181189 0.03250346
 0.03319502 0.03388658 0.03457815 0.03526971 0.03596127 0.03665284
 0.0373444  0.03803596 0.03872752 0.03941909 0.04011065 0.04080221
 0.04149378 0.04218534 0.0428769  0.04356846 0.04426003 0.04495159
 0.04564315 0.04633472 0.04702628 0.04771784 0.04840941 0.04910097
 0.04979253 0.05048409 0.05117566 0.05186722 0.05255878 0.05325035
 0.05394191 0.05463347 0.05532503 0.0560166  0.05670816 0.05739972
 0.05809129 0.05878285 0.05947441 0.06016598 0.06085754 0.0615491
 0.06224066 0.06293223 0.06362379 0.06431535 0.06500692 0.06569848
 0.06639004 0.0670816  0.06777317 0.06846473 0.06915629 0.06984786
 0.07053942 0.07123098 0.07192254 0.07261411 0.07330567 0.07399723
 0.0746888  0.07538036 0.07607192 0.07676349 0.07745505 0.07814661
 0.07883817 0.07987552 0.0802213  0.08125864 0.08160443 0.08264177
 0.08333333 0.0840249  0.08471646 0.08540802 0.08609959 0.08679115
 0.08748271 0.08817427 0.08886584 0.0895574  0.09024896 0.09094053
 0.09163209 0.09232365 0.09301521 0.09370678 0.09439834 0.0950899
 0.09578147 0.09647303 0.09751037 0.09785615 0.0988935  0.09958506
 0.09993084 0.10096819 0.10165975 0.10235131 0.10304288 0.10373444
 0.104426   0.10511757 0.10580913 0.10650069 0.10719225 0.10788382
 0.10857538 0.10926694 0.11030429 0.11065007 0.11168741 0.11237898
 0.11272476 0.1137621  0.11445367 0.11514523 0.11583679 0.11652835
 0.11721992 0.11791148 0.11860304 0.11964039 0.11998617 0.12102351
 0.12171508 0.12240664 0.1230982  0.12378976 0.12448133 0.12517289
 0.12586445 0.12655602 0.12759336 0.12793914 0.12897649 0.12966805
 0.13035961 0.13105118 0.13174274 0.1324343  0.13312586 0.13381743
 0.13450899 0.13554633 0.1362379  0.13692946 0.13762102 0.13831259
 0.13900415 0.13969571 0.14038728 0.14107884 0.1417704  0.14280775
 0.14349931 0.14419087 0.14488243 0.145574   0.14626556 0.14695712
 0.14764869 0.14868603 0.14903181 0.15006916 0.15076072 0.15145228
 0.15214385 0.15283541 0.15352697 0.15421853 0.1549101  0.15594744
 0.156639   0.15733057 0.15802213 0.15871369 0.15940526 0.16009682
 0.16113416 0.16182573 0.16251729 0.16320885 0.16390041 0.16459198
 0.16528354 0.16632089 0.16701245 0.16770401 0.16839557 0.16908714
 0.1697787  0.17047026 0.17116183 0.17219917 0.17289073 0.1735823
 0.17427386 0.17496542 0.17565698 0.17669433 0.17738589 0.17807746
 0.17876902 0.17946058 0.18015214 0.18395574 0.1846473  0.18568465
 0.18637621 0.1884509  0.18914246 0.19605809 0.20366528 0.21196404
 0.21887967 0.24757953 0.25311203 0.29253112 0.32261411 0.34163209
 0.34232365 0.36825726 0.39211618 0.41251729 0.4291148  0.44156293
 0.46473029 0.47268326 0.47994467 0.48720609 0.48962656 0.49723375
 0.51106501 0.52489627 0.55843707 0.58748271 0.61272476 0.6137621
 0.61964039 0.63416321 0.64211618 0.66735823 0.67116183 0.67219917
 0.69502075 0.71749654 0.73858921 0.75103734 0.75207469 0.75622407
 0.78526971 0.80152144 0.81051176 0.84024896 0.87828492 0.88070539
 0.90698479 0.92427386 0.9253112  0.93257261 0.94640387 0.95470263
 0.9664592  0.96991701 0.97233748 0.97614108 1.        ]

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/sklearn.py", line 1440, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
 306 307 308 309 310 311], got [0.00138313 0.00207469 0.00276625 0.00345781 0.00414938 0.00484094
 0.0055325  0.00622407 0.00691563 0.00760719 0.00829876 0.00899032
 0.0093361  0.01037344 0.01106501 0.01141079 0.01210235 0.01279391
 0.01348548 0.01417704 0.0148686  0.01556017 0.01625173 0.01694329
 0.01763485 0.01832642 0.01901798 0.01970954 0.02040111 0.02109267
 0.02178423 0.0224758  0.02316736 0.02385892 0.02455048 0.02524205
 0.02558783 0.02662517 0.02697095 0.02766252 0.02869986 0.02904564
 0.02973721 0.03042877 0.03112033 0.03181189 0.03250346 0.03319502
 0.03388658 0.03457815 0.03526971 0.03596127 0.03665284 0.0373444
 0.03803596 0.03872752 0.03941909 0.04011065 0.04080221 0.04149378
 0.04218534 0.0428769  0.04356846 0.04426003 0.04495159 0.04564315
 0.04633472 0.04702628 0.04771784 0.04840941 0.04910097 0.04979253
 0.05048409 0.05117566 0.05186722 0.05255878 0.05325035 0.05394191
 0.05463347 0.05532503 0.0560166  0.05670816 0.05739972 0.05809129
 0.05878285 0.05947441 0.06016598 0.06085754 0.0615491  0.06224066
 0.06293223 0.06362379 0.06431535 0.06500692 0.06569848 0.06639004
 0.0670816  0.06777317 0.06846473 0.06915629 0.06984786 0.07053942
 0.07123098 0.07192254 0.07261411 0.07330567 0.07399723 0.0746888
 0.07538036 0.07607192 0.07676349 0.07745505 0.07814661 0.07883817
 0.07987552 0.0802213  0.08125864 0.08160443 0.08229599 0.08264177
 0.08333333 0.0840249  0.08471646 0.08540802 0.08609959 0.08679115
 0.08748271 0.08817427 0.08886584 0.0895574  0.09024896 0.09059474
 0.09094053 0.09163209 0.09232365 0.09301521 0.09370678 0.09439834
 0.0950899  0.09578147 0.09647303 0.09751037 0.09785615 0.0988935
 0.09958506 0.09993084 0.10096819 0.10165975 0.10235131 0.10304288
 0.10373444 0.104426   0.10511757 0.10580913 0.10650069 0.10719225
 0.10788382 0.10857538 0.10926694 0.11030429 0.11065007 0.11168741
 0.11237898 0.11272476 0.1137621  0.11445367 0.11514523 0.11583679
 0.11652835 0.11721992 0.11791148 0.11860304 0.11964039 0.11998617
 0.12102351 0.12171508 0.12240664 0.1230982  0.12378976 0.12448133
 0.12517289 0.12586445 0.12655602 0.12759336 0.12793914 0.12897649
 0.12966805 0.13035961 0.13105118 0.13174274 0.1324343  0.13312586
 0.13381743 0.13450899 0.13554633 0.1362379  0.13692946 0.13762102
 0.13831259 0.13900415 0.13969571 0.14038728 0.14107884 0.1417704
 0.14280775 0.14349931 0.14419087 0.14488243 0.145574   0.14626556
 0.14695712 0.14764869 0.14868603 0.14903181 0.15006916 0.15076072
 0.15145228 0.15214385 0.15283541 0.15352697 0.15421853 0.1549101
 0.15594744 0.156639   0.15733057 0.15802213 0.15871369 0.15940526
 0.16009682 0.16113416 0.16182573 0.16251729 0.16320885 0.16390041
 0.16459198 0.16528354 0.16632089 0.16701245 0.16770401 0.16839557
 0.16908714 0.1697787  0.17047026 0.17116183 0.17219917 0.17289073
 0.1735823  0.17427386 0.17496542 0.17565698 0.17669433 0.17738589
 0.17807746 0.17876902 0.17946058 0.18015214 0.1846473  0.18568465
 0.1884509  0.18914246 0.19605809 0.20124481 0.20504841 0.21196404
 0.21887967 0.24757953 0.29840941 0.32261411 0.34163209 0.36825726
 0.39211618 0.41182573 0.44156293 0.47268326 0.47994467 0.49723375
 0.51106501 0.52385892 0.52489627 0.55394191 0.55843707 0.58748271
 0.5988935  0.6137621  0.63416321 0.64211618 0.65836791 0.66735823
 0.67116183 0.67219917 0.69502075 0.71542185 0.71749654 0.73858921
 0.74481328 0.75103734 0.78526971 0.80152144 0.81051176 0.84024896
 0.87828492 0.88070539 0.90698479 0.92427386 0.9253112  0.9384509
 0.94640387 0.94986169 0.95677732 0.9664592  0.97233748 1.        ]

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/sklearn.py", line 1440, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
 306 307 308 309 310 311 312 313 314 315 316 317], got [0.         0.00138313 0.00207469 0.00276625 0.00345781 0.00414938
 0.00484094 0.0055325  0.00622407 0.00691563 0.00760719 0.00829876
 0.00899032 0.0093361  0.01037344 0.01106501 0.01141079 0.01210235
 0.01279391 0.01348548 0.01417704 0.0148686  0.01556017 0.01625173
 0.01694329 0.01763485 0.01832642 0.01901798 0.01970954 0.02040111
 0.02109267 0.02178423 0.0224758  0.02316736 0.02385892 0.02455048
 0.02524205 0.02558783 0.02662517 0.02697095 0.02766252 0.02869986
 0.02904564 0.02973721 0.03042877 0.03112033 0.03181189 0.03250346
 0.03319502 0.03388658 0.03457815 0.03526971 0.03596127 0.03665284
 0.0373444  0.03803596 0.03872752 0.03941909 0.04011065 0.04080221
 0.04149378 0.04218534 0.0428769  0.04356846 0.04426003 0.04495159
 0.04564315 0.04633472 0.04702628 0.04771784 0.04840941 0.04910097
 0.04979253 0.05048409 0.05117566 0.05186722 0.05255878 0.05325035
 0.05394191 0.05463347 0.05532503 0.0560166  0.05670816 0.05739972
 0.05809129 0.05878285 0.05947441 0.06016598 0.06085754 0.0615491
 0.06224066 0.06293223 0.06362379 0.06431535 0.06500692 0.06569848
 0.06639004 0.0670816  0.06777317 0.06846473 0.06915629 0.06984786
 0.07053942 0.07123098 0.07192254 0.07261411 0.07330567 0.07399723
 0.0746888  0.07538036 0.07607192 0.07676349 0.07745505 0.07814661
 0.07883817 0.07987552 0.0802213  0.08125864 0.08160443 0.08229599
 0.08264177 0.08333333 0.0840249  0.08471646 0.08540802 0.08609959
 0.08679115 0.08748271 0.08817427 0.08886584 0.0895574  0.09024896
 0.09059474 0.09094053 0.09163209 0.09232365 0.09301521 0.09370678
 0.09439834 0.0950899  0.09578147 0.09647303 0.09751037 0.09785615
 0.0988935  0.09958506 0.09993084 0.10096819 0.10165975 0.10235131
 0.10304288 0.10373444 0.104426   0.10511757 0.10580913 0.10650069
 0.10719225 0.10788382 0.10857538 0.10926694 0.11030429 0.11065007
 0.11168741 0.11237898 0.11272476 0.1137621  0.11445367 0.11514523
 0.11583679 0.11652835 0.11721992 0.11791148 0.11860304 0.11964039
 0.11998617 0.12102351 0.12171508 0.12240664 0.1230982  0.12378976
 0.12448133 0.12517289 0.12586445 0.12655602 0.12759336 0.12793914
 0.12897649 0.12966805 0.13035961 0.13105118 0.13174274 0.1324343
 0.13312586 0.13381743 0.13450899 0.13554633 0.1362379  0.13692946
 0.13762102 0.13831259 0.13900415 0.13969571 0.14038728 0.14107884
 0.1417704  0.14280775 0.14349931 0.14419087 0.14488243 0.145574
 0.14626556 0.14695712 0.14764869 0.14868603 0.14903181 0.15006916
 0.15076072 0.15145228 0.15214385 0.15283541 0.15352697 0.15421853
 0.1549101  0.15594744 0.156639   0.15733057 0.15802213 0.15871369
 0.15940526 0.16009682 0.16113416 0.16182573 0.16251729 0.16320885
 0.16390041 0.16459198 0.16528354 0.16632089 0.16701245 0.16770401
 0.16839557 0.16908714 0.1697787  0.17047026 0.17116183 0.17219917
 0.17289073 0.1735823  0.17565698 0.17669433 0.17738589 0.17807746
 0.17876902 0.17946058 0.18015214 0.18395574 0.1846473  0.18568465
 0.18637621 0.1884509  0.18914246 0.19605809 0.20124481 0.20366528
 0.20504841 0.21196404 0.25311203 0.29253112 0.29840941 0.32261411
 0.34232365 0.39211618 0.41182573 0.41251729 0.4291148  0.46473029
 0.47268326 0.47994467 0.48720609 0.48962656 0.51106501 0.52385892
 0.55394191 0.58748271 0.5988935  0.61272476 0.6137621  0.61964039
 0.63416321 0.64211618 0.65836791 0.66735823 0.67116183 0.67219917
 0.71542185 0.71749654 0.74481328 0.75103734 0.75207469 0.75622407
 0.78526971 0.80152144 0.81051176 0.84024896 0.88070539 0.90698479
 0.9253112  0.93257261 0.9384509  0.94640387 0.94986169 0.95470263
 0.95677732 0.9664592  0.96991701 0.97233748 0.97614108 1.        ]

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/sklearn.py", line 1440, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
 306 307 308 309 310 311 312 313 314 315 316 317 318 319], got [0.         0.00138313 0.00207469 0.00276625 0.00345781 0.00414938
 0.00484094 0.0055325  0.00622407 0.00691563 0.00760719 0.00829876
 0.00899032 0.0093361  0.01037344 0.01106501 0.01141079 0.01210235
 0.01279391 0.01348548 0.01417704 0.0148686  0.01556017 0.01625173
 0.01694329 0.01763485 0.01832642 0.01901798 0.01970954 0.02040111
 0.02109267 0.02178423 0.0224758  0.02316736 0.02385892 0.02455048
 0.02524205 0.02558783 0.02662517 0.02697095 0.02766252 0.02869986
 0.02904564 0.02973721 0.03042877 0.03112033 0.03181189 0.03250346
 0.03319502 0.03388658 0.03457815 0.03526971 0.03596127 0.03665284
 0.0373444  0.03803596 0.03872752 0.03941909 0.04011065 0.04080221
 0.04149378 0.04218534 0.0428769  0.04356846 0.04426003 0.04495159
 0.04564315 0.04633472 0.04702628 0.04771784 0.04840941 0.04910097
 0.04979253 0.05048409 0.05117566 0.05186722 0.05255878 0.05325035
 0.05394191 0.05463347 0.05532503 0.0560166  0.05670816 0.05739972
 0.05809129 0.05878285 0.05947441 0.06016598 0.06085754 0.0615491
 0.06224066 0.06293223 0.06362379 0.06431535 0.06500692 0.06569848
 0.06639004 0.0670816  0.06777317 0.06846473 0.06915629 0.06984786
 0.07053942 0.07123098 0.07192254 0.07261411 0.07330567 0.07399723
 0.0746888  0.07538036 0.07607192 0.07676349 0.07745505 0.07814661
 0.07883817 0.07987552 0.0802213  0.08125864 0.08160443 0.08229599
 0.08264177 0.08333333 0.0840249  0.08471646 0.08540802 0.08609959
 0.08679115 0.08748271 0.08817427 0.08886584 0.0895574  0.09024896
 0.09059474 0.09094053 0.09163209 0.09232365 0.09301521 0.09370678
 0.09439834 0.0950899  0.09578147 0.09647303 0.09751037 0.09785615
 0.0988935  0.09958506 0.09993084 0.10096819 0.10165975 0.10235131
 0.10304288 0.10373444 0.104426   0.10511757 0.10580913 0.10650069
 0.10719225 0.10788382 0.10857538 0.10926694 0.11030429 0.11065007
 0.11168741 0.11237898 0.11272476 0.1137621  0.11445367 0.11514523
 0.11583679 0.11652835 0.11721992 0.11791148 0.11860304 0.11964039
 0.11998617 0.12102351 0.12171508 0.12240664 0.1230982  0.12378976
 0.12448133 0.12517289 0.12586445 0.12655602 0.12759336 0.12793914
 0.12897649 0.12966805 0.13035961 0.13105118 0.13174274 0.1324343
 0.13312586 0.13381743 0.13450899 0.13554633 0.1362379  0.13692946
 0.13762102 0.13831259 0.13900415 0.13969571 0.14038728 0.14107884
 0.1417704  0.14280775 0.14349931 0.14419087 0.14488243 0.145574
 0.14626556 0.14695712 0.14764869 0.14868603 0.14903181 0.15006916
 0.15076072 0.15145228 0.15214385 0.15283541 0.15352697 0.15421853
 0.1549101  0.15594744 0.156639   0.15733057 0.15802213 0.15871369
 0.15940526 0.16009682 0.16113416 0.16182573 0.16251729 0.16320885
 0.16390041 0.16459198 0.16528354 0.16632089 0.16770401 0.16839557
 0.16908714 0.1697787  0.17047026 0.17116183 0.17219917 0.1735823
 0.17427386 0.17496542 0.17565698 0.17669433 0.17738589 0.17807746
 0.17876902 0.17946058 0.18015214 0.18395574 0.1846473  0.18568465
 0.18637621 0.1884509  0.18914246 0.20124481 0.20366528 0.20504841
 0.21887967 0.24757953 0.25311203 0.29253112 0.29840941 0.34163209
 0.34232365 0.36825726 0.41182573 0.41251729 0.4291148  0.44156293
 0.46473029 0.47268326 0.48720609 0.48962656 0.49723375 0.51106501
 0.52385892 0.52489627 0.55394191 0.55843707 0.58748271 0.5988935
 0.61272476 0.61964039 0.64211618 0.65836791 0.67116183 0.67219917
 0.69502075 0.71542185 0.71749654 0.73858921 0.74481328 0.75103734
 0.75207469 0.75622407 0.78526971 0.81051176 0.84024896 0.87828492
 0.88070539 0.90698479 0.92427386 0.93257261 0.9384509  0.94640387
 0.94986169 0.95470263 0.95677732 0.9664592  0.96991701 0.97233748
 0.97614108 1.        ]

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/xgboost/sklearn.py", line 1440, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
 306 307 308 309 310 311 312 313 314 315 316 317 318], got [0.         0.00138313 0.00207469 0.00276625 0.00345781 0.00414938
 0.00484094 0.0055325  0.00622407 0.00691563 0.00760719 0.00829876
 0.00899032 0.0093361  0.01037344 0.01106501 0.01141079 0.01210235
 0.01279391 0.01348548 0.01417704 0.0148686  0.01556017 0.01625173
 0.01694329 0.01763485 0.01832642 0.01901798 0.01970954 0.02040111
 0.02109267 0.02178423 0.0224758  0.02316736 0.02385892 0.02455048
 0.02524205 0.02558783 0.02662517 0.02697095 0.02766252 0.02869986
 0.02904564 0.02973721 0.03042877 0.03112033 0.03181189 0.03250346
 0.03319502 0.03388658 0.03457815 0.03526971 0.03596127 0.03665284
 0.0373444  0.03803596 0.03872752 0.03941909 0.04011065 0.04080221
 0.04149378 0.04218534 0.0428769  0.04356846 0.04426003 0.04495159
 0.04564315 0.04633472 0.04702628 0.04771784 0.04840941 0.04910097
 0.04979253 0.05048409 0.05117566 0.05186722 0.05255878 0.05325035
 0.05394191 0.05463347 0.05532503 0.0560166  0.05670816 0.05739972
 0.05809129 0.05878285 0.05947441 0.06016598 0.06085754 0.0615491
 0.06224066 0.06293223 0.06362379 0.06431535 0.06500692 0.06569848
 0.06639004 0.0670816  0.06777317 0.06846473 0.06915629 0.06984786
 0.07053942 0.07123098 0.07192254 0.07261411 0.07330567 0.07399723
 0.0746888  0.07538036 0.07607192 0.07676349 0.07745505 0.07814661
 0.07883817 0.07987552 0.0802213  0.08125864 0.08160443 0.08229599
 0.08264177 0.08333333 0.0840249  0.08471646 0.08540802 0.08609959
 0.08679115 0.08748271 0.08817427 0.08886584 0.0895574  0.09024896
 0.09059474 0.09094053 0.09163209 0.09232365 0.09301521 0.09370678
 0.09439834 0.0950899  0.09578147 0.09647303 0.09751037 0.09785615
 0.0988935  0.09958506 0.09993084 0.10096819 0.10165975 0.10235131
 0.10304288 0.10373444 0.104426   0.10511757 0.10580913 0.10650069
 0.10719225 0.10788382 0.10857538 0.10926694 0.11030429 0.11065007
 0.11168741 0.11237898 0.11272476 0.1137621  0.11445367 0.11514523
 0.11583679 0.11652835 0.11721992 0.11791148 0.11860304 0.11964039
 0.11998617 0.12102351 0.12171508 0.12240664 0.1230982  0.12378976
 0.12448133 0.12517289 0.12586445 0.12655602 0.12759336 0.12793914
 0.12897649 0.12966805 0.13035961 0.13105118 0.13174274 0.1324343
 0.13312586 0.13381743 0.13450899 0.13554633 0.1362379  0.13692946
 0.13762102 0.13831259 0.13900415 0.13969571 0.14038728 0.14107884
 0.1417704  0.14280775 0.14349931 0.14419087 0.14488243 0.145574
 0.14626556 0.14695712 0.14764869 0.14868603 0.14903181 0.15006916
 0.15076072 0.15145228 0.15214385 0.15283541 0.15352697 0.15421853
 0.1549101  0.15594744 0.156639   0.15733057 0.15802213 0.15871369
 0.15940526 0.16009682 0.16113416 0.16182573 0.16251729 0.16320885
 0.16390041 0.16459198 0.16528354 0.16632089 0.16701245 0.16770401
 0.16839557 0.16908714 0.17047026 0.17116183 0.17219917 0.17289073
 0.1735823  0.17427386 0.17496542 0.17565698 0.17669433 0.17738589
 0.17807746 0.17876902 0.17946058 0.18015214 0.18395574 0.1846473
 0.18637621 0.1884509  0.18914246 0.19605809 0.20124481 0.20366528
 0.20504841 0.21196404 0.21887967 0.24757953 0.25311203 0.29253112
 0.29840941 0.32261411 0.34163209 0.34232365 0.36825726 0.39211618
 0.41182573 0.41251729 0.4291148  0.44156293 0.46473029 0.47994467
 0.48720609 0.48962656 0.49723375 0.52385892 0.52489627 0.55394191
 0.55843707 0.5988935  0.61272476 0.6137621  0.61964039 0.63416321
 0.64211618 0.65836791 0.66735823 0.69502075 0.71542185 0.73858921
 0.74481328 0.75207469 0.75622407 0.80152144 0.81051176 0.87828492
 0.88070539 0.92427386 0.9253112  0.93257261 0.9384509  0.94640387
 0.94986169 0.95470263 0.95677732 0.9664592  0.96991701 0.97614108
 1.        ]


# Decision Trees: Model Train/Test

In [None]:
import xgboost as xgb
from itertools import chain, combinations
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import math

# Independent variables
independent_vars = ["temperature", "humidity", "pressure", "tvoc"]

# Function to get all combinations of the independent variables
def all_combinations(variables):
    return list(chain(*map(lambda x: combinations(variables, x), range(1, len(variables) + 1))))

# Get all combinations of independent variables
combinations = all_combinations(independent_vars)

# Dependent variable
dependent_var = "radon"

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(df[independent_vars], df[dependent_var], test_size=0.2, random_state=42)

# Initialize a dictionary to store R-squared scores
r2_scores = {}
mae_scores = {}
mse_scores = {}

# Iterate over each combination of independent variables
for combo in combinations:
    # Train an XGBoost regression model
    model = xgb.XGBClassifier(objective='reg:squarederror', random_state=42) # Adjust hyperparameters as needed
    model.fit(X_train[list(combo)], y_train)

    # Make predictions using the test set
    y_pred = model.predict(X_test[list(combo)])

    # Calculate R-squared score
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)

    # Store the R-squared score in a dictionary
    r2_scores[combo] = r2
    mae_scores[combo] = mae
    mse_scores[combo] = mse

# Find the combination with the highest R-squared score
best_combo = max(r2_scores, key=r2_scores.get)

#print("Best combination of independent variables:", best_combo)
#print("Highest R-squared score:", r2_scores[best_combo])
#print("Mean absolute error:", mae_scores[best_combo])
#print("Mean squared error:", mse_scores[best_combo])

Best combination of independent variables: ('temperature', 'humidity', 'pressure', 'tvoc')
Highest R-squared score: 0.6134782471496891
Mean absolute error: 0.02014222705939061
Mean squared error: 0.0009671201704714494


## Best Model

In [None]:
X_train, X_test, y_train, y_test = train_test_split(df[independent_vars], df[dependent_var], test_size=0.2, random_state=42)

summer_min = df_summer.min()
summer_max = df_summer.max()

r2_best_scores = {}
mae_best_scores = {}
mse_best_scores = {}

best_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
best_model.fit(X_train[list(best_combo)], y_train)

y_pred = best_model.predict(X_test[list(best_combo)])

r2_best = r2_score(y_test, y_pred)
mae_best = mean_absolute_error(y_test, y_pred)
mse_best = mean_squared_error(y_test, y_pred)

mse_unnormalized = mse_best * (summer_max["radon"] - summer_min["radon"])**2
mae_unnormalized = mae_best * (summer_max["radon"] - summer_min["radon"])

print("Best combination of independent variables:", best_combo)
print("Highest R-squared score:", r2_best)
print("Mean Squared Error - Unnormalized:", mse_unnormalized)
print("Mean Absolute Error - Unnormalized:", mae_unnormalized)
print("Mean Squared Error:", mse_best)
print("Mean Absolute Error:", mae_best)

Best combination of independent variables: ('temperature', 'humidity', 'pressure', 'tvoc')
Highest R-squared score: 0.6134782471496891
Mean Squared Error - Unnormalized: 8088.668153445924
Mean Absolute Error - Unnormalized: 58.25132065575764
Mean Squared Error: 0.0009671201704714494
Mean Absolute Error: 0.02014222705939061


### Predicting new Data

In [None]:
summer_min = df_summer.min()
summer_max = df_summer.max()

independent_min = df_summer.drop(columns=["radon"]).min()
independent_max = df_summer.drop(columns=["radon"]).max()

new_data = {"temperature": 25, "humidity": 50, "pressure": 1017, "tvoc": 2}

new_data_df = pd.DataFrame(new_data, index=[0])

new_data_normalized = (new_data_df - independent_min) / (independent_max - independent_min)

prediction = best_model.predict(new_data_normalized.values.reshape(1, -1))[0]
##prediction = best_model.predict(pd.DataFrame([new_data_normalized])[list(best_combo)])

unnormalized_prediction = prediction * (summer_max["radon"] - summer_min["radon"]) + summer_min["radon"]

print("Predicted radon concentration", unnormalized_prediction)

Predicted radon concentration 322.53340280056


### Predicting New Data: Function

In [None]:
def predict_radon(temp, humidity, pressure, tvoc):
    summer_min = df_summer.min()
    summer_max = df_summer.max()

    independent_min = df_summer.drop(columns=["radon"]).min()
    independent_max = df_summer.drop(columns=["radon"]).max()

    new_data = {"temperature": temp, "humidity": humidity, "pressure": pressure, "tvoc": tvoc}

    new_data_df = pd.DataFrame(new_data, index=[0])

    new_data_normalized = (new_data_df - independent_min) / (independent_max - independent_min)

    prediction = best_model.predict(new_data_normalized.values.reshape(1, -1))[0]
    ##prediction = best_model.predict(pd.DataFrame([new_data_normalized])[list(best_combo)])

    unnormalized_prediction = prediction * (summer_max["radon"] - summer_min["radon"]) + summer_min["radon"]

    print("Predicted radon concentration", unnormalized_prediction)
    return unnormalized_prediction

In [None]:
predict_radon(25, 50, 1017, 2)

Predicted radon concentration 322.53340280056


322.53340280056