## Headers

In [17]:
# Data preprocessing
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

 # Models
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier


# Metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import time

In [18]:
from data_treatment import train_df, test_df, \
                            treated_train_df, treated_test_df, \
                            new_features_train_df, new_features_test_df, \
                            targets_for_test_df
                            

# Decision Tree

## Crude

In [19]:

start_time = time.time()


# Assuming 'target' is the column to predict and the rest are features
X = train_df.drop(columns=['target'])
y = train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the decision tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the validation set
y_pred = model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

Time elapsed: 3m 16.5s
--------------------------------------
Validation Accuracy: 0.50210
Validation F1 Macro Score: 0.50204


In [20]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.48068
Test F1 Macro Score: 0.48063


## Treated Dataframe

In [21]:


start_time = time.time()


# Assuming 'target' is the column to predict and the rest are features
X = treated_train_df.drop(columns=['target'])
y = treated_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the decision tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the validation set
y_pred = model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')



Time elapsed: 19m 12.6s
--------------------------------------
Validation Accuracy: 0.50839
Validation F1 Macro Score: 0.50081


In [22]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = treated_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df[:len(test_predictions)], test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df[:len(test_predictions)], test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.49774
Test F1 Macro Score: 0.49437


## Only New Features

In [23]:


start_time = time.time()


# Assuming 'target' is the column to predict and the rest are features
X = new_features_train_df.drop(columns=['target'])
y = new_features_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the decision tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the validation set
y_pred = model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')



Time elapsed: 8m 31.6s
--------------------------------------
Validation Accuracy: 0.50330
Validation F1 Macro Score: 0.50142


In [24]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = new_features_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df[:len(test_predictions)], test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df[:len(test_predictions)], test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.50254
Test F1 Macro Score: 0.49721


# Random Forest Classifier

## Crude

In [25]:
from sklearn.ensemble import RandomForestClassifier

start_time = time.time()

# Assuming 'target' is the column to predict and the rest are features
X = train_df.drop(columns=['target'])
y = train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the Random Forest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = rf_model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

Time elapsed: 40m 31.3s
--------------------------------------
Validation Accuracy: 0.52253
Validation F1 Macro Score: 0.40895


In [26]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = rf_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.45986
Test F1 Macro Score: 0.44456


## Treated Dataframe

In [27]:
from sklearn.ensemble import RandomForestClassifier

start_time = time.time()

# Assuming 'target' is the column to predict and the rest are features
X = treated_train_df.drop(columns=['target'])
y = treated_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the Random Forest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = rf_model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

Time elapsed: 47m 59.0s
--------------------------------------
Validation Accuracy: 0.51910
Validation F1 Macro Score: 0.46335


In [28]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = treated_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = rf_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

ValueError: Found input variables with inconsistent numbers of samples: [909616, 909529]

## Only New Features

In [None]:


start_time = time.time()


# Assuming 'target' is the column to predict and the rest are features
X = new_features_train_df.drop(columns=['target'])
y = new_features_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the Random Forest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = rf_model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')



NameError: name 'new_features_train_df' is not defined

In [None]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = new_features_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = rf_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df[:len(test_predictions)], test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df[:len(test_predictions)], test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

# xGBoost

## Crude

In [None]:

start_time = time.time()

# Assuming 'target' is the column to predict and the rest are features
X = train_df.drop(columns=['target'])
y = train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the XGBoost model
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = xgb_model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

Parameters: { "use_label_encoder" } are not used.



Time elapsed: 0m 12.9s
--------------------------------------
Validation Accuracy: 0.52729
Validation F1 Macro Score: 0.44318


In [None]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = xgb_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.55726
Test F1 Macro Score: 0.44548


## New Features

In [None]:

start_time = time.time()

# Assuming 'target' is the column to predict and the rest are features
X = treated_train_df.drop(columns=['target'])
y = treated_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the XGBoost model
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = xgb_model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

Parameters: { "use_label_encoder" } are not used.



Time elapsed: 0m 19.5s
--------------------------------------
Validation Accuracy: 0.53091
Validation F1 Macro Score: 0.48096


In [None]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = treated_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = xgb_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.56668
Test F1 Macro Score: 0.44502


## Only New Features

In [None]:


start_time = time.time()


# Assuming 'target' is the column to predict and the rest are features
X = new_features_train_df.drop(columns=['target'])
y = new_features_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the XGBoost model
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = xgb_model.predict(X_val)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')



NameError: name 'time' is not defined

In [None]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = new_features_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = xgb_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

# Explainble Boost Machine

## Crude

In [None]:
from interpret.glassbox import ExplainableBoostingClassifier

start_time = time.time()

# Assuming 'target' is the column to predict and the rest are features
X = train_df.drop(columns=['target'])
y = train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the Explainable Boosting Classifier model
ebm_model = ExplainableBoostingClassifier(random_state=42)
ebm_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = ebm_model.predict(X_val)
float_y_pred = y_pred.astype(float)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, float_y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, float_y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

Time elapsed: 14m 15.0s
--------------------------------------
Validation Accuracy: 0.00000


ValueError: Mix of label input types (string and number)

In [None]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = treated_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = ebm_model.predict(X_test)
float_test_prediction = test_predictions.astype(float)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, float_test_prediction[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, float_test_prediction[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.57972
Test F1 Macro Score: 0.37788


In [None]:
from interpret import show


# Show the EBM explanation
ebm_global = ebm_model.explain_global()
show(ebm_global)
# Show local explanations
ebm_local = ebm_model.explain_local(X_val, y_val)
show(ebm_local)

## New Features

In [None]:

start_time = time.time()

# Assuming 'target' is the column to predict and the rest are features
X = treated_train_df.drop(columns=['target'])
y = treated_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the XGBoost model
ebm_model = ExplainableBoostingClassifier(random_state=42)
ebm_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = ebm_model.predict(X_val)
float_test_prediction = test_predictions.astype(float)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, float_y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, float_y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')

  warn(


Time elapsed: 15m 6.6s
--------------------------------------
Validation Accuracy: 0.52670
Validation F1 Macro Score: 0.42182


In [None]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = treated_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = ebm_model.predict(X_test)
float_test_prediction = test_predictions.astype(float)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, float_test_prediction[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, float_test_prediction[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.58042
Test F1 Macro Score: 0.39537


In [None]:
# Show the EBM explanation
ebm_global = ebm_model.explain_global()
show(ebm_global)
# Show local explanations
ebm_local = ebm_model.explain_local(X_val, y_val)
show(ebm_local)

## Only New Features

In [None]:


start_time = time.time()


# Assuming 'target' is the column to predict and the rest are features
X = new_features_train_df.drop(columns=['target'])
y = new_features_train_df['target']

# Split the data into training and validation sets
X_train = X.iloc[:len(X)*4//5]
y_train = y.iloc[:len(y)*4//5]
X_val= X.iloc[len(X)*4//5:]
y_val = y.iloc[len(y)*4//5:]

# Create and train the XGBoost model
ebm_model = ExplainableBoostingClassifier(random_state=42)
ebm_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = ebm_model.predict(X_val)
float_test_prediction = test_predictions.astype(float)

end_time = time.time()
minutes = (end_time - start_time) // 60
seconds = (end_time - start_time) % 60
print(f'Time elapsed: {minutes:.0f}m {seconds:.1f}s')
print('--------------------------------------')
# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro:.5f}')



NameError: name 'time' is not defined

In [None]:
# Evaluate the X_test predictions using the targets_for_test_df

# Ensure the test data is preprocessed in the same way as the training data

X_test = new_features_test_df.drop(columns=['row_id'])

# Make predictions on the test data
test_predictions = ebm_model.predict(X_test)
float_test_prediction = test_predictions.astype(float)

# Calculate accuracy
accuracy = accuracy_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)])	
print(f'Test Accuracy: {accuracy:.5f}')

# Calculate F1 macro score
f1_macro = f1_score(targets_for_test_df, test_predictions[:len(targets_for_test_df)], average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

In [None]:
# Show the EBM explanation
ebm_global = ebm_model.explain_global()
show(ebm_global)
# Show local explanations
ebm_local = ebm_model.explain_local(X_val, y_val)
show(ebm_local)