## Data acquisition

In [4]:
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
  
# Fetching the dataset. 
student_performance = fetch_ucirepo(id=320) 

categorical_columns = student_performance.variables[student_performance.variables['type'].isin(['Categorical', 'Binary'])]

# Some of the features are categorical - we'll need to encode them later on, we extract the indices of columns already.
indices = categorical_columns.index.tolist()
indices = [index for index in indices if index < 30]
   
X = student_performance.data.features
y = student_performance.data.targets

# Extracting the targets
G1_multiclass = y.iloc[:, 0]
G2_multiclass = y.iloc[:, 1]
G3_multiclass = y.iloc[:, 2]

# Move G1 and G2 to features for later use.
X_with_G1 = pd.concat([X, G1_multiclass], axis=1)
X_with_G1_G2 = pd.concat([X_with_G1, G2_multiclass], axis=1)

# Student's perfomance is in a 1-20 scale, we'll turn it into a binary classification problem - passing the class or failing it.
threshold = 10
G1 = np.where(G1_multiclass >= threshold, 1, 0)
G2 = np.where(G2_multiclass >= threshold, 1, 0)
G3 = np.where(G3_multiclass >= threshold, 1, 0)

# Although we will use the same data for all three groups, we need to get the same splits for each group.
G1_X_train, G1_X_test, G1_y_train, G1_y_test = train_test_split(X, G1, test_size=0.2, random_state=2137)
G2_X_train, G2_X_test, G2_y_train, G2_y_test = train_test_split(X, G2, test_size=0.2, random_state=2137)
G3_X_train, G3_X_test, G3_y_train, G3_y_test = train_test_split(X, G3, test_size=0.2, random_state=2137)
G3_X_with_G1_G2_train, G3_X_with_G1_G2_test, G3_y_with_G1_G2_train, G3_y_with_G1_G2_test = train_test_split(X_with_G1_G2, G3, test_size=0.2, random_state=2137)

## Data preprocessing and AdaBoost model preparation


In [5]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, OrdinalEncoder

model = AdaBoostClassifier()

# Defining the preprocessing steps. We will use them for both models.
preprocessing_steps = [
  # We need to convert the categorical features.
  ('encoder', ColumnTransformer(transformers=[('encoder', OrdinalEncoder(), indices)], remainder='passthrough')),
  # Normalizing the data.
  ('scaler', StandardScaler()),
]

qualification_steps = [
    ('model', AdaBoostClassifier(estimator=DecisionTreeClassifier(), random_state=2137, algorithm='SAMME')),
]

# Creating the pipeline for AdaBoostClassifier.
pipeline = Pipeline(steps=preprocessing_steps + qualification_steps)

## Find the best hyperparameters for AdaBoost model with RandomizedSearchCV

In [6]:
from sklearn.model_selection import RandomizedSearchCV

param_grid = {
    'model__n_estimators': np.arange(4, 128, 4),
    'model__learning_rate': np.arange(0.05, 1.0, 0.05),
    'model__estimator__max_depth': np.arange(1, 10, 1),
    'model__estimator__min_samples_leaf': np.arange(1, 10, 1),
}

# Create a RandomizedSearchCV object with the pipeline.
grid_search = RandomizedSearchCV(estimator=pipeline, param_distributions=param_grid, n_iter=1024, scoring='accuracy', n_jobs=-1, verbose=1, random_state=2137) 

# Fit the data to the GridSearchCV object.
bestModel = grid_search.fit(G1_X_train, G1_y_train).best_estimator_

# Get the best parameters and the best score.
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Make the predictions.
G1_pred = bestModel.predict(G1_X_test)
G2_pred = bestModel.predict(G2_X_test)
G3_pred = bestModel.predict(G3_X_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

Fitting 5 folds for each of 1024 candidates, totalling 5120 fits
Best Parameters: {'model__n_estimators': 88, 'model__learning_rate': 0.6000000000000001, 'model__estimator__min_samples_leaf': 4, 'model__estimator__max_depth': 1}
Best Score: 0.8362770724421209


## AdaBoosts model's metrics

In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

accuracy_G1 = accuracy_score(y_true=G1_y_test, y_pred=G1_pred)
accuracy_G2 = accuracy_score(y_true=G2_y_test, y_pred=G2_pred)
accuracy_G3 = accuracy_score(y_true=G3_y_test, y_pred=G3_pred)
print("Accuracy on G1:", accuracy_G1)
print("Accuracy on G2:", accuracy_G2)
print("Accuracy on G3:", accuracy_G3)

precision_G1 = precision_score(y_true=G1_y_test, y_pred=G1_pred)
precision_G2 = precision_score(y_true=G2_y_test, y_pred=G2_pred)
precision_G3 = precision_score(y_true=G3_y_test, y_pred=G3_pred)
print("Precision on G1:", precision_G1)
print("Precision on G2:", precision_G2)
print("Precision on G3:", precision_G3)

recall_G1 = recall_score(y_true=G1_y_test, y_pred=G1_pred)
recall_G2 = recall_score(y_true=G2_y_test, y_pred=G2_pred)
recall_G3 = recall_score(y_true=G3_y_test, y_pred=G3_pred)
print("Recall on G1:", recall_G1)
print("Recall on G2:", recall_G2)
print("Recall on G3:", recall_G3)

f1_G1 = f1_score(y_true=G1_y_test, y_pred=G1_pred)
f1_G2 = f1_score(y_true=G2_y_test, y_pred=G2_pred)
f1_G3 = f1_score(y_true=G3_y_test, y_pred=G3_pred)
print("F1 Score on G1:", f1_G1)
print("F1 Score on G2:", f1_G2)
print("F1 Score on G3:", f1_G3)

conf_matrix_G1 = confusion_matrix(y_true=G1_y_test, y_pred=G1_pred)
conf_matrix_G2 = confusion_matrix(y_true=G2_y_test, y_pred=G2_pred)
conf_matrix_G3 = confusion_matrix(y_true=G3_y_test, y_pred=G3_pred)
print("Confusion Matrix on G1:\n", conf_matrix_G1)
print("Confusion Matrix on G2:\n", conf_matrix_G2)
print("Confusion Matrix on G3:\n", conf_matrix_G3)

target_names = ['Fail', 'Pass']
classification_report_G1 = classification_report(y_true=G1_y_test, y_pred=G1_pred, target_names=target_names)
classification_report_G2 = classification_report(y_true=G2_y_test, y_pred=G2_pred, target_names=target_names)
classification_report_G3 = classification_report(y_true=G3_y_test, y_pred=G3_pred, target_names=target_names)
print("Classification Report on G1:\n", classification_report_G1)
print("Classification Report on G2:\n", classification_report_G2)
print("Classification Report on G3:\n", classification_report_G3) 

Accuracy on G1: 0.8076923076923077
Accuracy on G2: 0.7846153846153846
Accuracy on G3: 0.8307692307692308
Precision on G1: 0.8301886792452831
Precision on G2: 0.8207547169811321
Precision on G3: 0.8962264150943396
Recall on G1: 0.9263157894736842
Recall on G2: 0.90625
Recall on G3: 0.8962264150943396
F1 Score on G1: 0.8756218905472637
F1 Score on G2: 0.8613861386138614
F1 Score on G3: 0.8962264150943396
Confusion Matrix on G1:
 [[17 18]
 [ 7 88]]
Confusion Matrix on G2:
 [[15 19]
 [ 9 87]]
Confusion Matrix on G3:
 [[13 11]
 [11 95]]
Classification Report on G1:
               precision    recall  f1-score   support

        Fail       0.71      0.49      0.58        35
        Pass       0.83      0.93      0.88        95

    accuracy                           0.81       130
   macro avg       0.77      0.71      0.73       130
weighted avg       0.80      0.81      0.80       130

Classification Report on G2:
               precision    recall  f1-score   support

        Fail       0

## Neural network approach


In [9]:
from keras_tuner import RandomSearch
from keras import models, layers, callbacks

# We'll use keras_tuner to search for the best hyperparameters.
def build_model(hp):
    model = models.Sequential([
        layers.Input((30,)),
    ])

    # We want to find the best starting size for the hidden layers.
    units = hp.Int('units', min_value=32, max_value=128, step=32)
    current_units = units

    # We want to find the best number of hidden layers.
    for i in range(1, hp.Int('hidden_layers', min_value=2, max_value=6, step=1)):

        # We want to find the best activation function for everu hidden layer.
        model.add(layers.Dense(units=current_units, activation=hp.Choice('activation', ['relu', 'sigmoid', 'softmax'])))
        # Batch normalization is a good idea.
        model.add(layers.BatchNormalization())
        # We want to find the best dropout rate for every hidden layer.
        model.add(layers.Dropout(hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))
        # We want to find the best ratio of input to output units for every hidden layer.
        current_units = int(current_units // hp.Float('size_factor', min_value=1.0, max_value=4.0, step=0.5))

    # Since the problem is binary, we want to use a sigmoid activation function.
    model.add(layers.Dense(1, activation='sigmoid',))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

early_stopping = callbacks.EarlyStopping(monitor='accuracy', patience=8) 

# We need to get the data preprocessed before we can use it for training.
transformed_G1_X_train = Pipeline(steps=preprocessing_steps).fit_transform(G1_X_train, G1_y_train)
transformed_G1_X_test = Pipeline(steps=preprocessing_steps).fit_transform(G1_X_test, G1_y_test)

transformed_G2_X_test = Pipeline(steps=preprocessing_steps).fit_transform(G2_X_test, G2_y_test)
transformed_G3_X_test = Pipeline(steps=preprocessing_steps).fit_transform(G3_X_test, G3_y_test)

# Since there's numerous combinations of hyperparameters, we'll use a random search to find the best combination.
tuner = RandomSearch(hypermodel=build_model, objective='val_accuracy', max_trials=128, seed=2137, project_name='G1')
tuner.search(transformed_G1_X_train, G1_y_train, epochs=128, callbacks=[early_stopping], validation_data=(transformed_G1_X_test, G1_y_test), verbose=1)

model = tuner.get_best_models()[0]
print(tuner.get_best_hyperparameters()[0].values)

G1_pred = model.predict(transformed_G1_X_test)
G1_pred = np.where(G1_pred >= 0.5, 1, 0)

G2_pred = model.predict(transformed_G2_X_test)
G2_pred = np.where(G2_pred >= 0.5, 1, 0)

G3_pred = model.predict(transformed_G3_X_test)
G3_pred = np.where(G3_pred >= 0.5, 1, 0)

Trial 2 Complete [00h 00m 00s]

Best val_accuracy So Far: None
Total elapsed time: 00h 00m 00s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
32                |96                |units
6                 |4                 |hidden_layers
softmax           |sigmoid           |activation
0.1               |0.2               |dropout
3                 |4                 |size_factor

Epoch 1/128


Traceback (most recent call last):
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^

RuntimeError: Number of consecutive failures exceeded the limit of 3.
Traceback (most recent call last):
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras_tuner/src/engine/hypermodel.py", line 149, in fit
    return model.fit(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Users/bigpoppe/UJ/PSI/.venv/lib/python3.12/site-packages/keras/src/layers/input_spec.py", line 227, in assert_input_compatibility
    raise ValueError(
ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 30, but received input with shape (None, 56)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 56), dtype=float32)
  • training=True
  • mask=None


# Neural network metrics

In [None]:
print(model.summary())
accuracy_G1 = accuracy_score(y_true=G1_y_test, y_pred=G1_pred)
accuracy_G2 = accuracy_score(y_true=G2_y_test, y_pred=G2_pred)
accuracy_G3 = accuracy_score(y_true=G3_y_test, y_pred=G3_pred)
print("Accuracy on G1:", accuracy_G1)
print("Accuracy on G2:", accuracy_G2)
print("Accuracy on G3:", accuracy_G3)

precision_G1 = precision_score(y_true=G1_y_test, y_pred=G1_pred)
precision_G2 = precision_score(y_true=G2_y_test, y_pred=G2_pred)
precision_G3 = precision_score(y_true=G3_y_test, y_pred=G3_pred)
print("Precision on G1:", precision_G1)
print("Precision on G2:", precision_G2)
print("Precision on G3:", precision_G3)

recall_G1 = recall_score(y_true=G1_y_test, y_pred=G1_pred)
recall_G2 = recall_score(y_true=G2_y_test, y_pred=G2_pred)
recall_G3 = recall_score(y_true=G3_y_test, y_pred=G3_pred)
print("Recall on G1:", recall_G1)
print("Recall on G2:", recall_G2)
print("Recall on G3:", recall_G3)

f1_G1 = f1_score(y_true=G1_y_test, y_pred=G1_pred)
f1_G2 = f1_score(y_true=G2_y_test, y_pred=G2_pred)
f1_G3 = f1_score(y_true=G3_y_test, y_pred=G3_pred)
print("F1 Score on G1:", f1_G1)
print("F1 Score on G2:", f1_G2)
print("F1 Score on G3:", f1_G3)

conf_matrix_G1 = confusion_matrix(y_true=G1_y_test, y_pred=G1_pred)
conf_matrix_G2 = confusion_matrix(y_true=G2_y_test, y_pred=G2_pred)
conf_matrix_G3 = confusion_matrix(y_true=G3_y_test, y_pred=G3_pred)
print("Confusion Matrix on G1:\n", conf_matrix_G1)
print("Confusion Matrix on G2:\n", conf_matrix_G2)
print("Confusion Matrix on G3:\n", conf_matrix_G3)

target_names = ['Fail', 'Pass']
classification_report_G1 = classification_report(y_true=G1_y_test, y_pred=G1_pred, target_names=target_names)
classification_report_G2 = classification_report(y_true=G2_y_test, y_pred=G2_pred, target_names=target_names)
classification_report_G3 = classification_report(y_true=G3_y_test, y_pred=G3_pred, target_names=target_names)
print("Classification Report on G1:\n", classification_report_G1)
print("Classification Report on G2:\n", classification_report_G2)
print("Classification Report on G3:\n", classification_report_G3) 

None
Accuracy on G1: 0.8307692307692308
Accuracy on G2: 0.7923076923076923
Accuracy on G3: 0.8538461538461538
Precision on G1: 0.8411214953271028
Precision on G2: 0.822429906542056
Precision on G3: 0.9065420560747663
Recall on G1: 0.9473684210526315
Recall on G2: 0.9166666666666666
Recall on G3: 0.9150943396226415
F1 Score on G1: 0.8910891089108911
F1 Score on G2: 0.8669950738916257
F1 Score on G3: 0.9107981220657277
Confusion Matrix on G1:
 [[18 17]
 [ 5 90]]
Confusion Matrix on G2:
 [[15 19]
 [ 8 88]]
Confusion Matrix on G3:
 [[14 10]
 [ 9 97]]
Classification Report on G1:
               precision    recall  f1-score   support

        Fail       0.78      0.51      0.62        35
        Pass       0.84      0.95      0.89        95

    accuracy                           0.83       130
   macro avg       0.81      0.73      0.76       130
weighted avg       0.83      0.83      0.82       130

Classification Report on G2:
               precision    recall  f1-score   support

     

As you can see, the neural network model isn't a significant improvement over the previous model. My hypothesis here is that the data we are providing isn't containing all the important features. Let's now use the data that also has G1 and G2 as features.

## AdaBoost model on new data

In [None]:
param_grid = {
    'model__n_estimators': np.arange(4, 128, 4),
    'model__learning_rate': np.arange(0.05, 1.0, 0.05),
    'model__estimator__max_depth': np.arange(1, 10, 1),
    'model__estimator__min_samples_leaf': np.arange(1, 10, 1),
}

grid_search = RandomizedSearchCV(estimator=pipeline, param_distributions=param_grid, n_iter=1024, scoring='accuracy', n_jobs=-1, verbose=1, random_state=2137) 

bestModel = grid_search.fit(G3_X_with_G1_G2_train, G3_y_with_G1_G2_train).best_estimator_

best_params = grid_search.best_params_
best_score = grid_search.best_score_

G3_pred = bestModel.predict(G3_X_with_G1_G2_test)

print("Best Parameters:", best_params)
print("Best Score:", best_score)

Fitting 5 folds for each of 1024 candidates, totalling 5120 fits
Best Parameters: {'model__n_estimators': 20, 'model__learning_rate': 0.9500000000000001, 'model__estimator__min_samples_leaf': 5, 'model__estimator__max_depth': 9}
Best Score: 0.9595780433159075


## AdaBoost model metrics

In [None]:
accuracy_G3 = accuracy_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Accuracy on G3:", accuracy_G3)

precision_G3 = precision_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Precision on G3:", precision_G3)

recall_G3 = recall_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Recall on G3:", recall_G3)

f1_G3 = f1_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("F1 Score on G3:", f1_G3)

conf_matrix_G3 = confusion_matrix(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Confusion Matrix on G3:\n", conf_matrix_G3)

target_names = ['Fail', 'Pass']
classification_report_G3 = classification_report(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred, target_names=target_names)
print("Classification Report on G3:\n", classification_report_G3) 

Accuracy on G3: 0.8769230769230769
Precision on G3: 0.9166666666666666
Recall on G3: 0.9339622641509434
F1 Score on G3: 0.9252336448598131
Confusion Matrix on G3:
 [[15  9]
 [ 7 99]]
Classification Report on G3:
               precision    recall  f1-score   support

        Fail       0.68      0.62      0.65        24
        Pass       0.92      0.93      0.93       106

    accuracy                           0.88       130
   macro avg       0.80      0.78      0.79       130
weighted avg       0.87      0.88      0.87       130



This model hasn't improved much. Let's see the neural network.

## Neural network on the new data

In [None]:
from keras_tuner import RandomSearch
from keras import models, layers, callbacks

def build_model(hp):
    model = models.Sequential([
        layers.Input((32,)),
    ])

    units = hp.Int('units', min_value=32, max_value=128, step=32)
    current_units = units

    for i in range(1, hp.Int('hidden_layers', min_value=2, max_value=6, step=1)):

        model.add(layers.Dense(units=current_units, activation=hp.Choice('activation', ['relu', 'sigmoid', 'softmax'])))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))
        current_units = int(current_units // hp.Float('size_factor', min_value=1.0, max_value=4.0, step=0.5))

    model.add(layers.Dense(1, activation='sigmoid',))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

early_stopping = callbacks.EarlyStopping(monitor='accuracy', patience=8) 

transformed_G3_X_with_G1_G2_train = Pipeline(steps=preprocessing_steps).fit_transform(G3_X_with_G1_G2_train, G3_y_with_G1_G2_train)
transformed_G3_X_with_G1_G2_test = Pipeline(steps=preprocessing_steps).fit_transform(G3_X_with_G1_G2_test, G3_y_with_G1_G2_test)

tuner = RandomSearch(hypermodel=build_model, objective='val_accuracy', max_trials=128, seed=2137, project_name='G3_with_G1_G2')
tuner.search(transformed_G3_X_with_G1_G2_train, G3_y_with_G1_G2_train, epochs=128, callbacks=[early_stopping], validation_data=(transformed_G3_X_with_G1_G2_test, G3_y_with_G1_G2_test), verbose=1)

model = tuner.get_best_models()[0]
print(tuner.get_best_hyperparameters()[0].values)

G3_pred = model.predict(transformed_G3_X_with_G1_G2_test)
G3_pred = np.where(G3_pred >= 0.5, 1, 0)

Reloading Tuner from ./G3_with_G1_G2/tuner0.json
{'units': 96, 'hidden_layers': 4, 'activation': 'relu', 'dropout': 0.2, 'size_factor': 4.0}
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  saveable.load_own_variables(weights_store.get(inner_path))


## Model's metrics

In [None]:
print(model.summary())

accuracy_G3 = accuracy_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Accuracy on G3:", accuracy_G3)

precision_G3 = precision_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Precision on G3:", precision_G3)

recall_G3 = recall_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Recall on G3:", recall_G3)

f1_G3 = f1_score(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("F1 Score on G3:", f1_G3)

conf_matrix_G3 = confusion_matrix(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred)
print("Confusion Matrix on G3:\n", conf_matrix_G3)

target_names = ['Fail', 'Pass']
classification_report_G3 = classification_report(y_true=G3_y_with_G1_G2_test, y_pred=G3_pred, target_names=target_names)
print("Classification Report on G3:\n", classification_report_G3) 

None
Accuracy on G3: 0.9384615384615385
Precision on G3: 0.9537037037037037
Recall on G3: 0.9716981132075472
F1 Score on G3: 0.9626168224299065
Confusion Matrix on G3:
 [[ 19   5]
 [  3 103]]
Classification Report on G3:
               precision    recall  f1-score   support

        Fail       0.86      0.79      0.83        24
        Pass       0.95      0.97      0.96       106

    accuracy                           0.94       130
   macro avg       0.91      0.88      0.89       130
weighted avg       0.94      0.94      0.94       130

