In [37]:
!pip install MKLpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [38]:
# Import librarys
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.kernel_approximation import Nystroem
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report
from MKLpy.algorithms import EasyMKL ,AverageMKL
from MKLpy.metrics.pairwise import homogeneous_polynomial_kernel as hpk
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score
#from mklaren.mkl.simplemkl import SimpleMKL --> not supported

In [39]:
# Generate custom dataset with 5000 samples, 8 features, and 2 classes
X, y = make_classification(n_samples=5000, n_features=8, n_classes=2)

# Split the dataset into 80% training data and 20% validation data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Printing the shape ofthe sets
print(f'shape of the x-train: {X_train.shape}')
print(f'shape of the y-train: {y_train.shape}')
print(f'shape of the x-val: {X_val.shape}')
print(f'shape of the y-val: {y_val.shape}')

shape of the x-train: (4000, 8)
shape of the y-train: (4000,)
shape of the x-val: (1000, 8)
shape of the y-val: (1000,)


In [40]:
# Define the parameter grid to search over
param_grid = {
    'C': [0.1,0.5,0.9,1,10,30,50,80,100],
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto']
}

# Create an SVM object
svm = SVC()

# Create a GridSearchCV object
grid_search = GridSearchCV(svm, param_grid, cv=5)

# Fit the data to the GridSearchCV object
grid_search.fit(X, y)

# Get the best parameters and the best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Print the best parameters found by GridSearchCV and score
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_}")

Best parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best score: 0.9226000000000001


In [41]:
# Make predictions on the validation set
y_pred = best_model.predict(X_val)

# Evaluate the model's performance
print(classification_report(y_val, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.91      0.94       517
           1       0.91      0.96      0.93       483

    accuracy                           0.94      1000
   macro avg       0.94      0.94      0.93      1000
weighted avg       0.94      0.94      0.94      1000



In [46]:
# Fit the EasyMKL model on the training data with different polynomial kernels
K_train = [hpk(X_train, degree=d) for d in range(10)]
K_test = [hpk(X_val,X_train, degree=d) for d in range(10)]

In [43]:
# MKL algorithms --> EasyMKL()
base_learner = SVC(C=0.1)

# lam is a hyper-parameter in [0,1]
EasyMKL = EasyMKL(lam=1.,learner=base_learner)
EasyMKL = EasyMKL.fit(K_train, y_train)

In [44]:
# Evaluate the solution --> EasyMKL()
y_pred = EasyMKL.predict(K_test)
y_score = EasyMKL.decision_function(K_test)

accuracy = accuracy_score(y_val, y_pred)
roc_auc = roc_auc_score(y_val, y_score)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)
confusion_mat = confusion_matrix(y_val, y_pred)

print ('The EasyMKL:')
print (f'Accuracy score: {accuracy}, roc AUC score: {roc_auc}')
print (f'Precision score: {precision}')
print (f'Recall score: {recall}')
print (f'F1 score: {f1}')
print (f'Confusion Matrix: {confusion_mat}')

The EasyMKL:
Accuracy score: 0.802, roc AUC score: 0.8488813067906499
Precision score: 0.7821782178217822
Recall score: 0.8178053830227743
F1 score: 0.7995951417004049
Confusion Matrix: [[407 110]
 [ 88 395]]


In [47]:
# MKL algorithms --> AverageMKL()
AverageMKL = AverageMKL().fit(K_train,y_train)

# The combined kernel matrix
K_average = AverageMKL.solution.ker_matrix
print (AverageMKL.solution.weights)

tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000])


In [49]:
# Evaluate the solution --> AverageMKL()
y_pred = AverageMKL.predict(K_test)
y_score = AverageMKL.decision_function(K_test)

accuracy = accuracy_score(y_val, y_pred)
roc_auc = roc_auc_score(y_val, y_score)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)
confusion_mat = confusion_matrix(y_val, y_pred)

print ('The AverageMKL:')
print (f'Accuracy score: {accuracy}, roc AUC score: {roc_auc}')
print (f'Precision score: {precision}')
print (f'Recall score: {recall}')
print (f'F1 score: {f1}')
print (f'Confusion Matrix: {confusion_mat}')

The AverageMKL:
Accuracy score: 0.814, roc AUC score: 0.8492697558377484
Precision score: 0.8049281314168378
Recall score: 0.8115942028985508
F1 score: 0.8082474226804125
Confusion Matrix: [[422  95]
 [ 91 392]]


In [51]:
# Creating the MKL like using VotingClassifier
# Define individual kernels
linear_kernel = SVC(kernel='linear')
rbf_kernel = SVC(kernel='rbf')
poly_kernel = SVC(kernel='poly')

# Define the MKL model with Nystroem approximation
mkl_model = VotingClassifier(estimators=[('linear', linear_kernel),
                                         ('rbf', rbf_kernel),
                                         ('poly', poly_kernel)])

# Create a pipeline with Nystroem kernel approximation
pipeline = Pipeline([
    ('nystroem', Nystroem()),
    ('mkl', mkl_model)
])

In [52]:
# Fit the MKL model on the training data
pipeline.fit(X_train, y_train)

# Predict on the testing data
y_pred = pipeline.predict(X_val)

# Calculate evaluation metrics
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)
confusion_mat = confusion_matrix(y_val, y_pred)

# Print the evaluation metrics
print ('The VotingClassifier:')
print (f'Accuracy score: {accuracy}')
print (f'Precision score: {precision}')
print (f'Recall score: {recall}')
print (f'F1 score: {f1}')
print (f'Confusion Matrix: {confusion_mat}')

The VotingClassifier:
Accuracy score: 0.921
Precision score: 0.8869731800766284
Recall score: 0.9585921325051759
F1 score: 0.9213930348258706
Confusion Matrix: [[458  59]
 [ 20 463]]
