In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Common AI models

# Classic machine learning models

## Assignment 1
From the Sklearn library choose models of at least the following types, train them on the 6 imported datasets, evaluate their accuracy or R^2 and see which model works best on which dataset. (Note that there are both regression and classification sets)
* Tree
* Neural Network
* Neighbors
* Ensemble
* Naive Byes (classification only)
* Linear

## Assignment 2
Use XGBoost running on GPU to predict the same datasets. You can activate GPU acceleration in the Runtime tab:
Runtime -> Change runtime type -> Select GPU from the dropdown

In [2]:
from sklearn.datasets import load_iris, load_boston, load_diabetes, load_digits, load_wine, load_breast_cancer
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import train_test_split

# Example of how to load 
X = load_iris().data
y = load_iris().target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
#from sklearn.tree import ExtraTreeClassifier
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
#from sklearn.svm.classes import OneClassSVM
from sklearn.neural_network import MLPClassifier, MLPRegressor
#from sklearn.neighbors.classification import RadiusNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
#from sklearn.multioutput import ClassifierChain
#from sklearn.multioutput import MultiOutputClassifier
#from sklearn.multiclass import OutputCodeClassifier
#from sklearn.multiclass import OneVsOneClassifier
#from sklearn.multiclass import OneVsRestClassifier
#from sklearn.linear_model.stochastic_gradient import SGDClassifier
#from sklearn.linear_model.ridge import RidgeClassifierCV
#from sklearn.linear_model.ridge import RidgeClassifier
#from sklearn.linear_model.passive_aggressive import PassiveAggressiveClassifier    
#from sklearn.gaussian_process.gpc import GaussianProcessClassifier
#from sklearn.ensemble.voting_classifier import VotingClassifier
#from sklearn.ensemble.weight_boosting import AdaBoostClassifier
#from sklearn.ensemble.gradient_boosting import GradientBoostingClassifier
#from sklearn.ensemble.bagging import BaggingClassifier
#from sklearn.ensemble.forest import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
#from sklearn.naive_bayes import BernoulliNB
#from sklearn.calibration import CalibratedClassifierCV
from sklearn.naive_bayes import GaussianNB
#from sklearn.semi_supervised import LabelPropagation
#from sklearn.semi_supervised import LabelSpreading
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC, LinearSVR
from sklearn.linear_model import LogisticRegression, LinearRegression
#from sklearn.linear_model import LogisticRegressionCV
#from sklearn.naive_bayes import MultinomialNB  
#from sklearn.neighbors import NearestCentroid
#from sklearn.svm import NuSVC
from xgboost import XGBRegressor, XGBClassifier

classifiers = [DecisionTreeClassifier, MLPClassifier, KNeighborsClassifier, RandomForestClassifier, GaussianNB, LinearSVC, LogisticRegression, XGBClassifier]
regressors = [DecisionTreeRegressor, MLPRegressor, KNeighborsRegressor, RandomForestRegressor, LinearSVR, LinearRegression, XGBRegressor]

results = {}

data_loaders = [load_iris, load_boston, load_diabetes, load_digits, load_wine, load_breast_cancer]
for loader in data_loaders:
    loader_name = loader.__name__
    print(loader_name)
    results[loader_name] = {}
    X = loader().data
    y = loader().target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    if 'Classification' in loader().DESCR:
        models = classifiers
    else:
        models = regressors

    for m in models:
        model_name = m.__name__
        results[loader_name][model_name] = {}

        if "XGB" in model_name:
            m = m(tree_method='gpu_hist') #Make sure you're connected to a GPU - Edit > Notebook settings > Hardware accelerator > GPU
        else:
            m = m()
        fitted = m.fit(X=X_train, y=y_train)

        y_pred = fitted.predict(X_test)

        if 'Classification' in loader().DESCR:
            score = accuracy_score(y_test, y_pred)
        else: 
            score = r2_score(y_test, y_pred)

        results[loader_name][model_name]['score'] = score
        print(f'{model_name} accuracy: ', score)


In [4]:
for k, v in results.items():
    best_model = ''
    highest_score = -999
    for a, b, in v.items():
        if b['score'] > highest_score:
            best_model = a
            highest_score = b['score']
    print(f'Best model on {k} was {best_model} with score: {highest_score}')


Best model on load_iris was MLPClassifier with score: 1.0
Best model on load_boston was XGBRegressor with score: 0.8906424514798732
Best model on load_diabetes was LinearRegression with score: 0.510395426135144
Best model on load_digits was KNeighborsClassifier with score: 0.9932659932659933
Best model on load_wine was RandomForestClassifier with score: 1.0
Best model on load_breast_cancer was KNeighborsRegressor with score: 0.8497298630812877


In [5]:
results

{'load_boston': {'DecisionTreeRegressor': {'score': 0.7470461024618086},
  'KNeighborsRegressor': {'score': 0.5748334691810936},
  'LinearRegression': {'score': 0.7261570836552481},
  'LinearSVR': {'score': 0.41972419899006597},
  'MLPRegressor': {'score': 0.653274498486336},
  'RandomForestRegressor': {'score': 0.8676145772582776},
  'XGBRegressor': {'score': 0.8906424514798732}},
 'load_breast_cancer': {'DecisionTreeRegressor': {'score': 0.5825829530035771},
  'KNeighborsRegressor': {'score': 0.8497298630812877},
  'LinearRegression': {'score': 0.6911359869475926},
  'LinearSVR': {'score': 0.45424094072304155},
  'MLPRegressor': {'score': -0.8021623192267584},
  'RandomForestRegressor': {'score': 0.8484961638090539},
  'XGBRegressor': {'score': 0.8301625511124916}},
 'load_diabetes': {'DecisionTreeRegressor': {'score': -0.12530295293934302},
  'KNeighborsRegressor': {'score': 0.43975256620686554},
  'LinearRegression': {'score': 0.510395426135144},
  'LinearSVR': {'score': -0.4234395