In [34]:
from lazypredict.Supervised import LazyClassifier

In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [36]:
from sklearn.model_selection import train_test_split

In [37]:
data = pd.read_csv('../data/preprocessed/seeds_data.csv', index_col=[0])

In [38]:
from sklearn.preprocessing import PowerTransformer
from sklearn.pipeline import Pipeline
preprocessor_pipe = Pipeline([
    ('power_transformer', PowerTransformer())
])

X = preprocessor_pipe.fit_transform(data.drop(columns=['target'], axis=1).to_numpy())
Y = data['target'].to_numpy()

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=123)

In [40]:
clf = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)

In [41]:
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

100%|██████████| 30/30 [00:00<00:00, 39.54it/s]






In [42]:
models

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LinearSVC,0.97,0.98,,0.98,0.01
LinearDiscriminantAnalysis,0.97,0.98,,0.98,0.01
XGBClassifier,0.97,0.98,,0.98,0.06
SGDClassifier,0.97,0.98,,0.98,0.01
RidgeClassifierCV,0.97,0.98,,0.98,0.01
RidgeClassifier,0.97,0.98,,0.98,0.01
RandomForestClassifier,0.97,0.98,,0.98,0.12
PassiveAggressiveClassifier,0.97,0.98,,0.98,0.01
NearestCentroid,0.97,0.98,,0.98,0.01
LogisticRegression,0.97,0.98,,0.98,0.02


In [43]:
from mlxtend.classifier import StackingCVClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LinearRegression, RidgeClassifierCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV

classifiers = (LinearDiscriminantAnalysis(), RidgeClassifierCV(), LinearSVC())

In [44]:
stacked_classifier = StackingCVClassifier(classifiers=classifiers, meta_classifier=RandomForestClassifier())

In [45]:
from sklearn.pipeline import Pipeline
clf_pipe = Pipeline([
    ('power_transformer', PowerTransformer()),
    ('stacked_classifier', stacked_classifier)
])

In [46]:
#Re initilise training and test data
X = data.drop(columns=['target'], axis=1)
Y = data['target'].to_numpy()

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=123)

In [48]:
clf_pipe.fit(X_train, y_train)


Pipeline(steps=[('power_transformer', PowerTransformer()),
                ('stacked_classifier',
                 StackingCVClassifier(classifiers=(LinearDiscriminantAnalysis(),
                                                   RidgeClassifierCV(alphas=array([ 0.1,  1. , 10. ])),
                                                   LinearSVC()),
                                      meta_classifier=RandomForestClassifier()))])

In [49]:
clf_pipe.score(X_test, y_test)

0.975

In [58]:
y_pred = clf_pipe.predict(X_test)

In [65]:
pred_df = pd.DataFrame({'Y-predictions': y_pred, 'Y-test': y_test})

In [69]:
pd.concat([pred_df.reset_index(), pd.DataFrame(X_test).reset_index()],axis=1).drop(columns=['index'],axis=1)

Unnamed: 0,Y-predictions,Y-test,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6
0,3,3,12.55,13.57,0.86,5.33,2.97,4.42,5.18
1,1,1,12.74,13.67,0.86,5.39,2.96,2.5,4.87
2,2,2,18.72,16.19,0.9,6.01,3.86,5.32,5.88
3,1,1,14.09,14.41,0.85,5.72,3.19,3.92,5.3
4,1,1,14.11,14.26,0.87,5.52,3.17,2.69,5.22
5,1,1,14.52,14.6,0.86,5.74,3.11,1.48,5.49
6,3,3,12.79,13.53,0.88,5.22,3.05,5.48,4.96
7,2,2,18.72,16.34,0.88,6.22,3.68,2.19,6.1
8,1,1,16.14,14.99,0.9,5.66,3.56,1.35,5.17
9,3,3,12.54,13.67,0.84,5.45,2.88,3.08,5.49


In [70]:
from joblib import dump

In [71]:
dump(clf_pipe, '../models/clf_pipe')

['../models/clf_pipe']

In [72]:
x_prediction_test = np.random.rand(100,7)

In [73]:
clf_pipe.predict(x_prediction_test)

array([2, 3, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1,
       2, 2, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2])

In [74]:
X_test

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6
157,12.55,13.57,0.86,5.33,2.97,4.42,5.18
26,12.74,13.67,0.86,5.39,2.96,2.5,4.87
77,18.72,16.19,0.9,6.01,3.86,5.32,5.88
31,14.09,14.41,0.85,5.72,3.19,3.92,5.3
20,14.11,14.26,0.87,5.52,3.17,2.69,5.22
52,14.52,14.6,0.86,5.74,3.11,1.48,5.49
187,12.79,13.53,0.88,5.22,3.05,5.48,4.96
95,18.72,16.34,0.88,6.22,3.68,2.19,6.1
4,16.14,14.99,0.9,5.66,3.56,1.35,5.17
154,12.54,13.67,0.84,5.45,2.88,3.08,5.49
