In [27]:
import numpy as np
import pandas as pd
import sys
sys.path.insert(0, '../SOM-LVQ')
import LVQ
from random import randrange

In [28]:
# Training set: columns 0-5 are used as input features, column 6 as the label.
# Forward slashes keep the relative path usable on every OS (a backslash path
# only resolves on Windows) and match the '../SOM-LVQ' path used for imports.
train = pd.read_csv("../data/SD-2X_rocktype.csv")
x_train = train.iloc[:, 0:6].values
# Take an explicit copy of the labels: the relabelling below then never writes
# through to `train`, and it cannot fail on a read-only view from pandas.
y_train = train.iloc[:, 6].values.copy()
# Relabel class 4 as class 0 (the classifiers below are built with n_classes=4
# and the reports list classes 0-3).
y_train[y_train == 4] = 0

In [29]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature minimum/maximum from the training features only, then
# rescale the training features with them. The fitted scaler stays available
# as `minmax` so that other data can be put on the same scale.
minmax = MinMaxScaler()
minmax.fit(x_train)
x_train = minmax.transform(x_train)

In [30]:
# Test set, laid out like the training file: columns 0-5 are the input
# features, column 6 the label.
# Forward slashes keep the relative path usable on every OS (a backslash path
# only resolves on Windows) and match the '../SOM-LVQ' path used for imports.
test = pd.read_csv("../data/SD-3X_rocktype.csv")
x_test = test.iloc[:, 0:6].values
# Take an explicit copy of the labels: the relabelling below then never writes
# through to `test`, and it cannot fail on a read-only view from pandas.
y_test = test.iloc[:, 6].values.copy()
# Same relabelling as for the training labels: class 4 becomes class 0.
y_test[y_test == 4] = 0
# Reuse the scaler fitted on the training features (transform only, no refit)
# so train and test share one scale.
x_test = minmax.transform(x_test)

In [31]:
# Draw a random subsample of the dataset, sampling with replacement
def subsample(x, y, ratio):
    """Return a bootstrap-style subsample of paired rows.

    Parameters
    ----------
    x : indexable sequence of feature rows.
    y : indexable sequence of labels, aligned with ``x`` by position.
    ratio : subsample size as a fraction of ``len(x)``; the number of draws
        is ``round(len(x) * ratio)``.

    Returns
    -------
    list
        ``[sample_x, sample_y]`` as two NumPy arrays holding the drawn rows
        and their labels in draw order.
    """
    n_sample = round(len(x) * ratio)

    # Each draw is an independent uniform pick over all row positions, so the
    # same row can be selected more than once.
    picks = []
    while len(picks) < n_sample:
        picks.append(randrange(len(x)))

    drawn_x = [x[pos] for pos in picks]
    drawn_y = [y[pos] for pos in picks]
    return [np.asarray(drawn_x), np.asarray(drawn_y)]

In [32]:
# Bootstrap Aggregation Algorithm
def bagging(x_train, y_train, x_test, sample_size, n_clfs, n_classes=4, n_neurons=25):
    """Build ``n_clfs`` LVQ classifiers on random subsamples and predict ``x_test``.

    Parameters
    ----------
    x_train, y_train : training rows and their labels; each classifier gets
        its own random subsample of them (drawn with replacement).
    x_test : iterable of instances to predict with every classifier.
    sample_size : subsample size as a fraction of ``len(x_train)``.
    n_clfs : number of classifiers in the ensemble.
    n_classes : forwarded to ``LVQ.LVQ`` (default 4, the previous fixed value).
    n_neurons : forwarded to ``LVQ.LVQ`` (default 25, the previous fixed value).

    Returns
    -------
    numpy.ndarray
        One row of predictions per classifier, in ``x_test`` order.
    """
    clfs = []
    for _ in range(n_clfs):
        sample_x, sample_y = subsample(x_train, y_train, sample_size)
        # A fresh empty list is passed as p_vectors for every classifier.
        # NOTE(review): no separate training call is made here; presumably the
        # LVQ constructor prepares the prototypes - confirm against the LVQ module.
        lvq = LVQ.LVQ(sample_x, sample_y, n_classes=n_classes, p_vectors=[], n_neurons=n_neurons)
        clfs.append([lvq.predict(instance) for instance in x_test])
    return np.asarray(clfs)

In [33]:
# Combine the per-classifier predictions by majority vote
def bagging_predict(predictions, n_row, n_clfs):
    """Return the majority-vote label for each of the first ``n_row`` instances.

    Parameters
    ----------
    predictions : 2-D indexable, ``predictions[j][i]`` being the label that
        classifier ``j`` assigned to instance ``i`` (non-negative integers,
        as required by ``np.bincount``).
    n_row : number of instances to vote on.
    n_clfs : number of classifiers whose votes are counted.

    Returns
    -------
    list
        One winning label per instance; on a tie the smallest label wins
        because ``argmax`` returns the first maximum.
    """
    winners = []
    for row in range(n_row):
        votes = [predictions[clf][row] for clf in range(n_clfs)]
        tally = np.bincount(votes)
        winners.append(tally.argmax())
    return winners

In [34]:
# Hand-rolled bagging run: every LVQ classifier sees a random half-size
# subsample of the training data, then the per-classifier test predictions
# are merged by majority vote.
# NOTE(review): subsample() draws from the `random` module and no seed is set
# in the visible cells, so the resulting report can differ between runs.
N_CLFS = 7  # ensemble size, shared by the training and voting calls
a = bagging(x_train, y_train, x_test, sample_size=0.5, n_clfs=N_CLFS)
y_pred = bagging_predict(a, n_row=len(a[0]), n_clfs=N_CLFS)

In [35]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 of the hand-rolled bagging ensemble on the
# test labels.
class_names = ['0', '1', '2', '3']
print(classification_report(y_test, y_pred, target_names=class_names))

             precision    recall  f1-score   support

          0       0.99      0.76      0.86       331
          1       0.96      0.95      0.95        95
          2       0.48      0.85      0.61        26
          3       0.69      0.89      0.78       205

avg / total       0.87      0.83      0.84       657



In [46]:
from sklearn.ensemble import BaggingClassifier

In [47]:
import sys
# Only add the module directory when it is not on the search path yet, so
# re-running this cell (or running it after the first import cell, which
# inserts the same path) does not pile up duplicate sys.path entries.
if '../SOM-LVQ' not in sys.path:
    sys.path.insert(0, '../SOM-LVQ')
import sklearn_LVQ

In [48]:
# Base learner for scikit-learn's bagging wrapper, configured like the LVQ
# classifiers in the hand-rolled bagging() above (4 classes, 25 neurons).
lvq = sklearn_LVQ.LVQ(x=x_train, y=y_train, n_classes=4, p_vectors=[], n_neurons=25)
# The base learner is passed positionally on purpose: newer scikit-learn
# releases renamed the `base_estimator` keyword to `estimator`, while the
# first positional parameter is the base learner in both old and new versions.
# Ensemble of 10 learners, each fitted on a sample of 40% of the training rows.
model = BaggingClassifier(lvq, n_estimators=10, max_samples=0.4)

In [49]:
# Fit the bagged ensemble on the scaled training features and their labels.
# fit() is the last expression of the cell, so the notebook displays the
# returned estimator's repr as the cell output.
model.fit(x_train, y_train)

BaggingClassifier(base_estimator=LVQ(epsilon=0.9, epsilon_dec_factor=0.001, n_classes=4, n_neurons=25,
  p_vectors=[<sklearn_LVQ.prototype object at 0x000002210BD52E80>, <sklearn_LVQ.prototype object at 0x000002210BD52E10>, <sklearn_LVQ.prototype object at 0x000002210BD52E48>, <sklearn_LVQ.prototype object at 0x00000...538],
       [0.36355, 0.57871, ..., 0.4261 , 0.42448]]),
  y=array([1, 1, ..., 3, 3], dtype=int64)),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=0.4, n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [50]:
# Per-class precision/recall/F1 of the scikit-learn bagging ensemble on the
# test labels, in the same format as the report for the hand-rolled ensemble.
bagged_pred = model.predict(x_test)
print(classification_report(y_test, bagged_pred, target_names=['0', '1', '2', '3']))

             precision    recall  f1-score   support

          0       0.97      0.86      0.91       331
          1       0.97      0.96      0.96        95
          2       0.50      0.85      0.63        26
          3       0.79      0.86      0.82       205

avg / total       0.89      0.87      0.88       657

