**Ensemble**

**Exercise 1.**

In [50]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from itertools import combinations


In [51]:
iris = load_iris()

data_set = iris.data[0:len(iris.target)-20,:]
labels = iris.target[0:len(iris.target)-20]
unique_labels = np.unique(iris.target)
test_data_set = iris.data[-20:,:]
test_labels = iris.target[-20:]

listOfClasifiers = [LinearRegression(), KNeighborsClassifier(), SVC(), DecisionTreeClassifier(), GaussianNB(),QuadraticDiscriminantAnalysis()]


In [52]:
def build_classifiers(data_set, labels, listOfClasifiers):

    return [clasifier.fit(data_set, labels) for clasifier in listOfClasifiers]

print(build_classifiers(data_set, labels, listOfClasifiers))

[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False), KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform'), SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best'), GaussianNB(priors=None, var_smoothing=1e-09), Quadr

In [53]:
def build_stacked_classifier(classifiers):
    output = []
    for classifier in classifiers:
        output.append(classifier.predict(data_set))
    output = np.array(output).reshape((130,3))
    
    stacked_classifier = DecisionTreeClassifier() # set here
    stacked_classifier.fit(output.reshape((130,3)), labels.reshape((130,)))
    test_set = []
    for classifier in classifiers:
        test_set.append(classifier.predict(test_data_set))
    test_set = np.array(test_set).reshape((len(test_set[0]),3))
    predicted = stacked_classifier.predict(test_set)
    return predicted

In [54]:
classifiers = build_classifiers(data_set, labels, listOfClasifiers)
myResults = []
for j in combinations(classifiers, 3):
  predicted = build_stacked_classifier(j)
  accuracy = accuracy_score(test_labels, predicted)
  myResults.append([j, accuracy])
  print(j)

maximumValue = max([j[1], index] for index, j in enumerate(myResults))
bestResult = myResults[maximumValue[1]]
print(bestResult)

(LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False), KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform'), SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False))
(LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False), KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform'), DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_i

**Exercise 2.**

In [55]:
# prepare data set

def generate_data(sample_number, feature_number, label_number):
    data_set = np.random.random_sample((sample_number, feature_number))
    labels = np.random.choice(label_number, sample_number)
    return data_set, labels

labels = 2
dimension = 2
test_set_size = 1000
train_set_size = 5000
train_set, train_labels = generate_data(train_set_size, dimension, labels)
test_set, test_labels = generate_data(test_set_size, dimension, labels)

# init weights
number_of_iterations = 10
weights = np.ones((test_set_size,)) / test_set_size


def train_model(classifier, weights):
    return classifier.fit(X=test_set, y=test_labels, sample_weight=weights)

def calculate_error(model):
    predicted = model.predict(test_set)
    I=calculate_accuracy_vector(predicted, test_labels)
    Z=np.sum(I)
    return (1+Z)/1.0


def calc_accuracy(predicted, labels):
    result = []

    for i in range(len(predicted)):
      result.append(0) if predicted[i] == labels[i] else result.append(1)

    return result

def set_new_weights(model):
    accuracy_vector = np.array(calc_accuracy(model.predict(test_set), test_labels))
    return (accuracy_vector + 1) / accuracy_vector.sum()

classifier = DecisionTreeClassifier(max_depth=1, random_state=1)
classifier.fit(X=train_set, y=train_labels)
alphas = []
classifiers = []
for iteration in range(number_of_iterations):
    model = train_model(classifier, weights)
    weights = set_new_weights(model)
    classifiers.append(model)

print(weights)


validate_x, validate_label = generate_data(1, dimension, labels)

[0.00394477 0.00394477 0.00394477 0.00197239 0.00394477 0.00394477
 0.00394477 0.00197239 0.00394477 0.00394477 0.00197239 0.00197239
 0.00197239 0.00197239 0.00394477 0.00197239 0.00394477 0.00394477
 0.00197239 0.00394477 0.00197239 0.00394477 0.00394477 0.00394477
 0.00197239 0.00197239 0.00394477 0.00394477 0.00197239 0.00197239
 0.00197239 0.00197239 0.00197239 0.00197239 0.00394477 0.00197239
 0.00394477 0.00394477 0.00394477 0.00197239 0.00197239 0.00197239
 0.00394477 0.00394477 0.00394477 0.00394477 0.00197239 0.00197239
 0.00197239 0.00197239 0.00394477 0.00394477 0.00197239 0.00197239
 0.00394477 0.00394477 0.00197239 0.00394477 0.00394477 0.00197239
 0.00197239 0.00394477 0.00197239 0.00394477 0.00197239 0.00394477
 0.00394477 0.00394477 0.00197239 0.00197239 0.00394477 0.00197239
 0.00394477 0.00197239 0.00394477 0.00394477 0.00197239 0.00394477
 0.00197239 0.00197239 0.00394477 0.00197239 0.00197239 0.00197239
 0.00394477 0.00197239 0.00394477 0.00394477 0.00394477 0.0019

In [56]:
validate_x, validate_label = generate_data(1, dimension, labels)

def get_prediction(x):
   return classifiers[-1].predict(x)

prediction = get_prediction(validate_x)[0]
print(prediction)

1
