In [29]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import math
import numpy as np
import os
import random as rnd
import sys


In [2]:
dfChips = pd.read_csv('chips.csv')
chipsTarget = dfChips['class']

dfGeyser = pd.read_csv('geyser.csv')
geyserTarget = dfGeyser['class']

In [3]:
XChips = dfChips[['x', 'y']]
yChips = dfChips['class']

In [4]:
XGeyser = dfGeyser[['x', 'y']]
yGeyser = dfGeyser['class']

In [5]:
Y = []
for val in yChips:
    if(val == 'N'):
        Y.append(-1)
    else:
        Y.append(1)
        
X = XChips.values.tolist()

X, Y = shuffle(X, Y)

In [6]:
def splitByBlock(X, Y, block, i):
    x_train = X[0:(i*block)] + X[(i*block) + block:]
    y_train = Y[0:(i*block)] + Y[(i*block) + block:]
    x_test = X[(i*block): (i*block) + block]
    y_test = Y[(i*block): (i*block) + block]
    return x_train, y_train, x_test, y_test

In [7]:
def printDots(x, y, bg_x, bg_y):
    n = len(x)
    m = len(bg_x)
    
    P_x1 = [x[i][0] for i in range(n) if y[i] >= 0]
    P_x2 = [x[i][1] for i in range(n) if y[i] >= 0]
            
    N_x1 = [x[i][0] for i in range(n) if y[i] < 0]
    N_x2 = [x[i][1] for i in range(n) if y[i] < 0]
    
    P_b1 = [bg_x[i][0] for i in range(m) if bg_y[i] >= 0]
    P_b2 = [bg_x[i][1] for i in range(m) if bg_y[i] >= 0]
            
    N_b1 = [bg_x[i][0] for i in range(m) if bg_y[i] < 0]
    N_b2 = [bg_x[i][1] for i in range(m) if bg_y[i] < 0]
    
    plt.scatter(P_x1, P_x2, marker='+', color='green')
    plt.scatter(N_x1, N_x2, marker='_', color='red')
    plt.scatter(P_b1, P_b2, marker='.', color='green', alpha = 0.2)
    plt.scatter(N_b1, N_b2, marker='.', color='red', alpha = 0.2)

In [35]:
class Simple_Classifier:
    
    def fit(self, ds, weights=None):
        if weights == None:
            weights = [1.0 / len(ds) for e in range(len(ds))]
        indices = [i for i in range(len(ds))]
        
        min_error = sys.maxsize
        
        best_f = -1
        best_b = None
        best_s = 0
        
        for f in range(len(ds[0]) - 1):
            ds, indices = zip(*sorted(zip(ds, indices), key = lambda d:d[0][f]))
            total_error = sum(weights)
            cur_error = sum([weights[i] for i in range(len(ds)) if ds[i][-1] == -1])
            
            for i in range(len(ds) - 1):
                index = indices[i]
                
                if ds[i][-1] == 1:
                    cur_error += weights[index]
                else:
                    cur_error -= weights[index]
                
                if ds[i][f] == ds[i + 1][f]:
                    continue
                
                if cur_error < min_error:
                    min_error = cur_error
                    best_f = f
                    best_b = (ds[i][f] + ds[i + 1][f]) / 2
                    best_s = 1
                if (total_error - cur_error) < min_error:
                    min_error = total_error - cur_error
                    best_f = f
                    best_b = (ds[i][f] + ds[i + 1][f]) / 2
                    best_s = -1
                    
        self.f = best_f
        self.b = best_b
        self.s = best_s
    def predict(self, d):
        if d[self.f] < self.b:
            return self.s
        else:
            return -1.0 * self.s

In [43]:
EPOCHS = 10

class Ada_Boost:
    def fit(self, ds):
        weights = [1.0 / len(ds) for e in range(len(ds))]
        for epoch in range(EPOCHS):
            simple = Simple_Classifier()
            simple.fit(ds, weights)
            error = 0
            for i in range(len(ds)):
                if simple.predict(ds[i][:-1]) != d[i][-1]:
                    error += weights[i]
#             error = sum([weights[i] for i in range(len(ds)) if simple.predict(ds[i][:-1]) != d[i][-1]])
            if error >= 0.5:
                break;
            alpha = 0.5 * math.log((1 - error) / error)
            
            ws = 0.0
            for i in range(len(ds)):
                weights[i] *= math.exp(-1.0 * alpha * ds[i][-1] * simple.predict(ds[i][:-1]))
                ws += weights[i]
            weight = [w / ws for w in weights]
            self.classifier = simple
            if error == 0:
                break
    def predict(self, d):
        return self.classifier.predict(d)

In [15]:
ds_chips = dfChips.to_numpy()
for d in ds_chips:
    d[-1] = -1 if d[-1] == 'N' else 1

In [27]:
booster = Ada_Boost()
ds_chips

array([[0.05126699999999999, 0.6995600000000001, 1],
       [-0.09274199999999999, 0.68494, 1],
       [-0.21370999999999998, 0.69225, 1],
       [-0.375, 0.5021899999999999, 1],
       [-0.51325, 0.46563999999999994, 1],
       [-0.52477, 0.2098, 1],
       [-0.39804, 0.034357, 1],
       [-0.30588000000000004, -0.19225, 1],
       [0.016705, -0.40424, 1],
       [0.13191, -0.51389, 1],
       [0.38537, -0.56506, 1],
       [0.5293800000000001, -0.5212, 1],
       [0.6388199999999999, -0.24341999999999997, 1],
       [0.73675, -0.18494000000000002, 1],
       [0.54666, 0.48757, 1],
       [0.322, 0.5826, 1],
       [0.16647, 0.53874, 1],
       [-0.046659, 0.81652, 1],
       [-0.17339000000000002, 0.6995600000000001, 1],
       [-0.47868999999999995, 0.63377, 1],
       [-0.60541, 0.59722, 1],
       [-0.62846, 0.33405999999999997, 1],
       [-0.5938899999999999, 0.005117, 1],
       [-0.42108, -0.27266, 1],
       [-0.11578, -0.39693, 1],
       [0.20104, -0.6016100000000001, 1],
 

In [44]:
booster.fit(ds_chips)

TypeError: 'float' object is not subscriptable

In [38]:
1.0 == 1

True