# Say hello world to ML

### Comparing apples and oranges

In [29]:
from sklearn import tree

# Each training sample is a pair of numbers; the two lists are index-aligned,
# so labels[i] is the fruit that goes with features[i].
# NOTE(review): the meaning of the two columns is not stated in this notebook —
# presumably a size/weight value and a 0/1 flag; confirm.
features = [[140, 1], [130, 1], [150, 0], [170, 0]]
labels = ['apple', 'apple', 'orange', 'orange']

# fit() hands back the fitted estimator, so building and training can be chained.
clf = tree.DecisionTreeClassifier().fit(features, labels)

# Classify a single sample (identical to the first training row).
print(clf.predict([[140, 1]]))

['apple']


## Write a pipeline

### Pipeline with a Decision Tree classifier

In [1]:
# Everything this cell needs, imported up front.
from sklearn import datasets, tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the iris dataset and separate the feature rows (X) from the targets (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out half of the samples for testing. No random_state is passed, so the
# split — and therefore the accuracy printed below — can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Train a decision tree on the training half ...
my_classifier = tree.DecisionTreeClassifier()
my_classifier.fit(X_train, y_train)

# ... and predict a label for every held-out sample.
predictions = my_classifier.predict(X_test)
# print(predictions)  # uncomment to inspect the raw predictions

# Compare the predictions against the true held-out labels.
print(accuracy_score(y_test, predictions))

0.9333333333333333


### Pipeline with a KNN classifier

In [2]:
# Everything this cell needs, imported up front.
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset and separate the feature rows (X) from the targets (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out half of the samples for testing. No random_state is passed, so the
# split — and therefore the accuracy printed below — can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Same pipeline as the decision-tree cell; only the estimator is swapped for
# scikit-learn's k-nearest-neighbours classifier with its default settings.
my_classifier = KNeighborsClassifier()
my_classifier.fit(X_train, y_train)

# Predict a label for every held-out sample.
predictions = my_classifier.predict(X_test)
# print(predictions)  # uncomment to inspect the raw predictions

# Compare the predictions against the true held-out labels.
print(accuracy_score(y_test, predictions))

0.9733333333333334


## Writing a basic classifier

### A random classifier

In [15]:
#######################################
# Random classifier #################

import random

class random_classifier:
    """Baseline classifier that guesses.

    It exposes the same fit/predict pair as the other classifiers in this
    notebook, but every prediction is simply a label drawn at random from
    the training labels; the features are never looked at.
    """

    def fit(self, x_train, y_train):
        """Remember the training data; nothing is actually learned.

        x_train -- training feature rows (stored, but not read by predict)
        y_train -- training labels; the pool predict() samples from
        """
        self.x_train, self.y_train = x_train, y_train

    def predict(self, x_test):
        """Return a list holding one randomly chosen training label per row.

        x_test -- iterable of rows to label; only the number of rows matters.
        Requires fit() to have been called, since it reads self.y_train.
        """
        # One independent random.choice() draw per test row, in row order.
        return [random.choice(self.y_train) for _ in x_test]

# Everything this cell needs, imported up front.
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier  # imported but not used in this cell

# Load the iris dataset and separate the feature rows (X) from the targets (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out half of the samples for testing. No random_state is passed, so the
# split — and therefore the accuracy printed below — can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Same pipeline as before, but driven by the hand-written random_classifier
# defined earlier in this cell instead of a scikit-learn estimator.
my_classifier = random_classifier()
my_classifier.fit(X_train, y_train)

# Predict a label for every held-out sample.
predictions = my_classifier.predict(X_test)
# print(predictions)  # uncomment to inspect the raw predictions

# Compare the predictions against the true held-out labels.
print(accuracy_score(y_test, predictions))


0.3333333333333333


### A simplified version of the KNN classifier

In [22]:
#######################################
# Simplified KNN classifier ###########

from scipy.spatial import distance

def dist(a,b):
    """Return the Euclidean (straight-line) distance between points a and b.

    Thin wrapper that delegates to scipy.spatial.distance.euclidean.
    """
    separation = distance.euclidean(a, b)
    return separation


class KNN_classifier:
    """Bare-bones nearest-neighbour classifier.

    Only the single closest training sample is consulted (i.e. k = 1):
    a test row receives the label of whichever training row has the
    smallest dist() to it.
    """

    def fit(self, x_train, y_train):
        """Memorise the training set; there is no separate training step.

        x_train -- training feature rows (must support len() and indexing)
        y_train -- labels, index-aligned with x_train
        """
        self.x_train, self.y_train = x_train, y_train

    def closest(self, row):
        """Return the label of the training sample nearest to `row`.

        Every training row is scanned once. When several rows are equally
        near, the one with the lowest index wins because only a strictly
        smaller distance replaces the current best.
        """
        # Start from the first training row as the provisional winner.
        best_index = 0
        best_dist = dist(row, self.x_train[0])
        for index in range(1, len(self.x_train)):
            candidate = dist(row, self.x_train[index])
            if not candidate < best_dist:
                continue  # not strictly closer: keep the current winner
            best_index, best_dist = index, candidate
        return self.y_train[best_index]

    def predict(self, x_test):
        """Return a list with the nearest-neighbour label for each row of x_test."""
        return [self.closest(row) for row in x_test]

# Everything this cell needs, imported up front.
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier  # imported but not used in this cell

# Load the iris dataset and separate the feature rows (X) from the targets (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out half of the samples for testing. No random_state is passed, so the
# split — and therefore the accuracy printed below — can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Same pipeline as before, but driven by the hand-written KNN_classifier
# defined earlier in this cell instead of a scikit-learn estimator.
my_classifier = KNN_classifier()
my_classifier.fit(X_train, y_train)

# Predict a label for every held-out sample.
predictions = my_classifier.predict(X_test)
# print(predictions)  # uncomment to inspect the raw predictions

# Compare the predictions against the true held-out labels.
print(accuracy_score(y_test, predictions))


0.9466666666666667
