## Hello World of ML

In [6]:
import sklearn
from sklearn import tree

# Codes used in the second column of every sample (surface texture).
SMOOTH, ROUGH = 0, 1

# Each row is [numeric measurement, texture code]; label[i] is the fruit
# that goes with feature[i].
feature = [
    [150, SMOOTH],
    [140, ROUGH],
    [160, ROUGH],
    [120, SMOOTH],
]
label = ['Apple', 'Orange', 'Orange', 'Apple']

# Fit a decision tree on the four labelled samples.
clf = tree.DecisionTreeClassifier()
clf.fit(feature, label)

# Classify one unseen smooth sample and print the predicted label.
unseen = [[190, SMOOTH]]
prediction = clf.predict(unseen)
print(prediction)

['Apple']


## Working with Iris Dataset

In [9]:
from sklearn.datasets import load_iris as iris
from sklearn.tree import DecisionTreeClassifier as DTC
import numpy as np

# Load the iris dataset
ir = iris()

# Rows at these indices are held out for testing
test_id = [0, 50, 100]
test_data, test_label = ir.data[test_id], ir.target[test_id]

# Every row that was not held out is used for training: build a boolean
# mask that is False exactly at the held-out indices.
keep = np.ones(len(ir.target), dtype=bool)
keep[test_id] = False
train_data, train_label = ir.data[keep], ir.target[keep]

# Fit a decision tree on the training rows
clf = DTC()
clf.fit(train_data, train_label)

# First line: predicted classes for the held-out rows; second line: true classes
print(clf.predict(test_data))
print(test_label)

[0 1 2]
[0 1 2]


## Check metrics

In [15]:
from sklearn.datasets import load_iris as iris
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.metrics import accuracy_score 
from sklearn.model_selection import train_test_split
import numpy as np

# Fixed seed: without it the split (and the tree fitted on it) changes on
# every run, so the two accuracies printed below would not be reproducible.
SEED = 42

# Set iris dataset
ir = iris()

# Split Train and Test Data: half of the rows go to each side, and the
# shuffle is pinned by SEED.
train_data, test_data, train_label, test_label = train_test_split(
    ir.data, ir.target, test_size = 0.5, random_state = SEED
)

# Train Decision Tree (seeded so equally good splits are resolved the same
# way on every run)
dtc = DTC(random_state = SEED)
dtc.fit(train_data, train_label)

# Train K Nearest Neighbors with its default settings
knn = KNN()
knn.fit(train_data, train_label)

# Predict Output for the held-out half with both models
dtc_p = dtc.predict(test_data)
knn_p = knn.predict(test_data)

# Calculate Accuracy Score: decision tree first, then K nearest neighbors
print(accuracy_score(test_label, dtc_p))
print(accuracy_score(test_label, knn_p))

0.96
0.96


## Build your own Classifier

In [44]:
from sklearn.datasets import load_iris as iris
from sklearn.metrics import accuracy_score 
from sklearn.model_selection import train_test_split
import random

# Build my Scrapy Classifier
class ScrapyClasifier:
    """Minimal 1-nearest-neighbour classifier.

    ``fit`` memorises the training samples. ``predict`` gives every query row
    the label of the closest training row, where "closest" means smallest
    Euclidean distance over the features seen during ``fit``.
    """

    def __init__(self):
        # No hyper-parameters; all state is created by fit().
        pass

    def fit(self, data, label):
        """Store the training set for later nearest-neighbour look-ups.

        Args:
            data: Non-empty sequence of numeric rows, all of the same length.
            label: Sequence holding one label per row of ``data``.

        Raises:
            ValueError: If ``data`` is empty, if ``label`` does not have one
                entry per row, or if the rows differ in length.
        """
        # len() is used instead of truthiness so array-like inputs work too.
        if len(data) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(data) != len(label):
            raise ValueError(
                "data and label must have the same length "
                "(got %d and %d)" % (len(data), len(label))
            )
        features = len(data[0])
        if any(len(row) != features for row in data):
            raise ValueError("all training rows must have the same number of features")

        self.data = data
        self.label = label
        self.features = features

    def predict(self, data):
        """Return the label of the nearest training row for each query row.

        Args:
            data: Sequence of numeric rows, each with as many features as the
                training rows passed to ``fit``.

        Returns:
            A list with one predicted label per row of ``data`` (empty when
            ``data`` is empty).

        Raises:
            ValueError: If a query row has a different number of features
                than the training rows.
        """
        # (A random baseline would be: [random.choice(self.label) for _ in data])
        prediction = []
        for test_row in data:
            if len(test_row) != self.features:
                raise ValueError(
                    "expected %d features per row, got %d"
                    % (self.features, len(test_row))
                )
            # min() returns the first minimal index, so ties go to the
            # earliest training row.
            nearest = min(
                range(len(self.data)),
                key=lambda idx: self._squared_distance(test_row, self.data[idx]),
            )
            prediction.append(self.label[nearest])
        return prediction

    @staticmethod
    def _squared_distance(a, b):
        """Squared Euclidean distance between two equal-length rows.

        The square root is skipped on purpose: it does not change which row
        is nearest, and it avoids relying on ``1/2`` being float division.
        """
        return sum((x - y) ** 2 for x, y in zip(a, b))

# Fixed seed: without it the train/test split changes on every run, so the
# accuracy printed at the end would not be reproducible.
SEED = 42

# Set iris dataset
ir = iris()

# Split Train and Test Data: half of the rows go to each side, and the
# shuffle is pinned by SEED.
train_data, test_data, train_label, test_label = train_test_split(
    ir.data, ir.target, test_size = 0.5, random_state = SEED
)

# Train my Classifier (it only memorises the training rows)
clf = ScrapyClasifier()
clf.fit(train_data, train_label)

# Predict Output for the held-out half
prediction = clf.predict(test_data)

# Calculate Accuracy Score against the true labels
print(accuracy_score(test_label, prediction))

0.96
