# Classifying IRIS and more, the Data Oriented Approach

In [None]:
!pip install numpy
!pip install pandas
!pip install sklearn
!pip install matplotlib

In [75]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier


import matplotlib
import matplotlib.pyplot as plt
from pyrsistent import PRecord, field

### Task 2 Redux ###

In [None]:
### OLD CODE
class ML_Dataset():
    """Container for features, targets and predictions, with a built-in loader.

    All three attributes are class-level defaults of ``None`` until a loader
    assigns instance values.
    """
    X = None  # feature matrix
    y = None  # target vector
    y_pred = None  # predictions; nothing in this class writes it

    def load_iris(self):
        """Fill ``X`` and ``y`` in place from scikit-learn's iris dataset.

        ``X`` keeps the columns from index 3 onward (petal width); ``y`` is 1
        where the iris target equals 2 and 0 otherwise.
        """
        iris = datasets.load_iris()
        self.X = iris["data"][:, 3:]  # petal width
        # Builtin ``int`` replaces the ``np.int`` alias, which was deprecated
        # in NumPy 1.20 and removed in later releases.
        self.y = (iris["target"] == 2).astype(int)


**Task 2**: Let us exchange the dataset. 

**Underlying Problem**: We're mixing code and data, or business logic and data. 

**DO**: The Data-Oriented approach is simple: decouple data from business logic, i.e. keep data and code separate.

In [80]:
### THE DO Approach
class ML_Dataset():
    """Plain data holder for features ``X``, targets ``y`` and predictions ``y_pred``.

    Every slot defaults to ``None``. The class carries no loading logic of
    its own; functions that receive an instance assign the attributes.
    """
    X = y = y_pred = None

def load_iris(m: ML_Dataset):
    """Fill ``m.X`` and ``m.y`` in place from scikit-learn's iris dataset.

    Args:
        m: Dataset object whose ``X`` and ``y`` attributes are overwritten.
            ``X`` receives the columns from index 3 onward (petal width);
            ``y`` receives 1 where the iris target equals 2 and 0 otherwise.
    """
    iris = datasets.load_iris()
    m.X = iris["data"][:, 3:]  # petal width
    # Builtin ``int`` replaces the ``np.int`` alias, which was deprecated in
    # NumPy 1.20 and removed in later releases.
    m.y = (iris["target"] == 2).astype(int)
    
## Load the old (iris) data: create an empty container, then let the free
## function load_iris assign its X and y.
m = ML_Dataset()
load_iris(m)

## load something new
def load_new(m: ML_Dataset):
    """Overwrite ``m.X`` and ``m.y`` with a small hand-written toy dataset.

    ``X`` becomes a 5x1 column holding 1..5 and ``y`` the matching five labels.
    """
    features = np.array([[value] for value in range(1, 6)])
    labels = np.array([1, 0, 0, 1, 1])
    m.X, m.y = features, labels

# Swap the data: the same container m is handed to a different loader function.
load_new(m)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  m.y = (iris["target"]==2).astype(np.int)


### Task 1 Redux

In [89]:
### OLD CODE
class ML_Dataset():
    """Mutable holder for features, targets and predictions, plus its own loader.

    The attributes start as class-level ``None`` defaults and can be
    reassigned freely on an instance.
    """
    X = None  # feature matrix
    y = None  # target vector
    y_pred = None  # predictions; nothing in this class writes it

    def load_iris(self):
        """Fill ``X`` and ``y`` in place from scikit-learn's iris dataset.

        ``X`` keeps the columns from index 3 onward (petal width); ``y`` is 1
        where the iris target equals 2 and 0 otherwise.
        """
        iris = datasets.load_iris()
        self.X = iris["data"][:, 3:]  # petal width
        # ``np.int`` was deprecated in NumPy 1.20 and removed in later
        # releases; the builtin ``int`` is the documented replacement.
        self.y = (iris["target"] == 2).astype(int)

# Demonstrate the mutability problem: nothing stops a later assignment from
# replacing the feature array.
m = ML_Dataset()
m.X = np.linspace(0,3,1000).reshape(-1,1)  # 1000 evenly spaced values from 0 to 3, as one column
m.X= 1  # plain attribute assignment replaces the array without any error
print(m.X)  # prints 1, the array is gone

1


Here's what we encountered at step 1 last time: X got overwritten because we thought we didn't need it anymore.

**Task 1**: Let's compare the predictions vs. the old classes.

**Underlying Problem**: Stuff is mutable.

**DOP Solution**: Use mostly immutable (data) structures.

In [None]:
### NEW DOP Implementation
# We're using the module for immutable python pyrsistent here

class ML_Dataset(PRecord):
    """Immutable dataset record built on pyrsistent's ``PRecord``.

    ``set`` returns a new record instead of changing this one, and item
    assignment raises ``TypeError``.
    """
    X = field()  # features
    y = field()  # targets
    y_pred = field()  # predictions
    X_new = field()  # NOTE(review): presumably new inputs to predict on; not used in this cell
     
def load_iris():
    """Build an ``ML_Dataset`` record from scikit-learn's iris dataset.

    Returns:
        A record whose ``X`` is the iris data from column index 3 onward
        (petal width) and whose ``y`` is the unmodified iris target array.
    """
    bunch = datasets.load_iris()
    # Each set() yields a new record, so the calls can simply be chained.
    return ML_Dataset().set(X=bunch["data"][:, 3:]).set(y=bunch["target"])

## Just for one, let's try it out!

r = load_iris()
# set() returns a new record; rebinding r is what makes the new X visible.
r = r.set(X = np.linspace(0,3,1000).reshape(-1,1))

# The record returned by this set() is discarded, so r itself stays untouched.
r.set(X="1")
# print(r.X)
# >> [0.        ]...... [0.01201201]
## r.X is still the linspace array. Let's try direct item assignment instead:
r["X"]=1
## >> TypeError: 'ML_Dataset' object does not support item assignment
# Nice: X can no longer be overwritten in place by accident.

### Task 3

In [None]:
### OLD CODE
class ML_CLF():
    """Bundles one classifier with the code that trains and applies it.

    ``clf`` is ``None`` until ``fit_clf`` has run; ``trial_note`` is a
    free-text label for the experiment.
    """
    clf = None
    trial_note = "trial 1"

    def fit_clf(self, m: ML_Dataset):
        """Create a probability-enabled SVC, store it in ``clf`` and fit it on ``m.X``/``m.y``."""
        svc_options = {
            "gamma": 'scale',
            "decision_function_shape": 'ovo',
            "probability": True,
        }
        # Store the estimator before fitting, then train that same object.
        model = self.clf = svm.SVC(**svc_options)
        model.fit(m.X, m.y)

    def predict(self, X):
        """Return the stored classifier's ``predict`` output for ``X``."""
        labels = self.clf.predict(X)
        return labels

    def write_preds(self, m):
        """Store this classifier's ``predict_proba(m.X)`` output in ``m.y_pred``."""
        probabilities = self.clf.predict_proba(m.X)
        m.y_pred = probabilities

#### -------------------------------------------------------------------------------------------------------------------------------------------------
### Now let's create another classifier.

# Print the instance to look for the trial note. ML_CLF defines no __repr__ or
# __str__, so this prints the default object representation; the note itself
# is only reachable as c.trial_note.
c = ML_CLF()
print(c)

**Task 3**: Let's change the classifier class and add a "trial note" to it; we want to be able to store a comment like "used KNN this time to try out ...".

**Underlying problem:** We're using 2 special-purpose classes to solve 2 problems, throwing away general-purpose classes like dicts, arrays, ... that already come well supplied with default functions.

**Solution**: Let's use one of the cool general-purpose classes to solve this, like the "Map" (which in Python is a dict).

While we're at this, we might as well separate code from data here as well...

In [103]:
### NEW FUN CODE ----------------------------------------------------------------
class ML_Dataset(PRecord):
    """Immutable dataset record built on pyrsistent's ``PRecord``.

    Updates go through ``set``, which returns a new record.
    """
    X = field()  # training features
    y = field()  # training targets
    y_pred = field()  # predictions written by predict_stuff
    X_new = field()  # inputs that predict_stuff predicts on
     
def load_iris():
    """Return an ``ML_Dataset`` record filled from scikit-learn's iris dataset.

    ``X`` holds the iris data from column index 3 onward (petal width) and
    ``y`` holds the iris target array as delivered.
    """
    iris = datasets.load_iris()
    petal_width, target = iris["data"][:, 3:], iris["target"]
    empty = ML_Dataset()
    # Each set() returns a new record; the empty one is never changed.
    return empty.set(X=petal_width).set(y=target)
    
class ML_Predictor(PRecord):
    """Immutable record pairing a classifier with a free-text trial note."""
    clf = field()  # classifier object; predict_stuff calls its fit and predict_proba
    note = field()  # free-text comment describing the trial

    
def predict_stuff(m: ML_Predictor, d: ML_Dataset):
    """Fit the predictor's classifier on ``d`` and return ``d`` with predictions.

    Args:
        m: Predictor record; ``m.clf`` must provide ``fit`` and
            ``predict_proba``.
        d: Dataset record supplying the training data ``X``/``y`` and the
            inputs ``X_new`` to predict on.

    Returns:
        A new dataset record, produced by ``d.set``, whose ``y_pred`` holds
        ``predict_proba(d.X_new)``.
    """
    # Train on the dataset that was passed in, not on the notebook-global ``r``.
    m_2 = m.set(clf=m.clf.fit(d.X, d.y))
    # NOTE: scikit-learn's ``fit`` trains the estimator in place and returns
    # that same estimator, so ``m.clf`` and ``m_2.clf`` refer to one fitted
    # object; only the record wrapper is new.

    d_2 = d.set(y_pred=m_2.clf.predict_proba(d.X_new))
    return d_2

### Our Program ----------------------------------------------------------------

# Load the iris record; every later update goes through set() and yields a new record.
r = load_iris()

# r_2 is r plus the inputs to predict on: 1000 evenly spaced values from 0 to 3, as one column.
r_2 = r.set(X_new = np.linspace(0,3,1000).reshape(-1,1))

# Trial 1: an SVC plus its note, stored together in one record.
c = ML_Predictor(clf=svm.SVC(gamma='scale', decision_function_shape='ovo', probability=True), note="This is trial 1; using SVC")
# Trial 2: derived from c by replacing both fields; c itself is unchanged, as the prints below show.
c_2 = c.set(clf=svm.SVC(gamma='scale', decision_function_shape='ovo', probability=True, kernel='poly'), note="This is trial 2\
; using SVC with polyn. kernel")
print(c)
print(c_2)
print(c.items()) # we can use all the default cool functions on this generic Map!

ML_Predictor(note='This is trial 1; using SVC', clf=SVC(decision_function_shape='ovo', probability=True))
ML_Predictor(note='This is trial 2; using SVC with polyn. kernel', clf=SVC(decision_function_shape='ovo', kernel='poly', probability=True))
pvector([('note', 'This is trial 1; using SVC'), ('clf', SVC(decision_function_shape='ovo', probability=True))])
