<a href="https://colab.research.google.com/github/rtajeong/M3_new/blob/main/M3_lab33a_scikit_learn_object_rev1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Scikit-Learn API convention
- to show how scikit-learn objects are made
- all objects share a consistent and simple interface
- The main objects in sklearn are:
   - estimator E: to learn from data
     - E.fit(X,y) or E.fit(X)
   - predictor P: to make prediction for the data
     - P.predict(X)
     - (to quantify certainty of a prediction)
       - P.predict_proba(X) or P.decision_function(X)
   - transformer T: filtering or modifying the data
     - T.transform(X), or T.fit_transform(X)
   - goodness of the model:
     - score = model.score(X, y) for supervised models, or model.score(X) for unsupervised ones


## Example 1 - transformer (unsupervised)

In [None]:
import numpy as np


In [None]:
def _mean_and_std(X, axis=0):
   
    X = np.asarray(X)
    mean_ = X.mean(axis)
    std_ = X.std(axis)

    return mean_, std_

In [None]:
# Small 2x3 integer matrix used to try out the helper above
data = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
data

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# Column-wise statistics (axis=0) followed by row-wise statistics (axis=1)
_mean_and_std(data, 0), _mean_and_std(data, 1)

((array([2.5, 3.5, 4.5]), array([1.5, 1.5, 1.5])),
 (array([2., 5.]), array([0.81649658, 0.81649658])))

In [None]:
class my_StandardScaler():
    """Minimal standard scaler that follows the scikit-learn transformer API.

    Every feature (column) is centred on its mean and divided by its
    standard deviation, both learned by ``fit``.  All arithmetic is done in
    ``float32`` and the input passed by the caller is never modified.

    A feature whose standard deviation is zero (a constant column) is only
    centred, not divided, so the result never contains NaN/inf caused by a
    division by zero.  As a consequence a scaler that has not been fitted
    yet (``mean_ == std_ == 0``) leaves the data unchanged.

    Attributes
    ----------
    mean_ : float or ndarray
        Per-feature mean learned by ``fit`` (``0.`` before fitting).
    std_ : float or ndarray
        Per-feature standard deviation learned by ``fit`` (``0.`` before
        fitting).
    """

    def __init__(self):
        # Placeholders so both attributes exist and can be inspected before fit().
        self.mean_, self.std_ = 0., 0.

    @staticmethod
    def _as_float32(X):
        """Return ``X`` as float32 data; plain sequences become ndarrays."""
        if hasattr(X, "astype"):
            # astype() hands back a new object by default, so the caller's
            # data is left untouched.
            return X.astype(np.float32)
        return np.asarray(X, dtype=np.float32)

    def _safe_std(self):
        """Return ``std_`` as float32 with zero entries replaced by 1."""
        std = np.asarray(self.std_, dtype=np.float32)
        return np.where(std == 0, np.float32(1), std)

    def fit(self, X, y=None):
        """Learn the per-feature mean and standard deviation of ``X``.

        ``y`` is accepted only for API compatibility and is ignored.
        Returns ``self`` so calls can be chained.
        """
        X = self._as_float32(X)
        self.mean_, self.std_ = _mean_and_std(X, axis=0)
        return self

    def transform(self, X, y=None):
        """Return a standardized float32 copy of ``X`` using the fitted statistics.

        ``y`` is accepted only for API compatibility and is ignored.
        """
        X = self._as_float32(X)
        return (X - self.mean_) / self._safe_std()

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the standardized version of the same data."""
        return self.fit(X, y).transform(X, y)

    def inverse_transform(self, X):
        """Map standardized data back to the original scale (float32 copy)."""
        X = self._as_float32(X)
        # Mirror transform(): multiply by the same zero-safe divisor, then re-centre.
        return X * self._safe_std() + self.mean_

In [None]:
# 1-D sample data and a fresh, not yet fitted, scaler instance
X = np.array([1,2,3,4,5,6,7,8,9,10])
sc = my_StandardScaler()

In [None]:
# fit() has not been called yet, so these are the placeholders assigned in __init__
sc.mean_, sc.std_

(0.0, 0.0)

In [None]:
# List every attribute and method available on the scaler instance
dir(sc)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'fit',
 'fit_transform',
 'inverse_transform',
 'mean_',
 'std_',
 'transform']

In [None]:
# Learn the mean and standard deviation from X; fit() returns the scaler itself
sc.fit(X)

<__main__.my_StandardScaler at 0x7f4f1ba98700>

In [None]:
# After fit() the attributes hold the statistics computed from X
sc.mean_, sc.std_

(5.5, 2.8722813)

In [None]:
# Standardize X with the fitted statistics; the trailing expression displays the result
X_sc = sc.transform(X); X_sc

array([-1.5666989 , -1.2185436 , -0.87038827, -0.52223295, -0.17407766,
        0.17407766,  0.52223295,  0.87038827,  1.2185436 ,  1.5666989 ],
      dtype=float32)

In [None]:
# Undo the scaling to check that the original values come back (up to float32 rounding)
sc.inverse_transform(X_sc)

array([ 1.       ,  1.9999998,  3.       ,  4.       ,  5.       ,
        6.       ,  7.       ,  8.       ,  9.       , 10.       ],
      dtype=float32)

In [None]:
# Same standardization with scikit-learn's own StandardScaler, for comparison.
# The 1-D X is reshaped into a single column before being passed in.
# NOTE: this cell rebinds `sc` and `X_sc`, replacing the objects created above.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_sc = sc.fit_transform(X.reshape(-1,1))
X_sc

array([[-1.5666989 ],
       [-1.21854359],
       [-0.87038828],
       [-0.52223297],
       [-0.17407766],
       [ 0.17407766],
       [ 0.52223297],
       [ 0.87038828],
       [ 1.21854359],
       [ 1.5666989 ]])

## Example 2 - predictor (supervised)

In [None]:
import numpy as np
from sklearn.metrics import r2_score

# my own OLS (Ordinary Least Squares)
class my_OLS():
    """Ordinary least squares linear regression with a scikit-learn style API.

    Attributes
    ----------
    W_ : ndarray
        Fitted parameters: one weight per feature followed by the bias
        (intercept) as the last entry.  Empty until ``fit`` is called.
    """

    def __init__(self):
        self.W_ = np.array([])

    @staticmethod
    def _add_bias(X):
        """Append a column of ones to the 2-D ``X`` so the bias is learned as a weight."""
        X = np.asarray(X)
        return np.concatenate([X, np.ones((X.shape[0], 1))], axis=1)

    def fit(self, X, y):
        """Estimate ``W_`` from 2-D features ``X`` and targets ``y``.

        Returns ``self`` so calls can be chained.
        """
        X_b = self._add_bias(X)
        # A least-squares solve gives the same answer as the normal equations
        # inv(X_b.T @ X_b) @ X_b.T @ y when X_b has full column rank, without
        # forming an explicit inverse, and it still returns a (minimum-norm)
        # solution when the columns are linearly dependent.
        self.W_ = np.linalg.lstsq(X_b, y, rcond=None)[0]
        return self

    def score(self, X, y):
        """Return the R^2 of the predictions for ``X`` against the true targets ``y``."""
        pred = self.predict(X)
        # r2_score expects (y_true, y_pred); the metric is not symmetric.
        return r2_score(y, pred)

    def predict(self, X):
        """Return the predicted targets for the 2-D feature matrix ``X``."""
        return self._add_bias(X) @ self.W_

In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Synthetic regression problem: 100 samples, 2 features, noise=10, fixed random_state
X, y = make_regression(n_samples=100, n_features=2, noise=10, random_state=1)
# Standardize the features with the hand-written scaler
X_sc = my_StandardScaler().fit_transform(X)

In [None]:
# Reference fit with scikit-learn's LinearRegression
model = LinearRegression()        # create model
model.fit(X_sc,y)                    # train model
print("coefficients and bias: ", model.coef_, model.intercept_)
# score() of a scikit-learn regressor is R^2, here evaluated on the training data
print(model.score(X_sc,y))
# predictions for the first five samples
print(model.predict(X_sc[:5]))

coefficients and bias:  [29.528992 71.888596] 19.280819
0.981824980852882
[ 69.28674  -36.17117  -38.273346 -11.022961 -43.112976]


In [None]:
# Same data fitted with the hand-written OLS class (rebinds `model`)
model = my_OLS()
model.fit(X_sc,y)  
# W_ holds the feature weights followed by the bias as its last entry
print(model.W_)
print(model.score(X_sc,y))
# predictions for the first five samples
print(model.predict(X_sc[:5]))

[29.52899691 71.88860026 19.28081954]
0.9814885355690101
[ 69.2867506  -36.17117773 -38.27335067 -11.02296913 -43.11298165]
