In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
feature = np.random.randint(1, 100, 100)

In [14]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [4]:
help(BaseEstimator)

Help on class BaseEstimator in module sklearn.base:

class BaseEstimator(builtins.object)
 |  Base class for all estimators in scikit-learn
 |  
 |  Notes
 |  -----
 |  All estimators should specify all the parameters that can be set
 |  at the class level in their ``__init__`` as explicit keyword
 |  arguments (no ``*args`` or ``**kwargs``).
 |  
 |  Methods defined here:
 |  
 |  __getstate__(self)
 |  
 |  __repr__(self, N_CHAR_MAX=700)
 |      Return repr(self).
 |  
 |  __setstate__(self, state)
 |  
 |  get_params(self, deep=True)
 |      Get parameters for this estimator.
 |      
 |      Parameters
 |      ----------
 |      deep : bool, default=True
 |          If True, will return the parameters for this estimator and
 |          contained subobjects that are estimators.
 |      
 |      Returns
 |      -------
 |      params : mapping of string to any
 |          Parameter names mapped to their values.
 |  
 |  set_params(self, **params)
 |      Set the parameters of this es

In [5]:
help(TransformerMixin)

Help on class TransformerMixin in module sklearn.base:

class TransformerMixin(builtins.object)
 |  Mixin class for all transformers in scikit-learn.
 |  
 |  Methods defined here:
 |  
 |  fit_transform(self, X, y=None, **fit_params)
 |      Fit to data, then transform it.
 |      
 |      Fits transformer to X and y with optional parameters fit_params
 |      and returns a transformed version of X.
 |      
 |      Parameters
 |      ----------
 |      X : {array-like, sparse matrix, dataframe} of shape                 (n_samples, n_features)
 |      
 |      y : ndarray of shape (n_samples,), default=None
 |          Target values.
 |      
 |      **fit_params : dict
 |          Additional fit parameters.
 |      
 |      Returns
 |      -------
 |      X_new : ndarray array of shape (n_samples, n_features_new)
 |          Transformed array.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      d

In [9]:
class Transformer:
    def __init__(self):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        
    def transform(self, X):
        return (X - self.mean_)/self.scale_

In [10]:
scale = Transformer()
scale.fit(feature)

In [11]:
scale.mean_

48.82

In [12]:
scale.scale_

29.57038383247671

In [19]:
scale.transform(feature)[:5]

array([ 0.85152767,  0.81771005, -1.07607666,  0.71625719, -0.83935333])

In [18]:
ss = StandardScaler()
ss.fit(feature.reshape(-1, 1))
ss.transform(feature.reshape(-1, 1))[:5]

array([[ 0.85152767],
       [ 0.81771005],
       [-1.07607666],
       [ 0.71625719],
       [-0.83935333]])

In [20]:
pipe = Pipeline([
    ('scale', Transformer())
])

In [29]:
class Transformer:
    def __init__(self):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None):
        print(X, y)
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        
    def transform(self, X, y=None):
        return (X - self.mean_)/self.scale_

In [30]:
pipe = Pipeline([
    ('scale', Transformer())
])
pipe.fit(feature)

[74 73 17 70 24 76 25 64 92  6  1  9  3 87 85 20 98 56 77 85 96 49 35 63
  1 49 64 17 88 75 47  6 44  1 94 97 49 67 14  6  6 85 13 42 98 68 85 11
 81 36 27 83 86 74 75 39 15 62 58 63 96 13  6 13 22 20  1 21 56 33 37 61
 32 56  7 94 76 59 53  4 53 39 72 70 85 57 39 67 53 39  8 54 65 46 65  1
 75 44 35 44] None


Pipeline(steps=[('scale', <__main__.Transformer object at 0x000002440D47CCA0>)])

In [32]:
pipe.transform(feature)[:5]

array([ 0.85152767,  0.81771005, -1.07607666,  0.71625719, -0.83935333])

In [34]:
class Transformer(TransformerMixin):
    def __init__(self):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        return self
    
    def transform(self, X, y=None):
        return (X - self.mean_)/self.scale_
    
    
pipe = Pipeline([
    ('scale', Transformer())
])
pipe.fit(feature)

Pipeline(steps=[('scale', <__main__.Transformer object at 0x000002440A0838B0>)])

In [35]:
pipe.fit_transform(feature)

array([ 0.85152767,  0.81771005, -1.07607666,  0.71625719, -0.83935333,
        0.91916291, -0.80553571,  0.51335147,  1.46024483, -1.44807048,
       -1.61715858, -1.34661762, -1.54952334,  1.29115673,  1.22352149,
       -0.9746238 ,  1.66315055,  0.24281051,  0.95298053,  1.22352149,
        1.59551531,  0.00608717, -0.46735951,  0.47953385, -1.61715858,
        0.00608717,  0.51335147, -1.07607666,  1.32497435,  0.88534529,
       -0.06154807, -1.44807048, -0.16300093, -1.61715858,  1.52788007,
        1.62933293,  0.00608717,  0.61480433, -1.17752952, -1.44807048,
       -1.44807048,  1.22352149, -1.21134714, -0.23063617,  1.66315055,
        0.64862195,  1.22352149, -1.27898238,  1.08825101, -0.43354189,
       -0.73790047,  1.15588625,  1.25733911,  0.85152767,  0.88534529,
       -0.33208903, -1.1437119 ,  0.44571623,  0.31044575,  0.47953385,
        1.59551531, -1.21134714, -1.44807048, -1.21134714, -0.90698857,
       -0.9746238 , -1.61715858, -0.94080618,  0.24281051, -0.53

In [36]:
class Transformer(TransformerMixin):
    def __init__(self):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        return self
    
    def transform(self, X, y=None):
        return (X - self.mean_)/self.scale_
    
scale = Transformer()

In [38]:
scale.fit_transform(feature)

array([ 0.85152767,  0.81771005, -1.07607666,  0.71625719, -0.83935333,
        0.91916291, -0.80553571,  0.51335147,  1.46024483, -1.44807048,
       -1.61715858, -1.34661762, -1.54952334,  1.29115673,  1.22352149,
       -0.9746238 ,  1.66315055,  0.24281051,  0.95298053,  1.22352149,
        1.59551531,  0.00608717, -0.46735951,  0.47953385, -1.61715858,
        0.00608717,  0.51335147, -1.07607666,  1.32497435,  0.88534529,
       -0.06154807, -1.44807048, -0.16300093, -1.61715858,  1.52788007,
        1.62933293,  0.00608717,  0.61480433, -1.17752952, -1.44807048,
       -1.44807048,  1.22352149, -1.21134714, -0.23063617,  1.66315055,
        0.64862195,  1.22352149, -1.27898238,  1.08825101, -0.43354189,
       -0.73790047,  1.15588625,  1.25733911,  0.85152767,  0.88534529,
       -0.33208903, -1.1437119 ,  0.44571623,  0.31044575,  0.47953385,
        1.59551531, -1.21134714, -1.44807048, -1.21134714, -0.90698857,
       -0.9746238 , -1.61715858, -0.94080618,  0.24281051, -0.53

In [39]:
class Transformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        return self
    
    def transform(self, X, y=None):
        return (X - self.mean_)/self.scale_
    
scale = Transformer()

In [41]:
scale.fit_transform(feature)[:5]  # TransforMixin

array([ 0.85152767,  0.81771005, -1.07607666,  0.71625719, -0.83935333])

In [42]:
scale.get_params()  # BaseEstimator

{}

In [50]:
scale.set_params(abc=10)

ValueError: Invalid parameter abc for estimator Transformer(). Check the list of available parameters with `estimator.get_params().keys()`.

In [55]:
ss.get_params()

{'copy': True, 'with_mean': True, 'with_std': True}

In [60]:
class Transformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        return self
    
    def transform(self, X, y=None):
        return (X - self.mean_)/self.scale_

pipe = Pipeline([
    ('scale', Transformer())
])
pipe.fit(feature)

Pipeline(steps=[('scale', Transformer())])

In [57]:
pipe.get_params()

{'memory': None,
 'steps': [('scale', Transformer())],
 'verbose': False,
 'scale': Transformer()}

In [68]:
class Transformer(BaseEstimator, TransformerMixin):
    def __init__(self, mean=None, scale=None):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None, a=None):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        return self
    
    def transform(self, X, y=None, a=None):
        return (X - self.mean_)/self.scale_
    
scale = Transformer()
scale.fit(feature)

Transformer()

In [69]:
scale.fit_transform(feature)

array([ 0.85152767,  0.81771005, -1.07607666,  0.71625719, -0.83935333,
        0.91916291, -0.80553571,  0.51335147,  1.46024483, -1.44807048,
       -1.61715858, -1.34661762, -1.54952334,  1.29115673,  1.22352149,
       -0.9746238 ,  1.66315055,  0.24281051,  0.95298053,  1.22352149,
        1.59551531,  0.00608717, -0.46735951,  0.47953385, -1.61715858,
        0.00608717,  0.51335147, -1.07607666,  1.32497435,  0.88534529,
       -0.06154807, -1.44807048, -0.16300093, -1.61715858,  1.52788007,
        1.62933293,  0.00608717,  0.61480433, -1.17752952, -1.44807048,
       -1.44807048,  1.22352149, -1.21134714, -0.23063617,  1.66315055,
        0.64862195,  1.22352149, -1.27898238,  1.08825101, -0.43354189,
       -0.73790047,  1.15588625,  1.25733911,  0.85152767,  0.88534529,
       -0.33208903, -1.1437119 ,  0.44571623,  0.31044575,  0.47953385,
        1.59551531, -1.21134714, -1.44807048, -1.21134714, -0.90698857,
       -0.9746238 , -1.61715858, -0.94080618,  0.24281051, -0.53

In [65]:
scale.get_params()



{'mean': None, 'scale': None}

In [66]:
scale.set_params(mean=10)

Transformer(mean=10)

In [71]:
class Transformer(BaseEstimator, TransformerMixin):
    def __init__(self, mean=None, scale=None):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None, a=None):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        return self
    
    def transform(self, X, y=None, a=None):
        return (X - self.mean_)/self.scale_

pipe = Pipeline([
    ('scale', Transformer())
])
pipe.fit(feature)

Pipeline(steps=[('scale', Transformer())])

In [77]:
pipe.fit(feature, 10, 10)

TypeError: fit() takes from 2 to 3 positional arguments but 4 were given

In [76]:
pipe.transform(feature)

array([ 0.85152767,  0.81771005, -1.07607666,  0.71625719, -0.83935333,
        0.91916291, -0.80553571,  0.51335147,  1.46024483, -1.44807048,
       -1.61715858, -1.34661762, -1.54952334,  1.29115673,  1.22352149,
       -0.9746238 ,  1.66315055,  0.24281051,  0.95298053,  1.22352149,
        1.59551531,  0.00608717, -0.46735951,  0.47953385, -1.61715858,
        0.00608717,  0.51335147, -1.07607666,  1.32497435,  0.88534529,
       -0.06154807, -1.44807048, -0.16300093, -1.61715858,  1.52788007,
        1.62933293,  0.00608717,  0.61480433, -1.17752952, -1.44807048,
       -1.44807048,  1.22352149, -1.21134714, -0.23063617,  1.66315055,
        0.64862195,  1.22352149, -1.27898238,  1.08825101, -0.43354189,
       -0.73790047,  1.15588625,  1.25733911,  0.85152767,  0.88534529,
       -0.33208903, -1.1437119 ,  0.44571623,  0.31044575,  0.47953385,
        1.59551531, -1.21134714, -1.44807048, -1.21134714, -0.90698857,
       -0.9746238 , -1.61715858, -0.94080618,  0.24281051, -0.53

In [74]:
pipe.fit_transform(feature, 10, 10)

TypeError: fit_transform() takes from 2 to 3 positional arguments but 4 were given

In [78]:
print(dir(pipe))

['__abstractmethods__', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_check_fit_params', '_check_n_features', '_estimator_type', '_final_estimator', '_fit', '_get_param_names', '_get_params', '_get_tags', '_inverse_transform', '_iter', '_log_message', '_more_tags', '_pairwise', '_replace_estimator', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_required_parameters', '_set_params', '_sk_visual_block_', '_transform', '_validate_data', '_validate_names', '_validate_steps', 'classes_', 'decision_function', 'fit', 'fit_predict', 'fit_transform', 'get_params', 'inverse_transform', 'memory', 'n_fea

In [85]:
class Transformer(BaseEstimator, TransformerMixin):
    def __init__(self, mean=None, scale=None):
        self.mean_ = None
        self.scale_ = None
    
    def fit(self, X, y=None, a=None):
        self.mean_ = sum(X)/len(X)
        self.scale_ = np.sqrt(sum((self.mean_ - X)**2)/len(X))
        return self
    
    def transform(self, X, y=None, a=None):
        return (X - self.mean_)/self.scale_

pipe = Pipeline([
    ('sc', Transformer())
])
#pipe.fit(feature)

In [86]:
sc

''

In [100]:
pipe = Pipeline([
    ([1, 2, 3], StandardScaler())  # scaler.fit()
])

TypeError: unhashable type: 'list'

In [101]:
scaler

NameError: name 'scaler' is not defined

### Multiple Linear Regression

    Multi Variant Linear Regression
    
    
        We will have multiple features and a single target
        
        
                Height   Weight   BMI   STEPS
                
                
                STEPS --> Target --> Dependent Features
                Height, Weight, BMI, Disease --> Independent Features
        

$$ \beta_1 = \frac{\sum\limits_{i=0}^N (x_i - \bar x)(y_i - \bar y)} {\sum\limits_{i=0}^N (x_i - \bar x)^2} $$

$$ y = \beta_0 + \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3 + ..... + \beta_n x_n $$

$$ \beta_1 = \frac{\sum\limits_{i=0}^N (x_{1i} - \bar x_1)(y_i - \bar y)} {\sum\limits_{i=0}^N (x_{1i} - \bar x_1)^2} $$

$$ \beta_2 = \frac{\sum\limits_{i=0}^N (x_{2i} - \bar x_2)(y_i - \bar y)} {\sum\limits_{i=0}^N (x_{2i} - \bar x_2)^2} $$

$$ \beta_n = \frac{\sum\limits_{i=0}^N (x_{ni} - \bar x_n)(y_i - \bar y)} {\sum\limits_{i=0}^N (x_{ni} - \bar x_n)^2} $$

$$ \beta_0 = \bar y - (\beta_1 \bar x_1 + \beta_2 \bar x_2 + \beta_3 \bar x_3 + ..... + \beta_n \bar x_n) $$

#### Without Vector form of equation is 

$$ y = \theta_0 + \theta_1 x $$

#### With Vector form of equation is 

$$ y = \theta.X $$