In [1]:
import pandas as pd

In [2]:
from Factories.CommonTransFactory import *

In [28]:
from sklearn.pipeline import Pipeline

In [3]:
# Toy frame used throughout the notebook: two nominal columns (city, boolean),
# one ordinal column and one numeric column. city, boolean and
# quantitative_column each contain exactly one missing value.
X = pd.DataFrame(
    {
        'city': ['tokyo', None, 'london', 'seattle', 'sanfrancisco', 'tokyo'],
        'boolean': ['yes', 'no', None, 'no', 'no', 'yes'],
        'ordinal_column': [
            'somewhat like', 'like', 'somewhat like',
            'like', 'somewhat like', 'dislike',
        ],
        'quantitative_column': [1, 11, -0.5, 10, None, 20],
    }
)

In [4]:
# Display the raw frame (6 rows) so the missing entries in city, boolean and
# quantitative_column are visible before any transformer is applied.
X

Unnamed: 0,boolean,city,ordinal_column,quantitative_column
0,yes,tokyo,somewhat like,1.0
1,no,,like,11.0
2,,london,somewhat like,-0.5
3,no,seattle,like,10.0
4,no,sanfrancisco,somewhat like,
5,yes,tokyo,dislike,20.0


In [5]:
# Manual baseline for the category imputer: value_counts() orders labels by
# descending frequency, so the first index label is the most common city.
mostFrequentCity = X['city'].value_counts().index[0]

In [6]:
# Preview of the imputed city column. fillna returns a new Series and the
# result is not assigned, so X itself still holds the missing value.
X['city'].fillna(mostFrequentCity)

0           tokyo
1           tokyo
2          london
3         seattle
4    sanfrancisco
5           tokyo
Name: city, dtype: object

In [7]:
# Single factory instance; every custom transformer below is created from it.
TransFactory = CommonTransFactory()

In [8]:
# Create the categorical imputer.
# NOTE(review): the factory method is spelled `makeCustomCateoryTransform`
# ("Cateory") - presumably this matches the definition in CommonTransFactory;
# confirm there before correcting the spelling here.
customCategoryImputerTrans = TransFactory.makeCustomCateoryTransform()

In [9]:
# Hand the imputer the columns to work on. Despite the `get` prefix the call
# passes values in and shows no output - presumably a setter; confirm in the
# transformer's implementation.
customCategoryImputerTrans.getCols(cols=['city', 'boolean'])

In [10]:
# Fit and apply in one call. In the displayed result the missing city becomes
# 'tokyo' and the missing boolean becomes 'no' - each is the most frequent
# value of its column in X (presumably the imputer's rule; confirm in its code).
customCategoryImputerTrans.fit_transform(X)

Unnamed: 0,boolean,city,ordinal_column,quantitative_column
0,yes,tokyo,somewhat like,1.0
1,no,tokyo,like,11.0
2,no,london,somewhat like,-0.5
3,no,seattle,like,10.0
4,no,sanfrancisco,somewhat like,
5,yes,tokyo,dislike,20.0


In [11]:
# Create the transformer that will bin the numeric column.
cutterTransform = TransFactory.makeCustomCutterTrans()

In [12]:
# Name the column to bin. The `get` prefix notwithstanding, a value is passed
# in and nothing is displayed - presumably a setter; confirm in the class.
cutterTransform.getCol("quantitative_column")

In [13]:
# Request 3 bins; the transform output that follows uses the labels 0, 1 and 2.
cutterTransform.getBins(3)

In [14]:
# Apply the cutter to the raw frame. In the displayed result
# quantitative_column holds bin labels instead of the original numbers, and
# the row whose value was missing stays missing.
cutterTransform.transform(X)

Unnamed: 0,boolean,city,ordinal_column,quantitative_column
0,yes,tokyo,somewhat like,0.0
1,no,,like,1.0
2,,london,somewhat like,0.0
3,no,seattle,like,1.0
4,no,sanfrancisco,somewhat like,
5,yes,tokyo,dislike,2.0


In [15]:
# Create the transformer that expands categorical columns into indicators.
dummyTransform = TransFactory.makeCustomDummifierTrans()

In [16]:
# Pass in the columns to expand (presumably a setter despite the `get`
# prefix - confirm in the transformer's implementation).
dummyTransform.getCols(cols=['boolean', 'city'])

In [17]:
# Apply to the raw frame. The displayed result replaces boolean and city with
# 0/1 indicator columns; a row whose source value is missing gets 0 in every
# indicator for that column (row 1 for city, row 2 for boolean).
dummyTransform.transform(X)

Unnamed: 0,ordinal_column,quantitative_column,boolean_no,boolean_yes,city_london,city_sanfrancisco,city_seattle,city_tokyo
0,somewhat like,1.0,0,1,0,0,0,1
1,like,11.0,1,0,0,0,0,0
2,somewhat like,-0.5,0,0,1,0,0,0
3,like,10.0,1,0,0,0,1,0
4,somewhat like,,1,0,0,1,0,0
5,dislike,20.0,0,1,0,0,0,1


In [18]:
# Create the ordinal encoder.
# NOTE(review): the variable is spelled `encodingTransfrom` ("Transfrom") and
# is reused under that spelling in later cells, including the pipeline; any
# rename has to be applied to all of them together.
encodingTransfrom = TransFactory.makeCustomEncodingTrans()

In [19]:
# Name the column to encode (presumably a setter despite the `get` prefix -
# confirm in the transformer's implementation).
encodingTransfrom.getCol(col='ordinal_column')

In [20]:
# Supply the category order from lowest to highest. The transform output
# further down encodes 'dislike' as 0, 'somewhat like' as 1 and 'like' as 2,
# i.e. each label's position in this list.
encodingTransfrom.getOrdering(ordering=['dislike', 'somewhat like', 'like'])

In [21]:
# Sanity check: X still shows its original values and missing entries, so the
# transform calls above did not modify it in place.
X.head(10)

Unnamed: 0,boolean,city,ordinal_column,quantitative_column
0,yes,tokyo,somewhat like,1.0
1,no,,like,11.0
2,,london,somewhat like,-0.5
3,no,seattle,like,10.0
4,no,sanfrancisco,somewhat like,
5,yes,tokyo,dislike,20.0


In [22]:
# Apply the encoder to the raw frame. In the displayed result ordinal_column
# holds integer codes; the other columns are shown unchanged.
encodingTransfrom.transform(X)

Unnamed: 0,boolean,city,ordinal_column,quantitative_column
0,yes,tokyo,1,1.0
1,no,,2,11.0
2,,london,1,-0.5
3,no,seattle,2,10.0
4,no,sanfrancisco,1,
5,yes,tokyo,0,20.0


In [25]:
# Create the imputer for numeric columns.
quantitativeImputerTrans = TransFactory.makeCustomQuantitativeImputerTrans()

In [26]:
# Pass in the numeric columns to impute (presumably a setter despite the
# `get` prefix - confirm in the transformer's implementation).
quantitativeImputerTrans.getCols(cols=['quantitative_column'])

In [27]:
# Apply to the raw frame. The missing quantitative_column entry is shown as
# 8.3, which equals the mean of the five observed values
# (1 + 11 - 0.5 + 10 + 20) / 5 - presumably mean imputation; confirm in the class.
quantitativeImputerTrans.transform(X)

Unnamed: 0,boolean,city,ordinal_column,quantitative_column
0,yes,tokyo,somewhat like,1.0
1,no,,like,11.0
2,,london,somewhat like,-0.5
3,no,seattle,like,10.0
4,no,sanfrancisco,somewhat like,8.3
5,yes,tokyo,dislike,20.0


In [29]:
# Chain the configured transformers into one scikit-learn Pipeline. Steps run
# in list order: both imputers come first, then dummy expansion, ordinal
# encoding and finally binning of the numeric column.
transPipeline = Pipeline(
    steps=[
        ("quant", quantitativeImputerTrans),
        ("category", customCategoryImputerTrans),
        ("dummy", dummyTransform),
        ("encode", encodingTransfrom),
        ("cut", cutterTransform),
    ]
)

In [30]:
# Fit the pipeline on X; the cell displays the repr of the returned Pipeline.
transPipeline.fit(X)

Pipeline(memory=None,
     steps=[('quant', <TransformStrategy.CustomQuantitativeImputerTrans.CustomQuantitativeImputerTrans object at 0x10d293860>), ('category', <TransformStrategy.CustomCategoryImputerTrans.CustomCategoryImputerTrans object at 0x10d0f2f98>), ('dummy', <TransformStrategy.CustomDummifierTrans.CustomDummifierT...10d196b70>), ('cut', <TransformStrategy.CustomCutterTrans.CustomCutterTrans object at 0x10d0f2ba8>)])

In [31]:
# Run X through every step of the fitted pipeline. The displayed result has
# no missing entries: city and boolean are indicator columns, ordinal_column
# is integer-coded and quantitative_column holds bin labels.
transPipeline.transform(X)

Unnamed: 0,ordinal_column,quantitative_column,boolean_no,boolean_yes,city_london,city_sanfrancisco,city_seattle,city_tokyo
0,1,0,0,1,0,0,0,1
1,2,1,1,0,0,0,0,1
2,1,0,1,0,1,0,0,0
3,2,1,1,0,0,0,1,0
4,1,1,1,0,0,1,0,0
5,0,2,0,1,0,0,0,1
