In [1]:
from itertools import takewhile, starmap
from functools import partial, reduce
 
def compose_two(g, f):
    """Function composition for two functions, e.g. compose_two(f, g)(x) == f(g(x))"""
    return lambda *args, **kwargs: g(f(*args, **kwargs))
 
def compose(*funcs):
    """Compose an arbitrary number of functions left-to-right passed as args"""
    return reduce(compose_two, funcs)
 
def transform_args(func, transformer):
    return lambda *args: func(*transformer(args))
 
composed_partials = transform_args(compose, partial(starmap, partial))
pipe = transform_args(composed_partials, reversed)
 
pipe_style = pipe(
    (takewhile, lambda x: x < 7),
    (filter, lambda x: x < 2),
    (map, lambda x: 4 * x))

In [2]:
list(pipe_style(range(100)))

[0, 4]

In [3]:
from sklearn.preprocessing import Normalizer, LabelBinarizer,RobustScaler,StandardScaler, OneHotEncoder

In [4]:
import numpy as np
import pandas as pd

In [5]:
df_train = pd.read_json(open("./data/train.json", "r"))
df_test = pd.read_json(open("./data/test.json", "r"))

In [6]:
df_train.dtypes

bathrooms          float64
bedrooms             int64
building_id         object
created             object
description         object
display_address     object
features            object
interest_level      object
latitude           float64
listing_id           int64
longitude          float64
manager_id          object
photos              object
price                int64
street_address      object
dtype: object

In [7]:
RobustScaler().fit_transform(df_train[["bathrooms"]])[:,0]

array([ 0.5,  0. ,  0. , ...,  0. ,  0. ,  0. ])

In [8]:
pd.DataFrame(RobustScaler().fit_transform(df_train[["bathrooms"]]))

Unnamed: 0,0
0,0.5
1,0.0
2,0.0
3,0.0
4,0.0
5,1.0
6,0.0
7,1.0
8,0.0
9,1.0


In [10]:
df_train

Unnamed: 0,bathrooms,bedrooms,building_id,created,description,display_address,features,interest_level,latitude,listing_id,longitude,manager_id,photos,price,street_address
10,1.5,3,53a5b119ba8f7b61d4e010512e0dfc85,2016-06-24 07:54:24,A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...,Metropolitan Avenue,[],medium,40.7145,7211212,-73.9425,5ba989232d0489da1b5f2c45f6688adc,[https://photos.renthop.com/2/7211212_1ed4542e...,3000,792 Metropolitan Avenue
10000,1.0,2,c5c8a357cba207596b04d1afd1e4f130,2016-06-12 12:19:27,,Columbus Avenue,"[Doorman, Elevator, Fitness Center, Cats Allow...",low,40.7947,7150865,-73.9667,7533621a882f71e25173b27e3139d83d,[https://photos.renthop.com/2/7150865_be3306c5...,5465,808 Columbus Avenue
100004,1.0,1,c3ba40552e2120b0acfc3cb5730bb2aa,2016-04-17 03:26:41,"Top Top West Village location, beautiful Pre-w...",W 13 Street,"[Laundry In Building, Dishwasher, Hardwood Flo...",high,40.7388,6887163,-74.0018,d9039c43983f6e564b1482b273bd7b01,[https://photos.renthop.com/2/6887163_de85c427...,2850,241 W 13 Street
100007,1.0,1,28d9ad350afeaab8027513a3e52ac8d5,2016-04-18 02:22:02,Building Amenities - Garage - Garden - fitness...,East 49th Street,"[Hardwood Floors, No Fee]",low,40.7539,6888711,-73.9677,1067e078446a7897d2da493d2f741316,[https://photos.renthop.com/2/6888711_6e660cee...,3275,333 East 49th Street
100013,1.0,4,0,2016-04-28 01:32:41,Beautifully renovated 3 bedroom flex 4 bedroom...,West 143rd Street,[Pre-War],low,40.8241,6934781,-73.9493,98e13ad4b495b9613cef886d79a6291f,[https://photos.renthop.com/2/6934781_1fa4b41a...,3350,500 West 143rd Street
100014,2.0,4,38a913e46c94a7f46ddf19b756a9640c,2016-04-19 04:24:47,,West 18th Street,[],medium,40.7429,6894514,-74.0028,b209e2c4384a64cc307c26759ee0c651,[https://photos.renthop.com/2/6894514_9abb8592...,7995,350 West 18th Street
100016,1.0,2,3ba49a93260ca5df92fde024cb4ca61f,2016-04-27 03:19:56,Stunning unit with a great location and lots o...,West 107th Street,"[prewar, elevator, Dogs Allowed, Cats Allowed,...",low,40.8012,6930771,-73.9660,01287194f20de51872e81f660def4784,[https://photos.renthop.com/2/6930771_7e3622b6...,3600,210 West 107th Street
100020,2.0,1,0372927bcb6a0949613ef5bf893bbac7,2016-04-13 06:01:42,"This huge sunny ,plenty of lights 1 bed/2 bath...",West 21st Street,"[Doorman, Elevator, Pre-War, Terrace, Laundry ...",low,40.7427,6867392,-73.9957,e6472c7237327dd3903b3d6f6a94515a,[https://photos.renthop.com/2/6867392_b18283f6...,5645,155 West 21st Street
100026,1.0,1,a7efbeb58190aa267b4a9121cd0c88c0,2016-04-20 02:36:35,<p><a website_redacted,Hamilton Terrace,"[Cats Allowed, Dogs Allowed, Elevator, Laundry...",medium,40.8234,6898799,-73.9457,c1a6598437b7db560cde66e5a297a53f,[https://photos.renthop.com/2/6898799_3759be4c...,1725,63 Hamilton Terrace
100027,2.0,4,0,2016-04-02 02:58:15,This is a spacious four bedroom with every bed...,522 E 11th,"[Dishwasher, Hardwood Floors]",low,40.7278,6814332,-73.9808,23a01ea7717b38875f5b070282d1b9d2,[https://photos.renthop.com/2/6814332_e19a8552...,5800,522 E 11th


In [30]:
a,b = transformation(df_train, ['bedrooms'], RobustScaler())

In [31]:
b

10        2.0
10000     1.0
100004    0.0
100007    0.0
100013    3.0
100014    3.0
100016    1.0
100020    0.0
100026    0.0
100027    3.0
100030   -1.0
10004     0.0
100044    1.0
100048    1.0
10005     0.0
100051   -1.0
100052    1.0
100053    0.0
100055    3.0
100058    0.0
100062    2.0
100063    1.0
100065    0.0
100066    0.0
10007     3.0
100071    3.0
100075    0.0
100076    0.0
100079    0.0
100081    1.0
         ... 
99915     0.0
99917     0.0
99919    -1.0
99921     1.0
99923     1.0
99924     0.0
99931     0.0
99933     1.0
99935     0.0
99937    -1.0
9994      1.0
99953     0.0
99956     1.0
99960     0.0
99961     2.0
99964    -1.0
99965     2.0
99966     2.0
99979     0.0
99980    -1.0
99982     0.0
99984     0.0
99986     0.0
99987     0.0
99988     0.0
9999      1.0
99991     0.0
99992     0.0
99993    -1.0
99994     1.0
Name: bedrooms_new, dtype: float64

In [56]:
result = pd.DataFrame()
result

In [57]:
for i in starmap(transformation,selection):
    result[name] = i

In [58]:
result

Unnamed: 0,price
10,-0.093750
10000,1.446875
100004,-0.187500
100007,0.078125
100013,0.125000
100014,3.028125
100016,0.281250
100020,1.559375
100026,-0.890625
100027,1.656250


In [39]:
from multiprocessing import Pool, cpu_count

In [59]:
result = pd.DataFrame()
result

In [70]:
selection = [
    (df_train,["bathrooms"],RobustScaler()),
    (df_train,["bedrooms"],RobustScaler()),
    #(df_train,"latitude",None),
    #(df_train,"longitude",None),
    (df_train,["price"],RobustScaler())
    ]

In [71]:
def transformation(df,sCol,Transformer):
    if isinstance(sCol, list):
        name = sCol[0]
    else:
        name = sCol
    # This step is necessary to keep the index
    df[ name + '_new'] = Transformer.fit_transform(df[sCol])[:,0]
    return df[ name + '_new']

In [72]:
with Pool(cpu_count()) as mp:
    list_of_series = mp.starmap(transformation,selection)
    result = pd.DataFrame(list_of_series).T

In [73]:
result

Unnamed: 0,bathrooms_new,bedrooms_new,price_new
10,0.5,2.0,-0.093750
10000,0.0,1.0,1.446875
100004,0.0,0.0,-0.187500
100007,0.0,0.0,0.078125
100013,0.0,3.0,0.125000
100014,1.0,3.0,3.028125
100016,0.0,1.0,0.281250
100020,1.0,0.0,1.559375
100026,0.0,0.0,-0.890625
100027,1.0,3.0,1.656250


In [64]:
    test2 = [
    (["bathrooms"],RobustScaler()),
    (["bedrooms"],RobustScaler()),
    #("latitude",None),
    #("longitude",None),
    (["price"],RobustScaler())
    ]

In [65]:
test2

[(['bathrooms'],
  RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
         with_scaling=True)),
 (['bedrooms'],
  RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
         with_scaling=True)),
 (['price'],
  RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
         with_scaling=True))]

In [69]:
[(a,b,df_train,df_test) for (a,b) in test2]

[(['bathrooms'],
  RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
         with_scaling=True),
          bathrooms  bedrooms                       building_id  \
  10            1.5         3  53a5b119ba8f7b61d4e010512e0dfc85   
  10000         1.0         2  c5c8a357cba207596b04d1afd1e4f130   
  100004        1.0         1  c3ba40552e2120b0acfc3cb5730bb2aa   
  100007        1.0         1  28d9ad350afeaab8027513a3e52ac8d5   
  100013        1.0         4                                 0   
  100014        2.0         4  38a913e46c94a7f46ddf19b756a9640c   
  100016        1.0         2  3ba49a93260ca5df92fde024cb4ca61f   
  100020        2.0         1  0372927bcb6a0949613ef5bf893bbac7   
  100026        1.0         1  a7efbeb58190aa267b4a9121cd0c88c0   
  100027        2.0         4                                 0   
  100030        1.0         0                                 0   
  10004         1.0         1                                 0   
  10004

In [66]:
    test3 = [
    (["bathrooms"],RobustScaler(),df_train,df_test),
    (["bedrooms"],RobustScaler(),df_train,df_test),
    #("latitude",None,df_train,df_test),
    #("longitude",None,df_train,df_test),
    (["price"],RobustScaler(),df_train,df_test)
    ]

In [67]:
test3

[(['bathrooms'],
  RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
         with_scaling=True),
          bathrooms  bedrooms                       building_id  \
  10            1.5         3  53a5b119ba8f7b61d4e010512e0dfc85   
  10000         1.0         2  c5c8a357cba207596b04d1afd1e4f130   
  100004        1.0         1  c3ba40552e2120b0acfc3cb5730bb2aa   
  100007        1.0         1  28d9ad350afeaab8027513a3e52ac8d5   
  100013        1.0         4                                 0   
  100014        2.0         4  38a913e46c94a7f46ddf19b756a9640c   
  100016        1.0         2  3ba49a93260ca5df92fde024cb4ca61f   
  100020        2.0         1  0372927bcb6a0949613ef5bf893bbac7   
  100026        1.0         1  a7efbeb58190aa267b4a9121cd0c88c0   
  100027        2.0         4                                 0   
  100030        1.0         0                                 0   
  10004         1.0         1                                 0   
  10004