In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, PowerTransformer, MinMaxScaler, RobustScaler, QuantileTransformer
from sklearn.model_selection import train_test_split
from wrangle import wrangle_telco

In [2]:
# Load the telco customer frame through the project-local `wrangle` module
# (imported in the first cell) and preview the first rows.
df = wrangle_telco()
df.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
0,0013-SMEOE,109.7,71,7904.25
1,0014-BMAQU,84.65,63,5377.8
2,0016-QLJIS,90.45,65,5957.9
3,0017-DINOC,45.2,54,2460.55
4,0017-IUDMW,116.8,72,8456.75


In [3]:
def split_my_data(df, train_size=.80, random_state=123):
    """Split ``df`` into a train and a test subset.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split row-wise.
    train_size : float or int, default .80
        Forwarded unchanged to ``train_test_split``. The default keeps the
        original 80/20 split.
    random_state : int, default 123
        Seed forwarded to ``train_test_split`` so the split is reproducible.
        The default keeps the original seed.

    Returns
    -------
    tuple
        ``(train, test)`` exactly as returned by ``train_test_split``.
    """
    train, test = train_test_split(
        df, train_size=train_size, random_state=random_state
    )
    return train, test

In [4]:
# Split once up front; the scaler helpers defined below all call `.fit(train)`,
# so `test` is only ever transformed, never fitted on.
train, test = split_my_data(df)

In [5]:
train.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
119,0707-HOVVN,75.5,70,5212.65
1424,8380-MQINP,20.3,55,1079.05
385,2307-FYNNL,109.05,65,7108.2
1140,6797-LNAQX,98.3,70,6859.5
1504,8879-XUAHX,116.25,71,8564.75


In [6]:
# Keep only the three numeric columns (drops `customer_id`) -- presumably
# because the scalers used below require numeric input.
# Note: this rebinds `train`, so the full-width split is no longer available.
train = train[['monthly_charges','tenure','total_charges']]
train.head()

Unnamed: 0,monthly_charges,tenure,total_charges
119,75.5,70,5212.65
1424,20.3,55,1079.05
385,109.05,65,7108.2
1140,98.3,70,6859.5
1504,116.25,71,8564.75


In [7]:
# Apply the same column selection to `test` so both splits share one layout.
# Note: this rebinds `test`, so the full-width split is no longer available.
test = test[['monthly_charges','tenure','total_charges']]
test.head()

Unnamed: 0,monthly_charges,tenure,total_charges
305,20.5,72,1502.25
452,111.3,67,7567.2
917,109.2,63,7049.75
1421,19.45,64,1225.65
1557,24.1,24,587.4


In [8]:
def standard_scaler(train, test):
    """Scale ``train`` and ``test`` with a ``StandardScaler`` fitted on ``train``.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame the scaler is fitted on, then transformed.
    test : pandas.DataFrame
        Frame that is only transformed with the already-fitted scaler.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)`` where ``scaler`` is the fitted
        ``StandardScaler`` and the two frames carry the row and column labels
        of their respective inputs.
    """
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True).fit(train)
    # Hand the original Index objects to the constructor instead of rebuilding
    # the index from `.values`: this keeps index metadata (name, dtype) intact
    # and avoids the extra frame copy made by `set_index`.
    train_scaled = pd.DataFrame(
        scaler.transform(train), columns=train.columns, index=train.index
    )
    test_scaled = pd.DataFrame(
        scaler.transform(test), columns=test.columns, index=test.index
    )
    return scaler, train_scaled, test_scaled

In [9]:
# Fit the standard scaler on `train` and transform both splits.
# `scaler`, `train_scaled` and `test_scaled` are reused (rebound) by every
# later scaler cell, so these results only live until the next scaler call.
scaler, train_scaled, test_scaled = standard_scaler(train, test)

In [10]:
train_scaled.head()

Unnamed: 0,monthly_charges,tenure,total_charges
119,0.419607,0.729412,0.572659
1424,-1.169158,-0.130571,-1.035331
385,1.385242,0.442751,1.310036
1140,1.075836,0.729412,1.213291
1504,1.592472,0.786745,1.876641


In [11]:
test_scaled.head()

Unnamed: 0,monthly_charges,tenure,total_charges
305,-1.163401,0.844077,-0.870704
452,1.450002,0.557416,1.488589
917,1.38956,0.328087,1.287299
1421,-1.193622,0.385419,-0.978302
1557,-1.059786,-1.90787,-1.226585


In [12]:
def scale_inverse(scaler, train_scaled, test_scaled):
    """Undo a scaling step by applying ``scaler.inverse_transform`` to both frames.

    Parameters
    ----------
    scaler : object
        Fitted transformer exposing ``inverse_transform``.
    train_scaled : pandas.DataFrame
        Scaled training frame.
    test_scaled : pandas.DataFrame
        Scaled test frame.

    Returns
    -------
    tuple
        ``(train, test)`` frames holding the inverse-transformed values, with
        the row and column labels of the corresponding scaled inputs.
    """
    # Hand the original Index objects to the constructor instead of rebuilding
    # the index from `.values`: this keeps index metadata (name, dtype) intact
    # and avoids the extra frame copy made by `set_index`.
    train = pd.DataFrame(
        scaler.inverse_transform(train_scaled),
        columns=train_scaled.columns,
        index=train_scaled.index,
    )
    test = pd.DataFrame(
        scaler.inverse_transform(test_scaled),
        columns=test_scaled.columns,
        index=test_scaled.index,
    )
    return train, test

In [13]:
# Round-trip check: invert the standard scaling.
# This rebinds `train`/`test` to the reconstructed frames; the output below
# shows the same values as before, but `tenure` now displays as float
# (70.0 instead of 70).
train, test = scale_inverse(scaler, train_scaled, test_scaled)

In [14]:
train.head()

Unnamed: 0,monthly_charges,tenure,total_charges
119,75.5,70.0,5212.65
1424,20.3,55.0,1079.05
385,109.05,65.0,7108.2
1140,98.3,70.0,6859.5
1504,116.25,71.0,8564.75


In [15]:
def uniform_scaler(train, test):
    """Scale ``train`` and ``test`` with a ``QuantileTransformer`` fitted on ``train``.

    The transformer is configured with ``n_quantiles=100``,
    ``output_distribution='uniform'`` and a fixed ``random_state`` of 123.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame the transformer is fitted on, then transformed.
    test : pandas.DataFrame
        Frame that is only transformed with the already-fitted transformer.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)`` where ``scaler`` is the fitted
        ``QuantileTransformer`` and the two frames carry the row and column
        labels of their respective inputs.
    """
    scaler = QuantileTransformer(
        n_quantiles=100, output_distribution='uniform', random_state=123, copy=True
    ).fit(train)
    # Hand the original Index objects to the constructor instead of rebuilding
    # the index from `.values`: this keeps index metadata (name, dtype) intact
    # and avoids the extra frame copy made by `set_index`.
    train_scaled = pd.DataFrame(
        scaler.transform(train), columns=train.columns, index=train.index
    )
    test_scaled = pd.DataFrame(
        scaler.transform(test), columns=test.columns, index=test.index
    )
    return scaler, train_scaled, test_scaled

In [16]:
# Fit the uniform quantile scaler. `train`/`test` here are the frames rebound
# by the earlier `scale_inverse` cell, and this call overwrites the previous
# `scaler`, `train_scaled` and `test_scaled`.
scaler, train_scaled, test_scaled = uniform_scaler(train, test)

In [17]:
scaler

QuantileTransformer(copy=True, ignore_implicit_zeros=False, n_quantiles=100,
                    output_distribution='uniform', random_state=123,
                    subsample=100000)

In [18]:
train_scaled.head()

Unnamed: 0,monthly_charges,tenure,total_charges
119,0.58071,0.686869,0.657682
1424,0.181818,0.323232,0.181987
385,0.898309,0.515152,0.872947
1140,0.800304,0.686869,0.850193
1504,0.987325,0.752525,0.995336


In [19]:
def gaussian_scaler(train, test):
    """Transform ``train`` and ``test`` with a ``PowerTransformer`` fitted on ``train``.

    The transformer is configured with ``method='yeo-johnson'`` and
    ``standardize=False``.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame the transformer is fitted on, then transformed.
    test : pandas.DataFrame
        Frame that is only transformed with the already-fitted transformer.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)`` where ``scaler`` is the fitted
        ``PowerTransformer`` and the two frames carry the row and column
        labels of their respective inputs.
    """
    scaler = PowerTransformer(
        method='yeo-johnson', standardize=False, copy=True
    ).fit(train)
    # Hand the original Index objects to the constructor instead of rebuilding
    # the index from `.values`: this keeps index metadata (name, dtype) intact
    # and avoids the extra frame copy made by `set_index`.
    train_scaled = pd.DataFrame(
        scaler.transform(train), columns=train.columns, index=train.index
    )
    test_scaled = pd.DataFrame(
        scaler.transform(test), columns=test.columns, index=test.index
    )
    return scaler, train_scaled, test_scaled

In [20]:
# Fit the Yeo-Johnson power transformer; this overwrites the previous
# `scaler`, `train_scaled` and `test_scaled`.
scaler, train_scaled, test_scaled = gaussian_scaler(train, test)

In [21]:
scaler

PowerTransformer(copy=True, method='yeo-johnson', standardize=False)

In [22]:
# NOTE(review): this previews the *unscaled* `train` frame, so the output below
# does not show the result of `gaussian_scaler`. Every other scaler section
# previews `train_scaled` -- presumably `train_scaled.head()` was intended
# here; confirm and re-run the cell.
train.head()

Unnamed: 0,monthly_charges,tenure,total_charges
119,75.5,70.0,5212.65
1424,20.3,55.0,1079.05
385,109.05,65.0,7108.2
1140,98.3,70.0,6859.5
1504,116.25,71.0,8564.75


In [23]:
def my_minmax_scaler(train, test):
    """Scale ``train`` and ``test`` with a ``MinMaxScaler`` fitted on ``train``.

    The scaler is configured with ``feature_range=(0, 1)``. Because the range
    is learned from ``train`` alone, ``test`` is scaled with the training
    minimum and maximum.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame the scaler is fitted on, then transformed.
    test : pandas.DataFrame
        Frame that is only transformed with the already-fitted scaler.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)`` where ``scaler`` is the fitted
        ``MinMaxScaler`` and the two frames carry the row and column labels of
        their respective inputs.
    """
    scaler = MinMaxScaler(copy=True, feature_range=(0, 1)).fit(train)
    # Hand the original Index objects to the constructor instead of rebuilding
    # the index from `.values`: this keeps index metadata (name, dtype) intact
    # and avoids the extra frame copy made by `set_index`.
    train_scaled = pd.DataFrame(
        scaler.transform(train), columns=train.columns, index=train.index
    )
    test_scaled = pd.DataFrame(
        scaler.transform(test), columns=test.columns, index=test.index
    )
    return scaler, train_scaled, test_scaled

In [24]:
# Fit the min-max scaler; this overwrites the previous `scaler`,
# `train_scaled` and `test_scaled`.
scaler, train_scaled, test_scaled = my_minmax_scaler(train, test)

In [25]:
scaler

MinMaxScaler(copy=True, feature_range=(0, 1))

In [26]:
train_scaled.head()

Unnamed: 0,monthly_charges,tenure,total_charges
119,0.569008,0.971831,0.60012
1424,0.018934,0.760563,0.122363
385,0.903338,0.901408,0.819206
1140,0.796213,0.971831,0.790461
1504,0.975087,0.985915,0.987552


In [27]:
test_scaled.head()

Unnamed: 0,monthly_charges,tenure,total_charges
305,0.020927,1.0,0.171276
452,0.92576,0.929577,0.872256
917,0.904833,0.873239,0.81245
1421,0.010463,0.887324,0.139307
1557,0.056801,0.323944,0.065539


In [28]:
def iqr_robust_scaler(train, test):
    """Scale ``train`` and ``test`` with a ``RobustScaler`` fitted on ``train``.

    The scaler is configured with ``quantile_range=(25.0, 75.0)`` and both
    centering and scaling enabled.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame the scaler is fitted on, then transformed.
    test : pandas.DataFrame
        Frame that is only transformed with the already-fitted scaler.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)`` where ``scaler`` is the fitted
        ``RobustScaler`` and the two frames carry the row and column labels of
        their respective inputs.
    """
    scaler = RobustScaler(
        quantile_range=(25.0, 75.0),
        copy=True,
        with_centering=True,
        with_scaling=True,
    ).fit(train)
    # Hand the original Index objects to the constructor instead of rebuilding
    # the index from `.values`: this keeps index metadata (name, dtype) intact
    # and avoids the extra frame copy made by `set_index`.
    train_scaled = pd.DataFrame(
        scaler.transform(train), columns=train.columns, index=train.index
    )
    test_scaled = pd.DataFrame(
        scaler.transform(test), columns=test.columns, index=test.index
    )
    return scaler, train_scaled, test_scaled

In [29]:
# Fit the IQR-based robust scaler; this overwrites the previous `scaler`,
# `train_scaled` and `test_scaled`.
scaler, train_scaled, test_scaled = iqr_robust_scaler(train, test)

In [30]:
scaler

RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
             with_scaling=True)

In [31]:
train_scaled.head()

Unnamed: 0,monthly_charges,tenure,total_charges
119,0.166168,0.272727,0.313763
1424,-0.66018,-0.409091,-0.563784
385,0.668413,0.045455,0.716181
1140,0.507485,0.272727,0.663383
1504,0.776198,0.318182,1.025401


In [32]:
test_scaled.head()

Unnamed: 0,monthly_charges,tenure,total_charges
305,-0.657186,0.363636,-0.473941
452,0.702096,0.136364,0.813625
917,0.670659,-0.045455,0.703773
1421,-0.672904,0.0,-0.532662
1557,-0.603293,-1.818182,-0.66816
