# Exercises - Scaling Numeric Data

### Our scenario continues:
*As a customer analyst, I want to know who has spent the most money with us over their lifetime. I have monthly charges and tenure, so I think I will be able to use those two attributes as features to estimate `total_charges`. I need these estimates to be within an average of $5.00 per customer.*

#### Create `split_scale.py` that will contain the functions that follow. 
Each scaler function should create the scaler object, fit it on train, and transform both train and test. Each should return the scaler, the scaled train DataFrame, and the scaled test DataFrame. Be sure the indices of the scaled DataFrames match the original train/test indices, since those correspond to rows of the original DataFrame. Be sure to set a random state where applicable for reproducibility!

In [None]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import math

import wrangle
import env

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, QuantileTransformer, PowerTransformer, RobustScaler, MinMaxScaler

import split_scale

In [None]:
# Load the telco data through the project's wrangle helper.
df = wrangle.wrangle_telco()

In [None]:
# Print the summary of the loaded frame before splitting.
df.info()

In [None]:
# NOTE(review): the next line is a disabled step that would drop `customer_id`
# (its bare `reindex()` call takes no arguments). Remove it or re-enable it;
# as written this cell only repeats the summary printed by the previous cell.
#df = df.reindex().drop('customer_id', axis=1)
df.info()

1. **`split_my_data(df, target_column, train_pct)`**

In [None]:
def split_my_data(df, target_column, train_pct=.75, random_state=None):
    """Split ``df`` into feature and target train/test partitions.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the features and the target column.
    target_column : str
        Name of the column to separate out as the target.
    train_pct : float, default 0.75
        Passed to ``train_test_split`` as ``train_size``.
    random_state : int or None, default None
        Passed through to ``train_test_split``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``. The target partitions are
        single-column DataFrames rather than Series.
    """
    features = df.drop(columns=[target_column])
    # Wrap the target so it comes back as a DataFrame, not a Series.
    target = pd.DataFrame(df[target_column])
    partitions = train_test_split(
        features,
        target,
        train_size=train_pct,
        random_state=random_state,
    )
    X_train, X_test, y_train, y_test = partitions
    return X_train, X_test, y_train, y_test

In [None]:
# Separate features from the `total_charges` target with a 75% train share;
# the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = split_scale.split_my_data(df, 'total_charges', train_pct=.75, random_state=123)

In [None]:
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
X_train.info()
X_train.head()

In [None]:
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
X_test.info()
X_test.head()

In [None]:
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
y_train.info()
y_train.head()

In [None]:
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
y_test.info()
y_test.head()

In [None]:
def split_my_data_whole(df, train_pct=.75, random_state=None):
    """Split the whole frame into train and test without separating a target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split row-wise.
    train_pct : float, default 0.75
        Passed to ``train_test_split`` as ``train_size``.
    random_state : int or None, default None
        Passed through to ``train_test_split``.

    Returns
    -------
    tuple
        ``(train, test)``.
    """
    partitions = train_test_split(
        df,
        train_size=train_pct,
        random_state=random_state,
    )
    train_part, test_part = partitions
    return train_part, test_part

In [None]:
# Use the whole-frame wrapper shown in the cell above rather than reaching
# through split_scale's namespace for sklearn's train_test_split.
# 80% train share, fixed seed for reproducibility.
# NOTE(review): assumes split_scale.py contains split_my_data_whole, as the
# exercise instructions ask — confirm.
train, test = split_scale.split_my_data_whole(df, train_pct=.80, random_state=123)

In [None]:
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
train.info()
train.head(10)

In [None]:
# Print the summary of the test partition.
test.info()

2. **`standard_scaler()`**

In [None]:
def standard_scaler(train, test):
    """Scale train and test with a ``StandardScaler`` fitted on train only.

    The scaler is created with ``with_mean=True`` and ``with_std=True``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to scale; only ``train`` is used for fitting.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)``. Each scaled DataFrame
        carries the column labels and index values of its input.
    """
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    scaler.fit(train)

    def _to_frame(source):
        # transform() hands back a bare array, so the column labels and the
        # index values are re-attached from the source frame.
        transformed = scaler.transform(source)
        frame = pd.DataFrame(transformed, columns=source.columns.values)
        return frame.set_index([source.index.values])

    return scaler, _to_frame(train), _to_frame(test)

In [None]:
# Standard-scale both partitions; the fitted scaler is kept because it is
# passed to scale_inverse further down.
scaler_standard, train_scaled, test_scaled = split_scale.standard_scaler(train, test)

In [None]:
# Preview the first 10 rows of the standard-scaled train partition.
train_scaled.head(10)

In [None]:
# Preview the first 10 rows of the standard-scaled test partition.
test_scaled.head(10)

In [None]:
# Show the class of the scaler object returned above.
type(scaler_standard)

3. **`scale_inverse()`**

In [None]:
def scale_inverse(train_scaled, test_scaled, scaler):
    """Undo a scaling transform on the train and test frames.

    Parameters
    ----------
    train_scaled, test_scaled : pandas.DataFrame
        Scaled frames to convert back.
    scaler : object
        Fitted scaler exposing ``inverse_transform``.

    Returns
    -------
    tuple
        ``(train_unscaled, test_unscaled)``. Each DataFrame carries the
        columns and index of the corresponding scaled input.
    """
    def _restore(scaled):
        # The labels come from the frame being inverted. Reading them from
        # module-level `train` / `test` names raises NameError when those
        # names do not exist and mislabels rows when they hold other data.
        return pd.DataFrame(
            scaler.inverse_transform(scaled),
            index=scaled.index,
            columns=scaled.columns,
        )

    return _restore(train_scaled), _restore(test_scaled)

In [None]:
# Invert the standard scaling using the scaler returned by standard_scaler.
train_unscaled, test_unscaled = split_scale.scale_inverse(train_scaled, test_scaled, scaler_standard)

In [None]:
# Preview the first 10 rows of the inverse-transformed train partition.
train_unscaled.head(10)

4. **`uniform_scaler()`**

In [None]:
def uniform_scaler(train, test):
    """Scale train and test with a ``QuantileTransformer`` fitted on train only.

    The transformer is created with ``n_quantiles=100``,
    ``output_distribution='uniform'`` and ``random_state=123``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to scale; only ``train`` is used for fitting.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)``. Each scaled DataFrame
        carries the column labels and index values of its input.
    """
    scaler = QuantileTransformer(
        n_quantiles=100,
        output_distribution='uniform',
        random_state=123,
        copy=True,
    )
    scaler.fit(train)

    def _to_frame(source):
        # transform() hands back a bare array, so the column labels and the
        # index values are re-attached from the source frame.
        transformed = scaler.transform(source)
        frame = pd.DataFrame(transformed, columns=source.columns.values)
        return frame.set_index([source.index.values])

    return scaler, _to_frame(train), _to_frame(test)

In [None]:
# Apply the uniform scaler; this rebinds train_scaled / test_scaled, replacing
# the frames produced by the previous scaler.
scaler_uniform, train_scaled, test_scaled = split_scale.uniform_scaler(train, test)

In [None]:
# Preview the first 10 rows of the uniform-scaled train partition.
train_scaled.head(10)

In [None]:
# Preview the first 10 rows of the uniform-scaled test partition.
test_scaled.head(10)

5. **`gaussian_scaler()`**

In [None]:
def gaussian_scaler(train, test):
    """Scale train and test with a ``PowerTransformer`` fitted on train only.

    The transformer is created with ``method='yeo-johnson'`` and
    ``standardize=False``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to scale; only ``train`` is used for fitting.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)``. Each scaled DataFrame
        carries the column labels and index values of its input.
    """
    scaler = PowerTransformer(
        method='yeo-johnson',
        standardize=False,
        copy=True,
    )
    scaler.fit(train)

    def _to_frame(source):
        # transform() hands back a bare array, so the column labels and the
        # index values are re-attached from the source frame.
        transformed = scaler.transform(source)
        frame = pd.DataFrame(transformed, columns=source.columns.values)
        return frame.set_index([source.index.values])

    return scaler, _to_frame(train), _to_frame(test)

In [None]:
# Apply the gaussian scaler; this rebinds train_scaled / test_scaled, replacing
# the frames produced by the previous scaler.
scaler_gaussian, train_scaled, test_scaled = split_scale.gaussian_scaler(train, test)

In [None]:
# Preview the first 10 rows of the gaussian-scaled train partition.
train_scaled.head(10)

In [None]:
# Preview the first 10 rows of the gaussian-scaled test partition.
test_scaled.head(10)

6. **`min_max_scaler()`**

In [None]:
def min_max_scaler(train, test):
    """Scale train and test with a ``MinMaxScaler`` fitted on train only.

    The scaler is created with ``feature_range=(0, 1)``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to scale; only ``train`` is used for fitting.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)``. Each scaled DataFrame
        carries the column labels and index values of its input.
    """
    scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
    scaler.fit(train)

    def _to_frame(source):
        # transform() hands back a bare array, so the column labels and the
        # index values are re-attached from the source frame.
        transformed = scaler.transform(source)
        frame = pd.DataFrame(transformed, columns=source.columns.values)
        return frame.set_index([source.index.values])

    return scaler, _to_frame(train), _to_frame(test)

In [None]:
# Apply the min-max scaler; this rebinds train_scaled / test_scaled, replacing
# the frames produced by the previous scaler.
scaler_min_max, train_scaled, test_scaled = split_scale.min_max_scaler(train, test)

In [None]:
# Preview the first 10 rows of the min-max-scaled train partition.
train_scaled.head(10)

In [None]:
# Preview the first 10 rows of the min-max-scaled test partition.
test_scaled.head(10)

7. **`iqr_robust_scaler()`**

In [None]:
def iqr_robust_scaler(train, test):
    """Scale train and test with a ``RobustScaler`` fitted on train only.

    The scaler is created with ``quantile_range=(25.0, 75.0)`` and with both
    centering and scaling enabled.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to scale; only ``train`` is used for fitting.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)``. Each scaled DataFrame
        carries the column labels and index values of its input.
    """
    scaler = RobustScaler(
        quantile_range=(25.0, 75.0),
        copy=True,
        with_centering=True,
        with_scaling=True,
    )
    scaler.fit(train)

    def _to_frame(source):
        # transform() hands back a bare array, so the column labels and the
        # index values are re-attached from the source frame.
        transformed = scaler.transform(source)
        frame = pd.DataFrame(transformed, columns=source.columns.values)
        return frame.set_index([source.index.values])

    return scaler, _to_frame(train), _to_frame(test)

In [None]:
# Apply the IQR robust scaler; this rebinds train_scaled / test_scaled,
# replacing the frames produced by the previous scaler.
scaler_iqr_robust, train_scaled, test_scaled = split_scale.iqr_robust_scaler(train, test)

In [None]:
# Preview the first 10 rows of the robust-scaled train partition.
train_scaled.head(10)

In [None]:
# Preview the first 10 rows of the robust-scaled test partition.
test_scaled.head(10)