# Titanic - Manual MinMaxScaler

In [99]:
import pandas as pd
import numpy as np
import pickle

In [100]:
# Read ../data/titanic-clean.csv (path relative to this notebook) into a DataFrame.
df = pd.read_csv("../data/titanic-clean.csv")
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,S


## Apply()

## Class Implementation

In [101]:
class MinMaxScaler():
    """Rescale DataFrame columns with per-column min/max values learned by fit().

    Each value x in a column is mapped to (x - min) / (max - min), so the data
    the scaler was fitted on ends up in the range [0, 1].

    Attributes:
        clip: When True, transform() first limits every value to the fitted
            [min, max] range, so the scaled output never leaves [0, 1].
        minmax: Dict mapping column name -> {"min": ..., "max": ...}, filled by fit().
    """

    def __init__(self, clip=False):
        """Create an unfitted scaler.

        Args:
            clip: Whether transform() should clip values to the fitted range.
        """
        self.clip = clip
        self.minmax = {}

    def fit(self, df):
        """Record the minimum and maximum of every column in ``df``.

        Entries for columns fitted earlier are kept; columns present in ``df``
        are overwritten.

        Args:
            df: DataFrame whose columns should be scaled later.
        """
        for col in df.columns:
            self.minmax[col] = {"min": df[col].min(),
                                "max": df[col].max()}

    def transform(self, df):
        """Return a scaled copy of ``df``; the input frame is not modified.

        Args:
            df: DataFrame whose columns were all seen by fit().

        Returns:
            A new DataFrame with the same index and columns, each column
            scaled with the stored min/max.

        Raises:
            KeyError: If ``df`` contains a column that was not fitted.
        """
        scaled = df.copy()

        for col in df.columns:
            lower = self.minmax[col]["min"]
            upper = self.minmax[col]["max"]

            values = df[col]
            if self.clip:
                # Apply both bounds to the same series. Clipping the upper bound
                # from the original column again would discard the lower clip.
                values = values.clip(lower=lower, upper=upper)

            span = upper - lower
            if span == 0:
                # Constant column: avoid 0/0 (NaN) and map fitted values to 0.
                span = 1

            # Vectorized arithmetic instead of a per-element apply().
            scaled[col] = (values - lower) / span

        return scaled

    def fit_transform(self, df):
        """Fit on ``df`` and return its scaled copy in one step."""
        self.fit(df)
        return self.transform(df)
        

In [102]:
# Columns of df that will be rescaled.
columns_to_scale = ["Age", "SibSp", "Fare"]

# Start with clipping disabled.
scaler = MinMaxScaler(clip=False)

# Record each selected column's min and max on the scaler.
scaler.fit(df[columns_to_scale])
# Show the learned per-column {"min", "max"} values.
scaler.minmax

{'Age': {'min': 0.42, 'max': 80.0},
 'SibSp': {'min': 0, 'max': 8},
 'Fare': {'min': 0.0, 'max': 512.3292}}

In [103]:
# Scale the same columns using the min/max values stored by fit().
df_transformed = scaler.transform(df[columns_to_scale])

In [104]:
# Sanity check: after scaling the fitted data, every column's min is 0 and max is 1.
df_transformed.describe()

Unnamed: 0,Age,SibSp,Fare
count,889.0,889.0,889.0
mean,0.365866,0.065523,0.062649
std,0.182552,0.137963,0.097003
min,0.0,0.0,0.0
25%,0.246042,0.0,0.015412
50%,0.346569,0.0,0.028213
75%,0.472229,0.125,0.060508
max,1.0,1.0,1.0


In [105]:
# One-row frame whose Age (100) lies above the fitted Age maximum (80.0).
df_test = pd.DataFrame(data=[{"Age":100, "SibSp": 4, "Fare": 200}])
# Enable clipping on the existing scaler. This mutates `scaler` in place,
# so the object pickled later in the notebook is saved with clip=True.
scaler.clip = True
scaler.transform(df_test)

Unnamed: 0,Age,SibSp,Fare
0,1.0,0.5,0.390374


## Pickle

In [106]:
# saving an object
# A context manager guarantees the file is flushed and closed once the dump finishes.
with open("../object/scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

In [107]:
# loading an object
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
# A context manager closes the file handle as soon as the object is read.
with open("../object/scaler.pkl", "rb") as f:
    new_scaler = pickle.load(f)

In [108]:
# The reloaded scaler should carry the same fitted min/max values as the original.
new_scaler.minmax

{'Age': {'min': 0.42, 'max': 80.0},
 'SibSp': {'min': 0, 'max': 8},
 'Fare': {'min': 0.0, 'max': 512.3292}}

In [110]:
# Same out-of-range row as in the clipping demo, now run through the reloaded scaler.
df_test = pd.DataFrame(data=[{"Age":100, "SibSp": 4, "Fare": 200}])
# Turn clipping off on the reloaded scaler.
new_scaler.clip = False
# Without clipping, Age=100 scales above 1 because it exceeds the fitted max of 80.0.
new_scaler.transform(df_test)

Unnamed: 0,Age,SibSp,Fare
0,1.251319,0.5,0.390374
