## Preprocessing - Encode, Normalize features ##

In [3]:
from sklearn import preprocessing
from tqdm import tqdm 

def label_encode(values):
    """Return integer codes for the categorical entries in `values`.

    A fresh ``LabelEncoder`` is fitted on `values` and then applied to the
    same `values`, so the mapping is derived entirely from this input.
    """
    encoder = preprocessing.LabelEncoder()
    return encoder.fit(values).transform(values)

def label_decode(values, value_to_decode):
    """Map encoded labels back to their original categorical values.

    A fresh ``LabelEncoder`` is fitted on `values` to rebuild the mapping,
    then `value_to_decode` is passed through its ``inverse_transform``.

    values: the original (un-encoded) values used to fit the encoder.
    value_to_decode: the encoded labels to convert back.
    """
    decoder = preprocessing.LabelEncoder()
    fitted = decoder.fit(values)
    return fitted.inverse_transform(value_to_decode)
    
def preprocess(df, columns):
    """Label-encode string columns and standardize numeric columns of `df`.

    For each name in `columns` (iterated with a tqdm progress bar):
      * if the column dtype is ``object``, missing values are replaced with
        ``''`` and the column is label-encoded via ``label_encode``;
      * if the column dtype compares equal to ``int`` or ``float``, the
        column is rescaled with a ``StandardScaler``.

    `df` is modified in place and also returned.

    NOTE(review): the two checks are independent ``if`` statements, so a
    column that was just label-encoded may also be standardized when its
    new dtype compares equal to ``int`` -- confirm this is intended.
    """
    for col in tqdm(columns):

        # Processing for String columns
        if df[col].dtype == object:

            # Replace NAs with '' so they become a category of their own,
            # then label-encode the column.
            df[col] = label_encode(df[col].fillna(''))

        # Processing for Numeric columns
        if df[col].dtype == int or df[col].dtype == float:

            scaler = preprocessing.StandardScaler()
            # StandardScaler expects 2-D input (n_samples, n_features);
            # passing the 1-D column directly raises a ValueError, so reshape
            # to a single-feature matrix and flatten the result back.
            column_2d = df[col].values.reshape(-1, 1)
            df[col] = scaler.fit_transform(column_2d).ravel()

    return df

## Train-Test Split ##

In [2]:
from sklearn import model_selection
def traintestsplit(df, features_cols, target_col, test_perc = 0.2):
    """Split `df` into train and test sets of features and target.

    df: the source DataFrame.
    features_cols: column label(s) selecting the feature data (``df[features_cols]``).
    target_col: column label selecting the target data (``df[target_col]``).
    test_perc: value forwarded as ``test_size`` to ``train_test_split``
        (default 0.2).

    Returns the result of ``model_selection.train_test_split`` called on the
    features and the target, with ``random_state=42`` so the split is
    reproducible.
    """
    # Honor the caller's test_perc; it was previously ignored in favor of a
    # hard-coded test_size of 0.33.
    return model_selection.train_test_split(
        df[features_cols],
        df[target_col],
        test_size=test_perc,
        random_state=42,
    )