#### Preprocessing:

In [1]:
import numpy as np
import pandas as pd
import logging
import os
from dotenv import find_dotenv, load_dotenv
import datetime
import glob
from os.path import abspath
from pathlib import Path
from inspect import getsourcefile
from datetime import datetime
import math
import argparse
import sys
import tensorflow as tf

from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import OneHotEncoder

# Make the project's helper modules importable: <parent of the current working
# directory>/src/data is prepended to sys.path (once) before importing them.
nb_dir = os.path.join(Path(os.getcwd()).parents[0], 'src', 'data')
if nb_dir not in sys.path:
    sys.path.insert(0, nb_dir)
import get_raw_data as grd
import data_classes
import Normalizer

# dtype used for feature matrices before they are written out.
DT_FLOAT = np.float32 
DT_BOOL = np.uint8
RANDOM_SEED = 123
# Root logging configuration: every record is printed with timestamp, logger name and level.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# logger.propagate = False # it will not log to console.

# Input (raw) and output (processed) data directories, both resolved relative to
# the parent of the current working directory.
RAW_DIR = os.path.join(Path(os.getcwd()).parents[0], 'data', 'raw') 
PRO_DIR = os.path.join(Path(os.getcwd()).parents[0], 'data', 'processed')

print(RAW_DIR, PRO_DIR)

  from ._conv import register_converters as _register_converters


C:\Users\sandr\Documents\GitHub\MLMortgage\data\raw C:\Users\sandr\Documents\GitHub\MLMortgage\data\processed


In [2]:
def update_parser(parser):
    """Register the preprocessing options on ``parser`` and parse the command line.

    Args:
        parser (argparse.ArgumentParser): parser to which the options are added.
    Returns:
        tuple: ``(namespace, unparsed)`` exactly as returned by
        ``parser.parse_known_args()``; unknown arguments (for instance the ones
        injected by the Jupyter kernel) end up in ``unparsed`` instead of failing.
    Raises:
        SystemExit: raised by argparse when an option value cannot be converted.
    """
    def _str2bool(value):
        # ``type=bool`` would turn every non-empty string (including "False")
        # into True, so boolean options are parsed explicitly.
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string is kept falsy, as it was with ``bool('')``.
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected, got %r' % (value,))

    parser.add_argument(
        '--prepro_step',
        type=str,
        default='preprocessing', #'slicing', 'preprocessing'
        help='To execute a preprocessing method')
    # The remaining options are used by allfeatures_preprocessing.
    parser.add_argument(
        '--train_period',
        type=int,
        nargs='*',
        default=[121,323], #[121,279], #[156, 180], [121,143],  # 279],
        help='Training Period')
    parser.add_argument(
        '--valid_period',
        type=int,
        nargs='*',
        default=[324,329], #[280,285], #[181,185], [144,147],
        help='Validation Period')
    parser.add_argument(
        '--test_period',
        type=int,
        nargs='*',
        default=[330, 342], #[286, 304], # [186,191], [148, 155],
        help='Testing Period')
    parser.add_argument(
        '--prepro_dir',
        type=str,
        default='chuncks_random_c1mill',
        help='Directory with raw data inside data/raw/ and it will be the output directory inside data/processed/')
    parser.add_argument(
        '--prepro_chunksize',
        type=int,
        default=500000,
        help='Chunk size to put into the h5 file...')
    parser.add_argument(
        '--prepro_with_index',
        type=_str2bool,
        default=True,
        help='To keep indexes for each record')
    parser.add_argument(
        '--ref_norm',
        type=_str2bool,
        default=True,
        help='To execute the normalization over the raw inputs')

    return parser.parse_known_args()

In [3]:

# Parse the options (unknown arguments are collected in UNPARSED) and then
# override the values that matter for this notebook run.
FLAGS, UNPARSED = update_parser(argparse.ArgumentParser())    
#these are the more important parameters for preprocessing:
FLAGS.prepro_dir='chuncks_random_c1mill' #this directory must be the same inside 'raw' and processed directories.
FLAGS.prepro_chunksize=500000 
# Each period is an inclusive [first, last] pair of PERIOD values.
FLAGS.train_period=[121,323] #[121,279] #[121, 143] 
FLAGS.valid_period=[324,329] #[280,285] #[144, 147] 
FLAGS.test_period=[330,342] #[286,304] #[148, 155]                                                
FLAGS.prepro_with_index = True

print(FLAGS)    

Namespace(prepro_chunksize=500000, prepro_dir='chuncks_random_c1mill', prepro_step='preprocessing', prepro_with_index=True, ref_norm=True, test_period=[330, 342], train_period=[121, 323], valid_period=[324, 329])


In [4]:
# List the raw *.txt files found in the selected input directory.
glob.glob(os.path.join(RAW_DIR, FLAGS.prepro_dir,"*.txt"))
# from IPython.core.debugger import Tracer; Tracer()()

['C:\\Users\\sandr\\Documents\\GitHub\\MLMortgage\\data\\raw\\chuncks_random_c1mill\\temporalloandynmodifmrstaticitur15mill-16mill.txt']

In [5]:
def allfeatures_extract_labels(data, columns='MBA_DELINQUENCY_STATUS_next'):
    '''Remove the label columns from ``data`` and return them as a separate frame.

        Args:
            data (DataFrame): Input Dataset; the selected columns are dropped from it in place.
            columns (str or sequence of int): When a string, every column whose name
                contains that string is selected (in the order the columns appear in
                ``data``). Otherwise it is taken as positional column indices.
        Returns:
            DataFrame holding the selected label columns, or None when nothing is selected.
        Raises:
            TypeError: if ``columns`` is a string and a column name is not a string.
    '''
    if isinstance(columns, str):
        indices = [pos for pos, name in enumerate(data.columns) if columns in name]
    elif columns is None:
        indices = []
    else:
        # list() makes the emptiness test below well defined for numpy arrays too.
        indices = list(columns)

    if not indices:
        return None

    label_names = data.columns[indices]
    labels = data[label_names]
    data.drop(label_names, axis=1, inplace=True)
    # A dedicated named logger keeps the function name in the log record without
    # renaming the shared module-level logger as a side effect.
    logging.getLogger('allfeatures_extract_labels').info('...Labels extracted from Dataset...')
    return labels


In [6]:
def tag_chunk(tag, label, chunk, chunk_periods, tag_period, log_file, with_index, tag_index, hdf=None, tfrec=None):
    '''Select the rows of ``chunk`` whose period falls inside ``tag_period``, split them into
        features and labels and append both to the target file.
        Args: 
            tag (string): 'train', 'valid' or 'test'; used as key prefix in the target file.
            label (string or list): forwarded to allfeatures_extract_labels as ``columns`` to
            select the label columns.
            chunk (DataFrame): Input Dataset, indexed by two levels; the second level is matched
            against the periods (the caller uses ['DELINQUENCY_STATUS_NEXT', 'PERIOD']).
            chunk_periods (set of int): all periods present in the chunk.
            tag_period (integer array): an array of form [init_period, end_period] (both inclusive)
            for the corresponding tag.
            log_file (file object): open text file that receives the log lines.
            with_index (boolean): If true, 'DELINQUENCY_STATUS_NEXT' is kept as the row index of the
            saved frames; otherwise the index is discarded.
            tag_index (int): running count of the rows already written for this tag.
            hdf or tfrec (HDFStore or TFRecord writer): the target file. Only one should be
            different from None; when both are given ``hdf`` is used.
        Returns: 
            tag_index (int): tag_index increased by the number of rows selected in this call
            (unchanged when no row matched or when features and labels differ in length).
        Raises:        
    '''    
    
    # Periods that are present in the chunk and belong to the inclusive tag range.
    inter_periods = list(chunk_periods.intersection(set(range(tag_period[0], tag_period[1]+1))))
    log_file.write('Periods corresponding to ' + tag +' period: %s\r\n' % str(inter_periods))
    # Keep every value of the first index level and only the selected values of the second one.
    p_chunk = chunk.loc[(slice(None), inter_periods), :]
    log_file.write('Records for ' + tag +  ' Set - Number of rows: %d\r\n' % (p_chunk.shape[0]))
    print('Records for ' + tag + ' Set - Number of rows:', p_chunk.shape[0])
    if (p_chunk.shape[0] > 0):
        if (with_index==True):
            # p_chunk.index = pd.MultiIndex.from_tuples([(i, x[1], x[2],x[3]) for x,i in zip(p_chunk.index, range(tag_index, tag_index + p_chunk.shape[0]))])                                
            # Turn the index levels back into columns, discard PERIOD and keep
            # DELINQUENCY_STATUS_NEXT as the only index level.
            p_chunk.reset_index(inplace=True)
            allfeatures_drop_cols(p_chunk, ['PERIOD'])      
            p_chunk.set_index('DELINQUENCY_STATUS_NEXT', inplace=True) #1 index                                      
        else:
            p_chunk.reset_index(drop=True, inplace=True)
            
        # The label columns are removed from p_chunk, which then holds only features.
        labels = allfeatures_extract_labels(p_chunk, columns=label)
        p_chunk = p_chunk.astype(DT_FLOAT)
        labels = labels.astype(np.int8)
        if (p_chunk.shape[0] != labels.shape[0]) : 
            # Only reported on stdout: nothing is written and tag_index is not advanced.
            print('Error in shapes:', p_chunk.shape, labels.shape)
        else :
            if (hdf!=None):
                hdf.put(tag + '/features', p_chunk, append=True, index=True) #data_columns=p_chunk.columns.values), index=False
                hdf.put(tag + '/labels', labels, append=True, index=True) #data_columns=labels.columns.values)                         
                hdf.flush()                      
            elif (tfrec!=None):
                # One serialized tf.train.Example per row.
                # NOTE(review): _int64_feature / _float_feature are not defined in the visible
                # part of the notebook - confirm they exist before using the TFRecord path.
                for row, lab in zip(p_chunk.values, labels.values):
                    feature = {tag + '/labels': _int64_feature(lab),
                               tag + '/features': _float_feature(row)}
                    # Create an example protocol buffer
                    example = tf.train.Example(features=tf.train.Features(feature=feature))
                    tfrec.write(example.SerializeToString())                            
                tfrec.flush()
            tag_index += p_chunk.shape[0]

    return tag_index

In [7]:
def allfeatures_drop_cols(data, columns):
    '''Drop the given columns from the dataset ``data``.
        Args: 
            data (DataFrame): Input Dataset which is modified in place.
            columns (label or list of labels): names of the columns to remove.
        Returns: 
            None
        Raises:
            KeyError: raised by pandas when one of ``columns`` is not in ``data``.
    '''
    data.drop(columns, axis=1, inplace=True)
    # A dedicated named logger keeps the function name in the log record without
    # renaming the shared module-level logger as a side effect.
    logging.getLogger('allfeatures_drop_cols').info('...Columns Excluded from dataset...')
    return None

In [8]:
def oneHotDummies_column(column, categories):
    '''Convert a categorical column into dummy/indicator variables.
    
    Args: 
        column (Series): Input categorical column; its values are converted to strings
            and its name is used as prefix of the new column names.
        categories (list of str): Allowed values. One indicator column
            ``<column name>_<category>`` is created per entry, in this order.
    Returns: 
        DataFrame. Indicator matrix with one row per element of ``column`` (same index as
        ``column``). A value that is not in ``categories`` yields a row of zeros.
    Raises:        
    '''    
    as_text = column.astype('str')
    categorical = pd.Categorical(as_text, categories=categories)
    dummies = pd.get_dummies(categorical).add_prefix(column.name + '_')
    # get_dummies on a Categorical returns a fresh 0..n-1 index; restore the caller's
    # index so the result aligns with the source frame even when it was not reset.
    dummies.index = column.index

    # The indicator matrix itself never contains NaN, so unknown values have to be
    # detected on the categorical (they show up there as missing codes).
    unknown = categorical.isna()
    if unknown.any():
        logging.getLogger('oneHotDummies_column: ' + column.name).warning(
            '%d value(s) outside the declared categories: %s',
            int(unknown.sum()), sorted(set(as_text[unknown])))
    return dummies

In [9]:
def imputing_nan_values(nan_dict, distribution):        
    '''Resolve the fill values used to replace NaN, according to ``nan_dict``.
        Args: 
            nan_dict (Dictionary): maps a feature name to its fill rule: the string 'median'
            or 'mean' (resolved from ``distribution``), or any other value, which is kept as is.
            distribution (DataFrame): statistics of the features, with columns named
            '<feature>_MEDIAN' and '<feature>_MEAN'; the first row is used.
        Returns: 
            new_dict (Dictionary): feature name -> value to fill with. Calling the function
            again on its own result returns the same dictionary.
        Raises:
            KeyError: if the statistic column required by a rule is missing.
    '''    
    suffix_by_rule = {'median': '_MEDIAN', 'mean': '_MEAN'}

    def _first_value(stat_name):
        # float(Series) is deprecated in pandas and fails for multi-row frames, so the
        # first element is taken explicitly (this also accepts a plain scalar).
        return float(np.asarray(distribution[stat_name]).ravel()[0])

    new_dict = {}
    for feature, rule in nan_dict.items():
        suffix = suffix_by_rule.get(rule) if isinstance(rule, str) else None
        new_dict[feature] = rule if suffix is None else _first_value(feature + suffix)

    return new_dict

In [10]:
def drop_invalid_delinquency_status(data, gflag, log_file):   
    '''Delete the records of a loan that follow an invalid or terminal delinquency status.

       For every loan, based on the column MBA_DELINQUENCY_STATUS:
         * from the first record with status S, T, X or Z onwards, all records are deleted
           (including that record);
         * after the first record with status 0 or R, all records are deleted (that record
           itself is kept).
       Loans may be split across consecutive calls: when the last loan of ``data`` carries
       any of these statuses its id is returned, and passing it as ``gflag`` to the next
       call deletes every record of that loan found in the next ``data``.
        Args: 
            data (DataFrame): Input Dataset which is modified in place. Must contain the
            columns LOAN_ID, PERIOD, MBA_DELINQUENCY_STATUS and DELINQUENCY_STATUS_NEXT.
            gflag: Loan_id returned by the previous call, or '' when there is none.
            log_file (file object): open text file that receives the log lines.
        Returns: 
            gflag: Loan_id of the last loan in ``data`` when that loan carries one of the
            statuses above, otherwise ''. For an empty ``data`` the received value is returned.
        Raises:        
    '''        
    if data.empty:
        # Nothing to inspect; keep the carried loan for the next non-empty chunk.
        return gflag

    status = data['MBA_DELINQUENCY_STATUS']
    flagged_ids = data.loc[status.isin(['0', 'R', 'S', 'T', 'X', 'Z']), 'LOAN_ID']
    groups = data[data['LOAN_ID'].isin(flagged_ids)][['LOAN_ID', 'PERIOD', 'MBA_DELINQUENCY_STATUS', 'DELINQUENCY_STATUS_NEXT']].groupby('LOAN_ID') 

    to_drop = []

    has_carried_loan = gflag is not None and not (isinstance(gflag, str) and gflag == '')
    if has_carried_loan:
        # Every record of the carried loan is deleted, whether or not the loan shows one
        # of the statuses again in this chunk.
        to_drop.extend(data.index[data['LOAN_ID'] == gflag])

    # Decide the flag for the next call before any row is removed.
    last_loan = data['LOAN_ID'].iloc[-1]
    gflag = last_loan if last_loan in set(flagged_ids.tolist()) else ''

    for _, group in groups: 
        group_status = group['MBA_DELINQUENCY_STATUS']
        invalid = group.index[group_status.isin(['S', 'T', 'X', 'Z'])]
        if len(invalid) > 0:
            to_drop.extend(group.index[group.index.get_loc(invalid[0]):])
        # In case of REO or Paid-Off, we need to exclude since the next record:
        terminal = group.index[group_status.isin(['0', 'R'])]
        if len(terminal) > 0: 
            to_drop.extend(group.index[group.index.get_loc(terminal[0]) + 1:])

    if to_drop:
        # A row can be selected by more than one rule; count and drop it once.
        drop_index = pd.Index(to_drop).unique()
        log_file.write('drop_invalid_delinquency_status - Total rows: %d\r\n' % len(drop_index))
        data.drop(drop_index, inplace=True) 
        logging.getLogger('drop_invalid_delinquency_status').info('invalid_delinquency_status dropped')             

    return gflag

In [11]:
def zscore(x,mean,stdd):
    """Return the standard score of ``x``: its distance to ``mean`` in units of ``stdd``."""
    centered = x - mean
    return centered / stdd

def zscore_apply(dist_file, data):            
    '''Standardise the columns of ``data`` with the mean and standard deviation stored in ``dist_file``.

        Args:
            dist_file (DataFrame): statistics of the features; the first row is used. The
            statistics of a column are looked up as '<column>_MEAN' and '<column>_STDD'.
            data (DataFrame): columns to normalise; it is modified in place.
        Returns:
            tuple ``(data, nnorm_cols)``: the same frame received, and the list of columns
            for which no mean or no standard deviation was found (left untouched).
            Columns whose standard deviation is zero are also left untouched; they are
            only printed.
    '''
    dist_columns = pd.Series(dist_file.columns.values)

    def _lookup(stat_name):
        # Prefer the column with exactly this name, so that the statistics of another
        # feature whose name merely contains this one are never picked up. The literal
        # substring search is kept only as a fallback for decorated column names.
        positions = np.where(dist_columns == stat_name)[0]
        if len(positions) == 0:
            positions = np.where(dist_columns.str.contains(stat_name, regex=False, na=False))[0]
        if len(positions) == 0:
            return None
        return np.float32(dist_file.iloc[0, positions[0]])

    stddv_0 = []
    nnorm_cols = []
    for col_name in data.columns.values:                                
        mean = _lookup(col_name + '_MEAN')
        stddev = _lookup(col_name + '_STDD')
        if mean is None or stddev is None:
            nnorm_cols.append(col_name)
        elif stddev == 0: 
            stddv_0.append(col_name)        
        else:        
            # Vectorised (x - mean) / stddev over the whole column.
            data[col_name] = (data[col_name] - mean) / stddev
    print('STANDARD DEV zero: ', stddv_0)        
    return data, nnorm_cols

In [12]:
def prepro_chunk(file_name, file_path, chunksize, label, log_file, nan_cols, categorical_cols, descriptive_cols, time_cols,
                 dist_file, with_index, refNorm, train_period, valid_period, test_period, robust_cols, 
                 minmax_cols=None, hdf=None, tfrec=None, filtering_cols=None):
    '''Read a raw CSV file chunk by chunk, clean/encode/normalise every chunk and append its
        rows to the train, validation and test targets according to their PERIOD.

        Per chunk: rows with null label or INVALID_TRANSITIONS == 1 are dropped, records after
        an invalid delinquency status are dropped, NaN values are filled, duplicates removed,
        categorical columns are one-hot encoded, descriptive/time/original categorical columns
        are dropped, the frame is indexed by ['DELINQUENCY_STATUS_NEXT', 'PERIOD'], optionally
        filtered and z-score normalised, and finally written through tag_chunk.
        Args:
            file_name (string): name of the file, used only in error messages.
            file_path (string): path of the comma separated file to read.
            chunksize (int): number of rows read per chunk.
            label (string): name of the label column (upper case, as the column names are
            upper-cased after reading).
            log_file (file object): open text file that receives the log lines.
            nan_cols (Dictionary): feature -> fill value, or 'mean'/'median' to take it from dist_file.
            categorical_cols (Dictionary): column -> list of its categories. It must contain
            'DELINQUENCY_STATUS_NEXT', which is kept as a column after encoding.
            descriptive_cols (list): columns dropped after encoding.
            time_cols (list): columns dropped after encoding.
            dist_file (DataFrame): statistics of the features (see imputing_nan_values, zscore_apply).
            with_index (boolean): forwarded to tag_chunk.
            refNorm (boolean): if True the columns in robust_cols are z-score normalised.
            train_period, valid_period, test_period (integer arrays): [init_period, end_period].
            robust_cols (list): columns to normalise.
            minmax_cols: not used by this function.
            hdf, tfrec (sequences of three writers): targets for (train, valid, test). When both
            are given ``tfrec`` is used.
            filtering_cols (list): optional subset of columns to keep.
        Returns:
            tuple (train_index, valid_index, test_index): number of rows written for each set.
        Raises:
            ValueError: if null values remain in a chunk before or after the normalisation.
    '''
    gflag = ''    
    i = 1                  
    train_index = 0
    valid_index = 0
    test_index = 0
    # The fill values do not depend on the chunk, so they are resolved once and the
    # caller's ``nan_cols`` name is not rebound inside the loop.
    fill_values = imputing_nan_values(nan_cols, dist_file)
    for chunk in pd.read_csv(file_path, chunksize = chunksize, sep=',', low_memory=False):    
        print('chunk: ', i, ' chunk size: ', chunk.shape[0])
        log_file.write('chunk: %d, chunk size: %d \n' % (i, chunk.shape[0]))
        chunk.columns = chunk.columns.str.upper()                            
        
        log_df = chunk[chunk[label].isnull()]
        log_file.write('Dropping Rows with Null Labels - Number of rows: %d\r\n' % (log_df.shape[0]))
        chunk.drop(chunk.index[chunk[label].isnull()], axis=0, inplace=True)
        
        log_df = chunk[chunk['INVALID_TRANSITIONS']==1]
        log_file.write('Dropping Rows with Invalid Transitions - Number of rows: %d\r\n' % (log_df.shape[0]))                                
        chunk.drop(chunk.index[chunk['INVALID_TRANSITIONS']==1], axis=0, inplace=True)        
        
        # gflag carries a loan that must keep being dropped in the next chunk.
        gflag = drop_invalid_delinquency_status(chunk, gflag, log_file)               
                    
        null_columns=chunk.columns[chunk.isnull().any()]
        log_df = chunk[chunk.isnull().any(axis=1)][null_columns]
        log_file.write('Filling NULL values - (rows, cols) : %d, %d\r\n' % (log_df.shape[0], log_df.shape[1]))                    
        log_df = chunk[null_columns].isnull().sum().to_frame().reset_index()
        log_df.to_csv(log_file, index=False, mode='a')                                    
        chunk.fillna(value=fill_values, inplace=True)   
        
        chunk.drop_duplicates(inplace=True) # Follow this instruction!!                        
        logger.info('dropping invalid transitions and delinquency status, fill nan values, drop duplicates')                  
        log_file.write('Drop duplicates - new size : %d\r\n' % (chunk.shape[0]))
                               
        chunk.reset_index(drop=True, inplace=True)  #don't remove this line! otherwise NaN values appears.
        # Years before 1995 are grouped under the 'B1995' category. The column is rebuilt
        # and assigned back as a whole: chained assignment (chunk[col][mask] = ...) is not
        # guaranteed to write into ``chunk`` and is a no-op under pandas copy-on-write.
        origination_year = chunk['ORIGINATION_YEAR']
        chunk['ORIGINATION_YEAR'] = origination_year.astype(object).mask(origination_year < 1995, "B1995")
        for k,v in categorical_cols.items():
            # Normalise the values to clean strings ('2005.0' -> '2005') and assign the
            # result back instead of calling an in-place method on a column selection.
            chunk[k] = chunk[k].astype('str').str.strip().replace([r'\.0$'], [''], regex=True)
            new_cols = oneHotDummies_column(chunk[k], v)
            if (chunk[k].value_counts().sum()!=new_cols.sum().sum()):
                # Some value is not in the declared categories; reported but not fatal.
                print('Error at categorization, different sizes', k)
                print(chunk[k].value_counts(), new_cols.sum())                
                log_file.write('Error at categorization, different sizes %s\r\n' % str(k))
            else:
                log_file.write('New columns added: %s\r\n' % str(new_cols.columns.values))
            chunk[new_cols.columns] = new_cols
            
                    
        allfeatures_drop_cols(chunk, descriptive_cols)                    
        log_file.write('descriptive_cols dropped: %s\r\n' % str(descriptive_cols))
        allfeatures_drop_cols(chunk, time_cols)
        log_file.write('time_cols dropped: %s\r\n' % str(time_cols))
        # The original categorical columns are replaced by their indicators, except
        # DELINQUENCY_STATUS_NEXT, which becomes part of the index below.
        cat_list = list(categorical_cols.keys())
        cat_list.remove('DELINQUENCY_STATUS_NEXT')
        log_file.write('categorical_cols dropped: %s\r\n' % str(cat_list))
        allfeatures_drop_cols(chunk, cat_list)

        chunk.reset_index(drop=True, inplace=True)  
        chunk.set_index(['DELINQUENCY_STATUS_NEXT', 'PERIOD'], append=False, inplace=True) #2 indexes
        log_file.write('Indexes created: %s\r\n' % str(chunk.index.names))
         
        if filtering_cols is not None:
            chunk = chunk[filtering_cols]
            robust_cols = list(set(robust_cols).intersection(filtering_cols))
            log_file.write('Columns Filtered: %s\r\n' % str(chunk.columns.values))
        
        if chunk.isnull().any().any(): 
            raise ValueError('There are null values...File: ' + file_name)   
                        
        if (refNorm==True):            
            # zscore_apply writes into the frame it receives, so it gets its own copy.
            chunk[robust_cols], nnorm_cols =  zscore_apply(dist_file, chunk[robust_cols].copy())
            log_file.write('Columns not normalized: %s\r\n' % str(nnorm_cols))            
            log_file.write('Columns normalized: %s\r\n' % str(set(robust_cols)-set(nnorm_cols)))
            
        
        if chunk.isnull().any().any(): raise ValueError('There are null values...File: ' + file_name)       
        
        chunk_periods = set(list(chunk.index.get_level_values('PERIOD')))
        if tfrec is not None:
            train_index = tag_chunk('train', label, chunk, chunk_periods, train_period, log_file, with_index, train_index, tfrec=tfrec[0])
            valid_index = tag_chunk('valid', label, chunk, chunk_periods, valid_period, log_file, with_index, valid_index, tfrec=tfrec[1])
            test_index = tag_chunk('test', label, chunk, chunk_periods, test_period, log_file, with_index, test_index, tfrec=tfrec[2])
            sys.stdout.flush()
        elif hdf is not None:
            train_index = tag_chunk('train', label, chunk, chunk_periods, train_period, log_file, with_index, train_index, hdf=hdf[0])
            valid_index = tag_chunk('valid', label, chunk, chunk_periods, valid_period, log_file, with_index, valid_index, hdf=hdf[1])
            test_index = tag_chunk('test', label, chunk, chunk_periods, test_period, log_file, with_index, test_index, hdf=hdf[2])                
        
        # Only logged: rows whose period lies after the test period are not written anywhere.
        # NOTE(review): 355 looks like an upper bound on PERIOD - confirm against the data.
        inter_periods = list(chunk_periods.intersection(set(range(test_period[1]+1,355))))    
        log_file.write('Periods greater than test_period: %s\r\n' % str(inter_periods))
        p_chunk = chunk.loc[(slice(None), inter_periods), :]
        log_file.write('Records greater than test_period - Number of rows: %d\r\n' % (p_chunk.shape[0]))
        
        del chunk        
        i +=  1   
    
    return train_index, valid_index, test_index

In [13]:
def custom_robust_normalizer(ncols, dist_file, normalizer_type='robust_scaler_sk', center_value='median'):            
    '''Build a normalizer whose centers and scales come from the quantiles stored in ``dist_file``.

        For every feature the scale is the inter-quartile range ('<feature>_Q3' minus
        '<feature>_Q1', first row of ``dist_file``) and the center is '<feature>_MEDIAN'
        (``center_value='median'``) or '<feature>_Q1' (any other value).
        Args:
            ncols (list of str): candidate features.
            dist_file (DataFrame): statistics of the features; the first row is used.
            normalizer_type (string): 'robust_scaler_sk' for a sklearn RobustScaler with
            ``scale_``/``center_`` set by hand, 'percentile_scaler' for Normalizer.Normalizer;
            any other value yields no normalizer.
            center_value (string): 'median' or anything else for the first quartile.
        Returns:
            tuple ``(norm_cols, normalizer)``: the features that have both quartiles and a
            non-zero inter-quartile range, and the normalizer (None for an unknown type).
        Raises:
            KeyError: if ``center_value='median'`` and '<feature>_MEDIAN' is missing.
    '''
    norm_cols = []
    scales = []
    centers = []
    scales_0 =[]
    for x in ncols:                        
        q1_name = x + '_Q1'
        q3_name = x + '_Q3'
        if q1_name not in dist_file.columns or q3_name not in dist_file.columns:
            continue
        # The first row is read explicitly; float(Series) is deprecated in pandas.
        q1 = pd.to_numeric(dist_file[q1_name], errors='coerce').iloc[0]
        q3 = pd.to_numeric(dist_file[q3_name], errors='coerce').iloc[0]
        iqr = float(q3 - q1)
        if iqr == 0:
            # A zero range cannot be used as divisor; the feature is only reported.
            scales_0.append(x)
            continue
        norm_cols.append(x)                
        scales.append(iqr)                    
        # The median lives in dist_file itself: a frame restricted to the '*_Q*'
        # columns does not contain '<feature>_MEDIAN'.
        center_name = x + '_MEDIAN' if center_value == 'median' else q1_name
        centers.append(float(dist_file[center_name].iloc[0]))

    if (normalizer_type == 'robust_scaler_sk'):    
        normalizer = RobustScaler()
        normalizer.scale_ = scales
        normalizer.center_ = centers        
    elif (normalizer_type == 'percentile_scaler'):    
        normalizer = Normalizer.Normalizer(scales, centers)     
    else: normalizer=None                  
    
    print(scales_0)
    
    return norm_cols, normalizer

In [14]:
def custom_minmax_normalizer(ncols, scales, dist_file):    
    '''Build a min-max Normalizer from the '<feature>_MIN' / '<feature>_MAX' statistics.

        Args:
            ncols (list of str): candidate features.
            scales: not used by this function.
            dist_file (DataFrame): statistics of the features; the first row is read and
            the statistic columns are located by searching their names for the pattern.
        Returns:
            tuple ``(norm_cols, normalizer)``: the features for which both statistics were
            found, and a Normalizer.Normalizer built with their ranges (max - min) as
            scales and their minimums as centers.
    '''
    # The column names do not change while iterating, so they are wrapped once.
    stat_names = pd.Series(dist_file.columns.values)
    selected = []
    ranges = []
    minimums = []
    for feature in ncols:
        min_hits = dist_file.iloc[0, np.where(stat_names.str.contains(feature + '_MIN'))[0]]
        max_hits = dist_file.iloc[0, np.where(stat_names.str.contains(feature + '_MAX'))[0]]
        if min_hits.empty or max_hits.empty:
            # Features lacking either statistic are left out.
            continue
        lowest = np.float32(min_hits.values[0])
        highest = np.float32(max_hits.values[0])
        ranges.append(highest - lowest)
        minimums.append(lowest)
        selected.append(feature)

    return selected, Normalizer.Normalizer(ranges, minimums)

In [15]:
def allfeatures_preprocessing(RAW_DIR, PRO_DIR, raw_dir, train_period, valid_period, test_period, dividing='percentage', 
                              chunksize=500000, refNorm=True, with_index=True, output_hdf=True, 
                              label='DELINQUENCY_STATUS_NEXT', filtering_cols=None):            

    descriptive_cols = [
    'LOAN_ID',
    'ASOFMONTH',        
    'PERIOD_NEXT',
    'MOD_PER_FROM',
    'MOD_PER_TO',
    'PROPERTY_ZIP',
    'INVALID_TRANSITIONS'
    ]

    numeric_cols = ['MBA_DAYS_DELINQUENT', 'MBA_DAYS_DELINQUENT_NAN',
       'CURRENT_INTEREST_RATE', 'CURRENT_INTEREST_RATE_NAN', 'LOANAGE', 'LOANAGE_NAN',
       'CURRENT_BALANCE', 'CURRENT_BALANCE_NAN', 'SCHEDULED_PRINCIPAL',
       'SCHEDULED_PRINCIPAL_NAN', 'SCHEDULED_MONTHLY_PANDI',
       'SCHEDULED_MONTHLY_PANDI_NAN', 
       'LLMA2_CURRENT_INTEREST_SPREAD', 'LLMA2_CURRENT_INTEREST_SPREAD_NAN',  
       'LLMA2_C_IN_LAST_12_MONTHS',
       'LLMA2_30_IN_LAST_12_MONTHS', 'LLMA2_60_IN_LAST_12_MONTHS',
       'LLMA2_90_IN_LAST_12_MONTHS', 'LLMA2_FC_IN_LAST_12_MONTHS',
       'LLMA2_REO_IN_LAST_12_MONTHS', 'LLMA2_0_IN_LAST_12_MONTHS',
       'LLMA2_HIST_LAST_12_MONTHS_MIS', 
       'NUM_MODIF', 'NUM_MODIF_NAN', 'P_RATE_TO_MOD', 'P_RATE_TO_MOD_NAN', 'MOD_RATE',
       'MOD_RATE_NAN', 'DIF_RATE', 'DIF_RATE_NAN', 'P_MONTHLY_PAY',
       'P_MONTHLY_PAY_NAN', 'MOD_MONTHLY_PAY', 'MOD_MONTHLY_PAY_NAN',
       'DIF_MONTHLY_PAY', 'DIF_MONTHLY_PAY_NAN', 'CAPITALIZATION_AMT',
       'CAPITALIZATION_AMT_NAN', 'MORTGAGE_RATE', 'MORTGAGE_RATE_NAN',
       'FICO_SCORE_ORIGINATION', 'INITIAL_INTEREST_RATE', 'ORIGINAL_LTV',
       'ORIGINAL_BALANCE', 'BACKEND_RATIO', 'BACKEND_RATIO_NAN',
       'ORIGINAL_TERM', 'ORIGINAL_TERM_NAN', 'SALE_PRICE', 'SALE_PRICE_NAN', 	   
       'PREPAY_PENALTY_TERM', 'PREPAY_PENALTY_TERM_NAN', 
        'NUMBER_OF_UNITS', 'NUMBER_OF_UNITS_NAN', 'MARGIN',
       'MARGIN_NAN', 'PERIODIC_RATE_CAP', 'PERIODIC_RATE_CAP_NAN',
       'PERIODIC_RATE_FLOOR', 'PERIODIC_RATE_FLOOR_NAN', 'LIFETIME_RATE_CAP',
       'LIFETIME_RATE_CAP_NAN', 'LIFETIME_RATE_FLOOR',
       'LIFETIME_RATE_FLOOR_NAN', 'RATE_RESET_FREQUENCY',
       'RATE_RESET_FREQUENCY_NAN', 'PAY_RESET_FREQUENCY',
       'PAY_RESET_FREQUENCY_NAN', 'FIRST_RATE_RESET_PERIOD',
       'FIRST_RATE_RESET_PERIOD_NAN', 	   
        'LLMA2_PRIME',
       'LLMA2_SUBPRIME', 'LLMA2_APPVAL_LT_SALEPRICE', 'LLMA2_ORIG_RATE_SPREAD',
       'LLMA2_ORIG_RATE_SPREAD_NAN', 'AGI', 'AGI_NAN', 'UR', 'UR_NAN', 'LLMA2_ORIG_RATE_ORIG_MR_SPREAD', 
       'LLMA2_ORIG_RATE_ORIG_MR_SPREAD_NAN', 'COUNT_INT_RATE_LESS', 'NUM_PRIME_ZIP', 'NUM_PRIME_ZIP_NAN'
       ]

    '''
    nan_cols = {'MBA_DAYS_DELINQUENT': 'median', 'CURRENT_INTEREST_RATE': 'median', 'LOANAGE': 'median',
                'CURRENT_BALANCE' : 'median', 'SCHEDULED_PRINCIPAL': 'median', 'SCHEDULED_MONTHLY_PANDI': 'median',       
                'LLMA2_CURRENT_INTEREST_SPREAD': 'median', 'NUM_MODIF': 0, 'P_RATE_TO_MOD': 0, 'MOD_RATE': 0,
                'DIF_RATE': 0, 'P_MONTHLY_PAY': 0, 'MOD_MONTHLY_PAY': 0, 'DIF_MONTHLY_PAY': 0, 'CAPITALIZATION_AMT': 0,
                'MORTGAGE_RATE': 'median', 'FICO_SCORE_ORIGINATION': 'median', 'INITIAL_INTEREST_RATE': 'median', 'ORIGINAL_LTV': 'median',
                'ORIGINAL_BALANCE': 'median', 'BACKEND_RATIO': 'median', 'ORIGINAL_TERM': 'median', 'SALE_PRICE': 'median', 'PREPAY_PENALTY_TERM': 'median',
                'NUMBER_OF_UNITS': 'median', 'MARGIN': 'median', 'PERIODIC_RATE_CAP': 'median', 'PERIODIC_RATE_FLOOR': 'median', 'LIFETIME_RATE_CAP': 'median',
                'LIFETIME_RATE_FLOOR': 'median', 'RATE_RESET_FREQUENCY': 'median', 'PAY_RESET_FREQUENCY': 'median',
                'FIRST_RATE_RESET_PERIOD': 'median', 'LLMA2_ORIG_RATE_SPREAD': 'median', 'AGI': 'median', 'UR': 'median',
                'LLMA2_C_IN_LAST_12_MONTHS': 'median', 'LLMA2_30_IN_LAST_12_MONTHS': 'median', 'LLMA2_60_IN_LAST_12_MONTHS': 'median',
                'LLMA2_90_IN_LAST_12_MONTHS': 'median', 'LLMA2_FC_IN_LAST_12_MONTHS': 'median',
                'LLMA2_REO_IN_LAST_12_MONTHS': 'median', 'LLMA2_0_IN_LAST_12_MONTHS': 'median', 
                'LLMA2_ORIG_RATE_ORIG_MR_SPREAD':0, 'COUNT_INT_RATE_LESS' :'median', 'NUM_PRIME_ZIP':'median'
                }
    '''
    nan_cols = {'MBA_DAYS_DELINQUENT': 'mean', 'CURRENT_INTEREST_RATE': 'mean', 'LOANAGE': 'mean',
                'CURRENT_BALANCE' : 'mean', 'SCHEDULED_PRINCIPAL': 'mean', 'SCHEDULED_MONTHLY_PANDI': 'mean',       
                'LLMA2_CURRENT_INTEREST_SPREAD': 'mean', 'NUM_MODIF': 0, 'P_RATE_TO_MOD': 0, 'MOD_RATE': 0,
                'DIF_RATE': 0, 'P_MONTHLY_PAY': 0, 'MOD_MONTHLY_PAY': 0, 'DIF_MONTHLY_PAY': 0, 'CAPITALIZATION_AMT': 0,
                'MORTGAGE_RATE': 'mean', 'FICO_SCORE_ORIGINATION': 'mean', 'INITIAL_INTEREST_RATE': 'mean', 'ORIGINAL_LTV': 'mean',
                'ORIGINAL_BALANCE': 'mean', 'BACKEND_RATIO': 'mean', 'ORIGINAL_TERM': 'mean', 'SALE_PRICE': 'mean', 'PREPAY_PENALTY_TERM': 'mean',
                'NUMBER_OF_UNITS': 'mean', 'MARGIN': 'mean', 'PERIODIC_RATE_CAP': 'mean', 'PERIODIC_RATE_FLOOR': 'mean', 'LIFETIME_RATE_CAP': 'mean',
                'LIFETIME_RATE_FLOOR': 'mean', 'RATE_RESET_FREQUENCY': 'mean', 'PAY_RESET_FREQUENCY': 'mean',
                'FIRST_RATE_RESET_PERIOD': 'mean', 'LLMA2_ORIG_RATE_SPREAD': 'mean', 'AGI': 'mean', 'UR': 'mean',
                'LLMA2_C_IN_LAST_12_MONTHS': 'mean', 'LLMA2_30_IN_LAST_12_MONTHS': 'mean', 'LLMA2_60_IN_LAST_12_MONTHS': 'mean',
                'LLMA2_90_IN_LAST_12_MONTHS': 'mean', 'LLMA2_FC_IN_LAST_12_MONTHS': 'mean',
                'LLMA2_REO_IN_LAST_12_MONTHS': 'mean', 'LLMA2_0_IN_LAST_12_MONTHS': 'mean', 
                'LLMA2_ORIG_RATE_ORIG_MR_SPREAD':0, 'COUNT_INT_RATE_LESS' :'mean', 'NUM_PRIME_ZIP':'mean'
                }
    
    categorical_cols = {'MBA_DELINQUENCY_STATUS':  ['0','3','6','9','C','F','R'], 'DELINQUENCY_STATUS_NEXT': ['0','3','6','9','C','F','R'],  #,'S','T','X'
                           'BUYDOWN_FLAG': ['N','U','Y'], 'NEGATIVE_AMORTIZATION_FLAG': ['N','U','Y'], 'PREPAY_PENALTY_FLAG': ['N','U','Y'],
                           'OCCUPANCY_TYPE': ['1','2','3','U'], 'PRODUCT_TYPE': ['10','20','30','40','50','51','52','53','54','5A','5Z',
                                            '60','61','62','63','6Z','70','80','81','82','83','84','8Z','U'], 
                           'PROPERTY_TYPE': ['1','2','3','4','5','6','7','8','9','M','U','Z'], 'LOAN_PURPOSE_CATEGORY': ['P','R','U'], 
                           'DOCUMENTATION_TYPE': ['1','2','3','U'], 'CHANNEL': ['1','2','3','4','5','6','7','8','9','A','B','C','D','U'], 
                           'LOAN_TYPE': ['1','2','3','4','5','6','U'], 'IO_FLAG': ['N','U','Y'], 
                           'CONVERTIBLE_FLAG': ['N','U','Y'], 'POOL_INSURANCE_FLAG': ['N','U','Y'], 'STATE': ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO',
                                               'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 
                                               'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 
                                               'NY', 'OH', 'OK', 'OR', 'PA', 'PR', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 
                                               'WA', 'WI', 'WV', 'WY'], 
                           'CURRENT_INVESTOR_CODE': ['240', '250', '253', 'U'], 'ORIGINATION_YEAR': ['B1995','1995','1996','1997','1998','1999','2000','2001','2002','2003',
                                                    '2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018']}

    time_cols = ['YEAR', 'MONTH'] #, 'PERIOD'] #no nan values        

    total_cols = numeric_cols.copy() 
    total_cols.extend(descriptive_cols)
    total_cols.extend(categorical_cols.keys())
    total_cols.extend(time_cols)
    print('total_cols size: ', len(total_cols)) #110 !=112?? set(chunk_cols) - set(total_cols): {'LOAN_ID', 'PERIOD'}
    
    pd.set_option('io.hdf.default_format','table')

    dist_file = pd.read_csv(os.path.join(RAW_DIR, "percentile features3-mean.csv"), sep=';', low_memory=False)
    dist_file.columns = dist_file.columns.str.upper()

    ncols = [x for x in numeric_cols if x.find('NAN')<0]
    print(ncols)

    #sum = 0
    #for elem in categorical_cols.values():
    #    sum += len(elem)
    #print('total categorical values: ', sum) #181

    for file_path in glob.glob(os.path.join(RAW_DIR, raw_dir,"*.txt")):  
        file_name = os.path.basename(file_path)
        if with_index==True:
            target_path = os.path.join(PRO_DIR, raw_dir,file_name[:-4])        
        else:
            target_path = os.path.join(PRO_DIR, raw_dir,file_name[:-4]+'_non_index')
        log_file=open(target_path+'-log.txt', 'w+', 1)        
        print('Preprocessing File: ' + file_path)
        log_file.write('Preprocessing File:  %s\r\n' % file_path)
        startTime = datetime.now()      
        
        if (output_hdf == True):
            #with  pd.HDFStore(target_path +'-pp.h5', complib='lzo', complevel=9) as hdf: #complib='lzo', complevel=9
            train_writer = pd.HDFStore(target_path +'-train_.h5', complib='lzo', complevel=9) 
            valid_writer = pd.HDFStore(target_path +'-valid_.h5', complib='lzo', complevel=9)
            test_writer = pd.HDFStore(target_path +'-test_.h5', complib='lzo', complevel=9) 

            print('generating: ', target_path +'-pp.h5')
            train_index, valid_index, test_index = prepro_chunk(file_name, file_path, chunksize, label, log_file, 
                                                                nan_cols, categorical_cols, descriptive_cols, time_cols,
                                                                dist_file, with_index, 
                                                                refNorm, train_period, valid_period, test_period, ncols,                                                                
                                                                hdf=[train_writer, valid_writer, test_writer], tfrec=None,
                                                                filtering_cols=filtering_cols)            


            if train_writer.get_storer('train/features').nrows != train_writer.get_storer('train/labels').nrows:
                    raise ValueError('Train-DataSet: Sizes should match!')  
            if valid_writer.get_storer('valid/features').nrows != valid_writer.get_storer('valid/labels').nrows:
                    raise ValueError('Valid-DataSet: Sizes should match!')  
            if test_writer.get_storer('test/features').nrows != test_writer.get_storer('test/labels').nrows:
                    raise ValueError('Test-DataSet: Sizes should match!')  

            print('train/features size: ', train_writer.get_storer('train/features').nrows)
            print('valid/features size: ', valid_writer.get_storer('valid/features').nrows)
            print('test/features size: ', test_writer.get_storer('test/features').nrows)

            log_file.write('***SUMMARY***\n')
            log_file.write('train/features size: %d\r\n' %(train_writer.get_storer('train/features').nrows))
            log_file.write('valid/features size: %d\r\n' %(valid_writer.get_storer('valid/features').nrows))
            log_file.write('test/features size: %d\r\n' %(test_writer.get_storer('test/features').nrows))

            logger.info('training, validation and testing set into .h5 file')        
        else:        
            train_writer = tf.python_io.TFRecordWriter(target_path +'-train_.tfrecords')
            valid_writer = tf.python_io.TFRecordWriter(target_path +'-valid_.tfrecords')
            test_writer = tf.python_io.TFRecordWriter(target_path +'-test_.tfrecords')
            train_index, valid_index, test_index = prepro_chunk(file_name, file_path, chunksize, label, log_file, 
                                                                nan_cols, categorical_cols, descriptive_cols, time_cols,
                                                                dist_file, with_index, 
                                                                refNorm, train_period, valid_period, test_period, ncols,
                                                                hdf=None, tfrec=[train_writer, valid_writer, test_writer],
                                                                filtering_cols=filtering_cols) 
        print(train_index, valid_index, test_index)
        train_writer.close()
        valid_writer.close()
        test_writer.close()        
        
        #def allfeatures_prepro_file(RAW_DIR, file_path, raw_dir, file_name, target_path, train_period, valid_period, test_period, log_file, dividing='percentage', chunksize=500000, 
        #                    refNorm=True, , with_index=True, output_hdf=True):

        #allfeatures_prepro_file(RAW_DIR, file_path, raw_dir, file_name, target_path, train_num, valid_num, test_num, log_file, dividing=dividing, chunksize=chunksize, 
        #                        refNorm=refNorm, with_index=with_index, output_hdf=output_hdf)          
        
        startTime = datetime.now() - startTime
        print('Preprocessing Time per file: ', startTime)     
        log_file.write('Preprocessing Time per file:  %s\r\n' % str(startTime))
        log_file.close()


In [16]:
# Wall-clock start of the whole preprocessing run; the elapsed time is printed
# after allfeatures_preprocessing() returns.
startTime = datetime.now()

# Create the output folder for this run under PRO_DIR. exist_ok=True makes the
# call idempotent (safe on notebook re-runs) and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs(os.path.join(PRO_DIR, FLAGS.prepro_dir), exist_ok=True)

# 26 features out of numerical_cols (size=50), selected by the per-class dataset
# analysis with n_components=None:
ncols = [
    'LOANAGE',
    'MARGIN',
    'MORTGAGE_RATE',
    'LLMA2_ORIG_RATE_ORIG_MR_SPREAD',
    'LLMA2_HIST_LAST_12_MONTHS_MIS',
    'COUNT_INT_RATE_LESS',
    'LIFETIME_RATE_FLOOR',
    'INITIAL_INTEREST_RATE',
    'LIFETIME_RATE_CAP',
    'LLMA2_PRIME',
    'LLMA2_ORIG_RATE_SPREAD',
    'ORIGINAL_BALANCE',
    'CURRENT_BALANCE',
    'UR',
    'LLMA2_SUBPRIME',
    'MOD_RATE',
    'LLMA2_CURRENT_INTEREST_SPREAD',
    'RATE_RESET_FREQUENCY',
    'CURRENT_INTEREST_RATE',
    'PAY_RESET_FREQUENCY',
    'DIF_RATE',
    'NUM_MODIF',
    'AGI',
    'PERIODIC_RATE_FLOOR',
    'LLMA2_30_IN_LAST_12_MONTHS',
    'LLMA2_C_IN_LAST_12_MONTHS',
]

# All '<FEATURE>_NAN' columns known to the dataset (presumably missing-value
# indicator flags for the matching numeric feature -- confirm against the
# preprocessing step that creates them).
all_nan_cols = [
    'MBA_DAYS_DELINQUENT_NAN',
    'CURRENT_INTEREST_RATE_NAN',
    'LOANAGE_NAN',
    'CURRENT_BALANCE_NAN',
    'SCHEDULED_PRINCIPAL_NAN',
    'SCHEDULED_MONTHLY_PANDI_NAN',
    'LLMA2_CURRENT_INTEREST_SPREAD_NAN',
    'NUM_MODIF_NAN',
    'P_RATE_TO_MOD_NAN',
    'MOD_RATE_NAN',
    'DIF_RATE_NAN',
    'P_MONTHLY_PAY_NAN',
    'MOD_MONTHLY_PAY_NAN',
    'DIF_MONTHLY_PAY_NAN',
    'CAPITALIZATION_AMT_NAN',
    'MORTGAGE_RATE_NAN',
    'BACKEND_RATIO_NAN',
    'ORIGINAL_TERM_NAN',
    'SALE_PRICE_NAN',
    'PREPAY_PENALTY_TERM_NAN',
    'NUMBER_OF_UNITS_NAN',
    'MARGIN_NAN',
    'PERIODIC_RATE_CAP_NAN',
    'PERIODIC_RATE_FLOOR_NAN',
    'LIFETIME_RATE_CAP_NAN',
    'LIFETIME_RATE_FLOOR_NAN',
    'RATE_RESET_FREQUENCY_NAN',
    'PAY_RESET_FREQUENCY_NAN',
    'FIRST_RATE_RESET_PERIOD_NAN',
    'LLMA2_ORIG_RATE_SPREAD_NAN',
    'AGI_NAN',
    'UR_NAN',
    'LLMA2_ORIG_RATE_ORIG_MR_SPREAD_NAN',
    'NUM_PRIME_ZIP_NAN',
]

# Keep only the '_NAN' columns whose base feature is in ncols, preserving the
# order of all_nan_cols. The base name must match exactly: the earlier prefix
# test (x.find(y) == 0) would also pick up any column that merely starts with a
# short feature name such as 'UR' or 'AGI', and would emit a column twice if it
# matched two prefixes.
sel_nan_cols = [nan_col for nan_col in all_nan_cols
                if nan_col.endswith('_NAN') and nan_col[:-len('_NAN')] in ncols]

# Categorical feature columns, named '<FEATURE>_<LEVEL>' (these look like
# one-hot indicator columns -- confirm against the encoding step).
# Each entry pairs a feature with its levels; the resulting column order is
# feature by feature in the order listed, levels in the order listed.
_cat_levels = (
    ('MBA_DELINQUENCY_STATUS', ('0', '3', '6', '9', 'C', 'F', 'R')),
    ('BUYDOWN_FLAG', ('N', 'U', 'Y')),
    ('NEGATIVE_AMORTIZATION_FLAG', ('N', 'U', 'Y')),
    ('PREPAY_PENALTY_FLAG', ('N', 'U', 'Y')),
    ('OCCUPANCY_TYPE', ('1', '2', '3', 'U')),
    ('PRODUCT_TYPE', ('10', '20', '30', '40', '50', '51', '52', '53',
                      '54', '5A', '5Z', '60', '61', '62', '63', '6Z',
                      '70', '80', '81', '82', '83', '84', '8Z', 'U')),
    ('PROPERTY_TYPE', ('1', '2', '3', '4', '5', '6', '7', '8', '9', 'M', 'U', 'Z')),
    ('LOAN_PURPOSE_CATEGORY', ('P', 'R', 'U')),
    ('DOCUMENTATION_TYPE', ('1', '2', '3', 'U')),
    ('CHANNEL', ('1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'U')),
    ('LOAN_TYPE', ('1', '2', '3', '4', '5', '6', 'U')),
    ('IO_FLAG', ('N', 'U', 'Y')),
    ('CONVERTIBLE_FLAG', ('N', 'U', 'Y')),
    ('POOL_INSURANCE_FLAG', ('N', 'U', 'Y')),
    ('STATE', ('AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI',
               'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN',
               'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH',
               'OK', 'OR', 'PA', 'PR', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT',
               'WA', 'WI', 'WV', 'WY')),
    ('CURRENT_INVESTOR_CODE', ('240', '250', '253', 'U')),
    ('ORIGINATION_YEAR', ('B1995', '1995', '1996', '1997', '1998', '1999', '2000',
                          '2001', '2002', '2003', '2004', '2005', '2006', '2007',
                          '2008', '2009', '2010', '2011', '2012', '2013', '2014',
                          '2015', '2016', '2017', '2018')),
)
cat_cols = [feature + '_' + level
            for feature, levels in _cat_levels
            for level in levels]

# Label columns: one 'DELINQUENCY_STATUS_NEXT_<LEVEL>' column per status level.
lab_cols = ['DELINQUENCY_STATUS_NEXT_' + level
            for level in ('0', '3', '6', '9', 'C', 'F', 'R')]

# Full column list handed to the preprocessing as filtering_cols, in this order:
# selected numeric features, their '_NAN' columns, categorical columns, labels.
# (Presumably the set of columns to keep -- confirm in prepro_chunk.)
allcols = (
    ncols
    + sel_nan_cols
    + cat_cols
    + lab_cols
)

# Run the preprocessing over FLAGS.prepro_dir, writing HDF5 output.
allfeatures_preprocessing(
    RAW_DIR,
    PRO_DIR,
    FLAGS.prepro_dir,
    FLAGS.train_period,
    FLAGS.valid_period,
    FLAGS.test_period,
    dividing='percentage',
    chunksize=FLAGS.prepro_chunksize,
    refNorm=FLAGS.ref_norm,
    with_index=FLAGS.prepro_with_index,
    output_hdf=True,
    filtering_cols=allcols,
)
# Total wall-clock time since startTime was taken at the top of the cell.
print('Preprocessing - Time: ', datetime.now() - startTime)

total_cols size:  111
['MBA_DAYS_DELINQUENT', 'CURRENT_INTEREST_RATE', 'LOANAGE', 'CURRENT_BALANCE', 'SCHEDULED_PRINCIPAL', 'SCHEDULED_MONTHLY_PANDI', 'LLMA2_CURRENT_INTEREST_SPREAD', 'LLMA2_C_IN_LAST_12_MONTHS', 'LLMA2_30_IN_LAST_12_MONTHS', 'LLMA2_60_IN_LAST_12_MONTHS', 'LLMA2_90_IN_LAST_12_MONTHS', 'LLMA2_FC_IN_LAST_12_MONTHS', 'LLMA2_REO_IN_LAST_12_MONTHS', 'LLMA2_0_IN_LAST_12_MONTHS', 'LLMA2_HIST_LAST_12_MONTHS_MIS', 'NUM_MODIF', 'P_RATE_TO_MOD', 'MOD_RATE', 'DIF_RATE', 'P_MONTHLY_PAY', 'MOD_MONTHLY_PAY', 'DIF_MONTHLY_PAY', 'CAPITALIZATION_AMT', 'MORTGAGE_RATE', 'FICO_SCORE_ORIGINATION', 'INITIAL_INTEREST_RATE', 'ORIGINAL_LTV', 'ORIGINAL_BALANCE', 'BACKEND_RATIO', 'ORIGINAL_TERM', 'SALE_PRICE', 'PREPAY_PENALTY_TERM', 'NUMBER_OF_UNITS', 'MARGIN', 'PERIODIC_RATE_CAP', 'PERIODIC_RATE_FLOOR', 'LIFETIME_RATE_CAP', 'LIFETIME_RATE_FLOOR', 'RATE_RESET_FREQUENCY', 'PAY_RESET_FREQUENCY', 'FIRST_RATE_RESET_PERIOD', 'LLMA2_PRIME', 'LLMA2_SUBPRIME', 'LLMA2_APPVAL_LT_SALEPRICE', 'LLMA2_ORIG_RAT

2018-11-23 16:10:22,038 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:10:26,929 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:10:39,871 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:10:40,257 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:10:40,595 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 400455


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:11:17,302 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:11:17,540 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:11:20,796 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:11:20,852 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 9044


2018-11-23 16:11:21,020 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:11:21,079 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 16833
chunk:  2  chunk size:  500000


2018-11-23 16:11:38,718 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:11:43,605 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:11:55,909 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:11:56,269 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:11:56,588 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 391441


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:12:34,082 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:12:34,324 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:12:37,331 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:12:37,392 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 9242


2018-11-23 16:12:37,576 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:12:37,634 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 16876
chunk:  3  chunk size:  500000


2018-11-23 16:12:54,949 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:12:59,345 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:13:11,848 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:13:12,211 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:13:12,534 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 389094


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:13:48,260 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:13:48,489 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:13:51,616 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:13:51,674 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10148


2018-11-23 16:13:51,897 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:13:51,968 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 18481
chunk:  4  chunk size:  500000


2018-11-23 16:14:08,587 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:14:13,202 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:14:26,333 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:14:26,712 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:14:27,039 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 394963


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:15:05,695 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:15:06,011 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:15:08,765 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:15:08,819 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11168


2018-11-23 16:15:09,023 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:15:09,078 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20509
chunk:  5  chunk size:  500000


2018-11-23 16:15:27,791 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:15:32,785 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:15:46,425 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:15:46,857 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:15:47,202 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 392167


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:16:23,408 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:16:23,623 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:16:26,352 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:16:26,402 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11674


2018-11-23 16:16:26,639 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:16:26,691 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 21682
chunk:  6  chunk size:  500000


2018-11-23 16:16:43,894 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:16:48,366 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:17:00,866 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:17:01,219 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:17:01,527 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 385419


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:17:37,400 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:17:37,613 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:17:40,312 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:17:40,363 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11994


2018-11-23 16:17:40,636 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:17:40,691 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 22501
chunk:  7  chunk size:  500000


2018-11-23 16:17:58,283 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:18:02,639 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:18:15,166 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:18:15,524 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:18:15,831 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 385446


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:18:53,606 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:18:53,854 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:18:57,399 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:18:57,462 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12256


2018-11-23 16:18:57,827 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:18:57,903 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 22881
chunk:  8  chunk size:  500000


2018-11-23 16:19:16,713 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:19:21,526 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:19:34,676 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:19:35,028 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:19:35,332 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 376263


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:20:10,079 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:20:10,296 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:20:13,537 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:20:13,587 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12064


2018-11-23 16:20:13,856 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:20:13,925 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 22931
chunk:  9  chunk size:  500000


2018-11-23 16:20:31,614 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:20:36,719 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:20:49,283 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:20:49,635 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:20:49,939 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 384041


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:21:26,150 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:21:26,367 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:21:29,326 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:21:29,378 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11883


2018-11-23 16:21:29,660 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:21:29,712 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 22427
chunk:  10  chunk size:  500000


2018-11-23 16:21:44,884 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:21:49,328 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:22:01,637 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:22:01,994 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:22:02,291 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 387220


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:22:37,450 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:22:37,661 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:22:40,581 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:22:40,639 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12425


2018-11-23 16:22:40,953 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:22:41,006 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 23225
chunk:  11  chunk size:  500000


2018-11-23 16:22:57,155 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:23:01,431 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:23:13,739 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:23:14,095 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:23:14,428 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 376512


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:23:48,850 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:23:49,057 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:23:51,869 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:23:51,920 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12634


2018-11-23 16:23:52,247 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:23:52,301 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 23646
chunk:  12  chunk size:  500000


2018-11-23 16:24:08,577 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:24:13,085 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:24:25,262 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:24:25,618 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:24:25,914 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 376052


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:25:00,104 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:25:00,314 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:25:03,007 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:25:03,059 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13488


2018-11-23 16:25:03,448 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:25:03,507 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 25849
chunk:  13  chunk size:  500000


2018-11-23 16:25:20,439 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:25:24,717 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:25:36,618 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:25:36,968 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:25:37,262 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 362143


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:26:14,392 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:26:14,624 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:26:17,342 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:26:17,396 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13920


2018-11-23 16:26:17,602 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:26:17,666 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 26886
chunk:  14  chunk size:  500000


2018-11-23 16:26:35,475 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:26:39,921 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:26:52,548 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:26:52,898 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:26:53,203 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 370480


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:27:28,196 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:27:28,408 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:27:31,260 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:27:31,317 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14203


2018-11-23 16:27:31,542 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:27:31,601 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 26928
chunk:  15  chunk size:  500000


2018-11-23 16:27:49,489 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:27:54,227 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:28:06,583 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:28:06,934 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:28:07,243 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 361468


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:28:41,925 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:28:42,194 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:28:45,300 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:28:45,355 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14693


2018-11-23 16:28:45,611 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:28:45,667 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 28380
chunk:  16  chunk size:  500000


2018-11-23 16:29:03,266 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:29:08,000 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:29:19,995 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:29:20,352 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:29:20,654 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 358983


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:29:54,814 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:29:55,022 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:29:57,649 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:29:57,706 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15455


2018-11-23 16:29:57,993 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:29:58,052 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 29435
chunk:  17  chunk size:  500000


2018-11-23 16:30:14,947 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:30:20,151 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:30:33,763 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:30:34,154 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:30:34,488 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 381973


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:31:09,519 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:31:09,732 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:31:12,593 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:31:12,651 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12116


2018-11-23 16:31:12,942 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:31:13,001 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 23349
chunk:  18  chunk size:  500000


2018-11-23 16:31:30,513 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:31:34,955 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:31:47,689 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:31:48,053 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:31:48,354 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 386389


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:32:22,475 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:32:22,697 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:32:25,971 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:32:26,030 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11016


2018-11-23 16:32:26,343 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:32:26,398 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20797
chunk:  19  chunk size:  500000


2018-11-23 16:32:43,080 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:32:47,448 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:32:59,692 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:33:00,098 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:33:00,449 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 392596


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:33:38,318 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:33:38,542 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:33:41,553 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:33:41,608 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10994


2018-11-23 16:33:41,886 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:33:41,947 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20588
chunk:  20  chunk size:  500000


2018-11-23 16:34:01,699 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:34:06,465 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:34:18,635 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:34:19,007 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:34:19,317 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 381090


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:34:54,389 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:34:54,639 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:34:57,723 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:34:57,780 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10864


2018-11-23 16:34:58,091 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:34:58,151 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20217
chunk:  21  chunk size:  500000


2018-11-23 16:35:17,620 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:35:22,059 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:35:34,174 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:35:34,593 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:35:34,982 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 380036


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:36:10,230 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:36:10,445 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:36:13,313 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:36:13,363 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11020


2018-11-23 16:36:13,659 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:36:13,712 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20882
chunk:  22  chunk size:  500000


2018-11-23 16:36:31,508 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:36:35,854 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:36:48,787 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:36:49,173 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:36:49,480 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 379246


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:37:25,725 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:37:25,975 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:37:28,990 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:37:29,046 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11698


2018-11-23 16:37:29,419 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:37:29,497 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 22402
chunk:  23  chunk size:  500000


2018-11-23 16:37:48,270 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:37:52,983 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:38:05,610 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:38:06,004 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:38:06,313 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 386152


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:38:43,875 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:38:44,125 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:38:47,460 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:38:47,525 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11151


2018-11-23 16:38:47,698 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:38:47,751 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20766
chunk:  24  chunk size:  500000


2018-11-23 16:39:07,081 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:39:12,457 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:39:25,160 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:39:25,622 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:39:25,945 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 384509


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:40:00,944 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:40:01,202 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:40:04,403 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:40:04,455 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10539


2018-11-23 16:40:04,655 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:40:04,708 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 19627
chunk:  25  chunk size:  500000


2018-11-23 16:40:22,546 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:40:27,081 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:40:39,695 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:40:40,074 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:40:40,382 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 391579


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:41:15,298 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:41:15,528 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:41:18,578 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:41:18,630 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10499


2018-11-23 16:41:18,822 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:41:18,873 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 19378
chunk:  26  chunk size:  500000


2018-11-23 16:41:34,866 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:41:39,675 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:41:53,263 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:41:53,662 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:41:53,973 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 392200


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:42:29,547 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:42:29,784 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:42:32,602 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:42:32,660 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11336


2018-11-23 16:42:32,870 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:42:32,921 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 21205
chunk:  27  chunk size:  500000


2018-11-23 16:42:48,904 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:42:53,615 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:43:07,666 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:43:08,046 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:43:08,382 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 385102


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:43:43,672 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:43:43,894 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:43:46,601 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:43:46,657 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12636


2018-11-23 16:43:46,899 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:43:46,958 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 24081
chunk:  28  chunk size:  500000


2018-11-23 16:44:02,605 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:44:07,268 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:44:19,702 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:44:20,070 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:44:20,371 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 371653


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:44:55,238 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:44:55,452 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:44:58,207 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:44:58,263 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14814


2018-11-23 16:44:58,537 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:44:58,597 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 28285
chunk:  29  chunk size:  500000


2018-11-23 16:45:14,565 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:45:18,937 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:45:31,515 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:45:31,961 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:45:32,353 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 357409


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:46:08,338 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:46:08,540 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:46:11,447 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:46:11,505 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 16067


2018-11-23 16:46:11,858 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:46:11,935 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 32192
chunk:  30  chunk size:  500000


2018-11-23 16:46:29,527 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:46:34,098 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:46:46,628 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:46:46,990 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:46:47,292 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 357265


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:47:21,452 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:47:21,671 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:47:24,556 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:47:24,623 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13505


2018-11-23 16:47:24,966 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:47:25,043 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 32712
chunk:  31  chunk size:  500000


2018-11-23 16:47:40,619 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:47:45,071 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:47:57,746 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:47:58,161 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:47:58,497 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 367403


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:48:33,156 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:48:33,365 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:48:35,911 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:48:35,971 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12336


2018-11-23 16:48:36,303 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:48:36,365 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 34850
chunk:  32  chunk size:  500000


2018-11-23 16:48:51,379 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:48:56,211 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:49:10,415 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:49:10,797 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:49:11,122 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 380538


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:49:48,500 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:49:48,717 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:49:51,708 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:49:51,763 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13303


2018-11-23 16:49:52,131 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:49:52,193 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 38609
chunk:  33  chunk size:  500000


2018-11-23 16:50:06,513 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:50:11,104 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:50:23,792 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:50:24,198 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:50:24,521 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 370187


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:51:01,351 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:51:01,563 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:51:04,542 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:51:04,601 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12106


2018-11-23 16:51:04,968 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:51:05,033 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 39342
chunk:  34  chunk size:  500000


2018-11-23 16:51:20,765 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:51:25,598 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:51:38,315 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:51:38,710 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:51:39,098 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 352541


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:52:16,182 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:52:16,384 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:52:19,059 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:52:19,116 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10331


2018-11-23 16:52:19,321 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:52:19,386 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 39138
chunk:  35  chunk size:  500000


2018-11-23 16:52:34,280 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:52:38,442 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:52:50,226 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:52:50,577 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:52:50,862 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 335859


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:53:24,842 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:53:25,095 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:53:28,236 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:53:28,295 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10472


2018-11-23 16:53:28,521 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:53:28,620 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 37946
chunk:  36  chunk size:  500000


2018-11-23 16:53:49,158 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:53:54,032 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:54:06,661 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:54:07,069 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:54:07,379 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 332226


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:54:42,094 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:54:42,342 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:54:45,186 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:54:45,240 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10372


2018-11-23 16:54:45,459 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:54:45,521 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 35806
chunk:  37  chunk size:  500000


2018-11-23 16:55:02,352 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:55:06,809 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:55:19,642 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:55:19,999 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:55:20,299 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 340152


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:55:56,330 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:55:56,531 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:55:59,323 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:55:59,376 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10562


2018-11-23 16:55:59,617 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:55:59,698 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 34610
chunk:  38  chunk size:  500000


2018-11-23 16:56:19,863 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:56:24,491 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:56:36,706 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:56:37,061 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:56:37,397 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 353384


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:57:11,761 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:57:11,967 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:57:14,611 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:57:14,664 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11511


2018-11-23 16:57:14,915 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:57:14,977 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 32614
chunk:  39  chunk size:  500000


2018-11-23 16:57:31,822 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:57:36,480 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:57:49,090 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:57:49,455 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:57:49,792 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 359894


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:58:24,998 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:58:25,232 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:58:28,372 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:58:28,426 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11812


2018-11-23 16:58:28,694 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:58:28,754 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 31181
chunk:  40  chunk size:  500000


2018-11-23 16:58:46,236 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 16:58:51,031 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:59:04,127 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:59:04,545 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:59:04,893 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 355592


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 16:59:41,000 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 16:59:41,199 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 16:59:43,864 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:59:43,914 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12361


2018-11-23 16:59:44,198 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 16:59:44,259 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 26880
chunk:  41  chunk size:  500000


2018-11-23 16:59:59,986 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:00:04,512 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:00:18,213 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:00:18,622 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:00:18,991 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 356378


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:00:54,859 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:00:55,063 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:00:57,642 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:00:57,693 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12804


2018-11-23 17:00:57,988 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:00:58,045 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 24016
chunk:  42  chunk size:  500000


2018-11-23 17:01:14,035 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:01:18,649 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:01:31,582 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:01:31,983 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:01:32,313 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 356996


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:02:08,704 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:02:08,930 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:02:11,896 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:02:11,946 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12635


2018-11-23 17:02:12,270 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:02:12,321 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 23695
chunk:  43  chunk size:  500000


2018-11-23 17:02:29,264 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:02:33,857 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:02:46,321 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:02:46,690 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:02:47,017 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 351537


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:03:21,935 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:03:22,136 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:03:24,889 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:03:24,940 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 12609


2018-11-23 17:03:25,272 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:03:25,328 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 23765
chunk:  44  chunk size:  500000


2018-11-23 17:03:41,315 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:03:45,796 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:03:58,418 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:03:58,776 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:03:59,115 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 383022


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:04:35,500 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:04:35,716 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:04:38,578 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:04:38,630 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14778


2018-11-23 17:04:39,026 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:04:39,097 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 27974
chunk:  45  chunk size:  500000


2018-11-23 17:04:57,494 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:05:02,005 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:05:14,728 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:05:15,148 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:05:15,489 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 374329


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:05:51,597 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:05:51,803 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:05:54,574 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:05:54,628 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15317


2018-11-23 17:05:54,855 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:05:54,914 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 28886
chunk:  46  chunk size:  500000


2018-11-23 17:06:11,267 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:06:15,808 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:06:28,862 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:06:29,255 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:06:29,566 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 380854


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:07:07,181 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:07:07,394 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:07:10,498 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:07:10,560 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15508


2018-11-23 17:07:10,860 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:07:10,923 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 28977
chunk:  47  chunk size:  500000


2018-11-23 17:07:28,374 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:07:33,217 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:07:45,828 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:07:46,194 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:07:46,564 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 381760


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:08:22,582 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:08:22,797 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:08:25,651 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:08:25,702 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15762


2018-11-23 17:08:25,981 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:08:26,037 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 29365
chunk:  48  chunk size:  500000


2018-11-23 17:08:41,785 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:08:46,348 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:08:59,333 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:08:59,692 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:09:00,005 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 381954


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:09:36,217 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:09:36,450 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:09:39,318 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:09:39,376 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15790


2018-11-23 17:09:39,657 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:09:39,712 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 29439
chunk:  49  chunk size:  500000


2018-11-23 17:09:56,093 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:10:00,680 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:10:13,392 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:10:13,772 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:10:14,112 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 378129


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:10:51,066 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:10:51,293 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:10:54,200 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:10:54,252 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14587


2018-11-23 17:10:54,547 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:10:54,610 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 27644
chunk:  50  chunk size:  500000


2018-11-23 17:11:11,447 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:11:16,239 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:11:29,028 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:11:29,421 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:11:29,718 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 383883


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:12:05,862 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:12:06,123 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:12:09,365 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:12:09,421 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13355


2018-11-23 17:12:09,754 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:12:09,810 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 25172
chunk:  51  chunk size:  500000


2018-11-23 17:12:27,374 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:12:31,906 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:12:44,560 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:12:44,921 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:12:45,260 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 394284


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:13:22,130 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:13:22,379 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:13:25,305 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:13:25,357 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11539


2018-11-23 17:13:25,659 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:13:25,711 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 21449
chunk:  52  chunk size:  500000


2018-11-23 17:13:44,744 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:13:49,306 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:14:01,787 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:14:02,162 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:14:02,481 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 391290


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:14:38,319 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:14:38,544 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:14:41,308 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:14:41,359 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10362


2018-11-23 17:14:41,660 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:14:41,717 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 18981
chunk:  53  chunk size:  500000


2018-11-23 17:14:59,511 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:15:03,968 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:15:16,559 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:15:16,966 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:15:17,378 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 388059


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:15:52,736 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:15:52,950 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:15:55,718 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:15:55,769 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11169


2018-11-23 17:15:56,106 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:15:56,156 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20546
chunk:  54  chunk size:  500000


2018-11-23 17:16:11,729 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:16:16,427 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:16:28,919 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:16:29,279 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:16:29,569 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 378682


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:17:06,640 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:17:06,897 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:17:09,679 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:17:09,730 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13436


2018-11-23 17:17:09,914 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:17:09,973 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 25489
chunk:  55  chunk size:  500000


2018-11-23 17:17:29,471 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:17:33,991 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:17:45,930 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:17:46,297 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:17:46,586 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 359628


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:18:20,194 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:18:20,402 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:18:23,059 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:18:23,111 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14108


2018-11-23 17:18:23,322 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:18:23,377 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 26774
chunk:  56  chunk size:  500000


2018-11-23 17:18:40,516 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:18:45,254 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:18:58,017 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:18:58,403 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:18:58,764 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 363190


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:19:35,141 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:19:35,345 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:19:38,573 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:19:38,625 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15066


2018-11-23 17:19:38,861 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:19:38,916 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 28599
chunk:  57  chunk size:  500000


2018-11-23 17:19:56,681 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:20:01,255 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:20:13,596 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:20:14,013 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:20:14,322 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 373346


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:20:48,639 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:20:48,846 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:20:51,590 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:20:51,640 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14084


2018-11-23 17:20:51,891 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:20:51,946 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 26987
chunk:  58  chunk size:  500000


2018-11-23 17:21:08,842 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:21:13,230 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:21:25,660 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:21:26,013 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:21:26,325 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 386613


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:22:02,087 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:22:02,362 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:22:05,201 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:22:05,250 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10672


2018-11-23 17:22:05,494 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:22:05,547 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20123
chunk:  59  chunk size:  500000


2018-11-23 17:22:23,227 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:22:27,805 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:22:40,113 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:22:40,483 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:22:40,786 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 381260


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:23:15,890 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:23:16,122 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:23:18,852 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:23:18,905 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11810


2018-11-23 17:23:19,192 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:23:19,255 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 22341
chunk:  60  chunk size:  500000


2018-11-23 17:23:36,334 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:23:41,089 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:23:53,980 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:23:54,352 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:23:54,661 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 398213


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:24:30,394 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:24:30,611 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:24:34,039 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:24:34,100 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 10853


2018-11-23 17:24:34,380 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:24:34,431 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 20049
chunk:  61  chunk size:  500000


2018-11-23 17:24:51,080 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:24:55,654 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:25:08,551 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:25:08,930 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:25:09,247 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 388083


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:25:44,300 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:25:44,533 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:25:47,242 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:25:47,290 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11270


2018-11-23 17:25:47,594 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:25:47,647 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 21206
chunk:  62  chunk size:  500000


2018-11-23 17:26:03,967 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:26:08,561 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:26:21,145 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:26:21,524 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:26:21,834 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 376079


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:26:57,199 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:26:57,404 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:27:00,521 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:27:00,572 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14774


2018-11-23 17:27:00,902 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:27:00,957 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 28095
chunk:  63  chunk size:  500000


2018-11-23 17:27:19,089 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:27:24,065 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:27:38,072 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:27:38,445 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:27:38,766 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 358601


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:28:15,520 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:28:15,726 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:28:18,538 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:28:18,596 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15397


2018-11-23 17:28:18,974 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:28:19,035 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 37221
chunk:  64  chunk size:  500000


2018-11-23 17:28:34,532 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:28:39,419 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:28:52,614 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:28:53,042 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:28:53,371 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 350234


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:29:29,558 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:29:29,796 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:29:32,523 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:29:32,579 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13182


2018-11-23 17:29:32,795 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:29:32,860 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 41491
chunk:  65  chunk size:  500000


2018-11-23 17:29:49,585 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:29:54,484 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:30:07,123 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:30:07,467 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:30:07,759 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 334550


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:30:41,989 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:30:42,225 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:30:44,884 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:30:44,958 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 9953


2018-11-23 17:30:45,170 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:30:45,235 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 39203
chunk:  66  chunk size:  500000


2018-11-23 17:31:02,203 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:31:06,997 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:31:19,960 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:31:20,351 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:31:20,669 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 356056


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:31:57,831 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:31:58,033 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:32:00,812 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:32:00,862 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 11506


2018-11-23 17:32:01,101 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:32:01,186 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 39085
chunk:  67  chunk size:  500000


2018-11-23 17:32:18,960 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:32:23,819 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:32:37,364 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:32:37,718 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:32:38,028 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 350995


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:33:14,646 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:33:14,845 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:33:17,266 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:33:17,317 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13109


2018-11-23 17:33:17,560 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:33:17,618 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 35249
chunk:  68  chunk size:  500000


2018-11-23 17:33:33,357 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:33:38,173 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:33:51,882 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:33:52,282 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:33:52,626 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 351140


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:34:30,265 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:34:30,500 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:34:33,418 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:34:33,473 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13252


2018-11-23 17:34:33,753 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:34:33,814 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 25967
chunk:  69  chunk size:  500000


2018-11-23 17:34:51,151 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:34:56,038 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:35:09,535 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:35:09,948 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:35:10,321 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 356941


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:35:46,632 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:35:46,832 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:35:49,381 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:35:49,434 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 14738


2018-11-23 17:35:49,709 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:35:49,763 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 28100
chunk:  70  chunk size:  500000


2018-11-23 17:36:07,300 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:36:11,892 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:36:24,896 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:36:25,289 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:36:25,683 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 380764


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:37:02,639 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:37:02,869 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:37:06,044 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:37:06,100 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 15989


2018-11-23 17:37:06,441 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:37:06,503 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 29453
chunk:  71  chunk size:  500000


2018-11-23 17:37:23,042 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:37:27,911 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:37:41,095 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:37:41,468 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:37:41,770 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 386598


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:38:17,870 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:38:18,088 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:38:21,350 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:38:21,401 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 13826


2018-11-23 17:38:21,712 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:38:21,765 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 26329
chunk:  72  chunk size:  340072


2018-11-23 17:38:33,138 - drop_invalid_delinquency_status - INFO - invalid_delinquency_status dropped
2018-11-23 17:38:36,123 - drop_invalid_delinquency_status - INFO - dropping invalid transitions and delinquency status, fill nan values, drop duplicates
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:38:45,052 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:38:45,310 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:38:45,526 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


STANDARD DEV zero:  []
Records for train Set - Number of rows: 269018


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
2018-11-23 17:39:10,822 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
2018-11-23 17:39:10,990 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...
2018-11-23 17:39:13,284 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:39:13,332 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for valid Set - Number of rows: 8916


2018-11-23 17:39:13,613 - allfeatures_drop_cols - INFO - ...Columns Excluded from dataset...
2018-11-23 17:39:13,665 - allfeatures_extract_labels - INFO - ...Labels extracted from Dataset...


Records for test Set - Number of rows: 16525


2018-11-23 17:39:13,892 - allfeatures_extract_labels - INFO - training, validation and testing set into .h5 file


train/features size:  26769588
valid/features size:  901868
test/features size:  1915122
26769588 901868 1915122
Preprocessing Time per file:  1:29:09.921946
Preprocessing - Time:  1:29:09.952864
