In [2]:
import sys
sys.path.append('../')

import pandas as pd
import numpy as np
import math
from timeit import default_timer as timer
from datetime import datetime, timedelta
import numba
from ensemble_processing import load_data, load, save
from ta import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
pd.options.display.max_rows=500
pd.options.display.max_columns=500

In [4]:
def fix_duplicate_columns(df):
    # Get unique list of columms
    unique_cols = np.unique(df.columns.values)
    
    for col in unique_cols:
        column_numbers = [x for x in range(df.shape[1])]  # list of columns' integer indices
        remove_index = -1
        already_located = False

        for col_num in range(len(df.columns)):
            if df.columns[col_num] == col and already_located:
                remove_index = col_num
                print('Found duplicate for ', col, '- remove index', remove_index)
            elif df.columns[col_num] == col and not already_located:
                already_located = True
#                 print('Found first occurence', col, '-', col_num)
                
        # If a duplicate has been found, remove the column from the index list
        if remove_index != -1:
            column_numbers.remove(remove_index) #removing column integer index n
            df = df.iloc[:, column_numbers] #return all columns except the nth column

    return df


In [5]:
from scipy import stats
from processing_constants import ALL_CONTINUOUS_COLUMNS, HIGH_NAN_COLUMNS
from processing_constants import BOLLINGER_PREDICTION_COLUMNS, BOLLINGER_VALUE_COLUMNS
ID_COLUMNS = ['symbol', 'GICSSector', 'GICSIndustryGroup', 'GICSIndustry']
DATE_COLS = ['exDividendDate']

median_cols = []
median_cols.extend(ALL_CONTINUOUS_COLUMNS)
median_cols.extend(DATE_COLS)
# Remove val which isn't in df
median_cols.remove('quoteDate_TIMESTAMP')

sum_cols = ['totalVolume']

mode_cols = []
# mode_cols.extend(ID_COLUMNS)

min_numeric_cols = ['weekLow']

min_string_cols = []
min_string_cols.extend(BOLLINGER_PREDICTION_COLUMNS)
min_string_cols.extend(BOLLINGER_VALUE_COLUMNS)

max_cols = ['weekHigh']
first_cols = ['weekOpen']
last_cols = ['weekClose']

def return_week_summary_symbol(df):
    """
        Creates a weekly summary of the daily results for a symbol - assumes a datetime index
    """

    # remove high nan cols
    df.drop(HIGH_NAN_COLUMNS, axis=1, inplace=True, errors='ignore')

    # remove separate date col
    df.drop(['quoteDate'], axis=1, inplace=True, errors='ignore')

    # cols for special vals
    df['totalVolume'] = df['volume']
    df['weekLow'] = df['daysLow']
    df['weekHigh'] = df['daysHigh']
    df['weekOpen'] = df['previousClose']
    df['weekClose'] = df['adjustedPrice']

    if len(median_cols):
        median_resample = df[median_cols].resample('1W').median()
    else:
        median_resample = pd.DataFrame()

    if len(sum_cols):
        sum_resample = df[sum_cols].resample('1W').sum()
    else:
        sum_resample = pd.DataFrame()

    if len(mode_cols):
        mode_resample = pd.DataFrame()

        for col in ID_COLUMNS:
            temp_df = pd.DataFrame()
            temp_df[col] = df[col].resample('1W').apply(lambda x: (stats.mode(x, axis=None)[0][0]))
            mode_resample = pd.concat([mode_resample, temp_df], axis=1)
            
    else:
        mode_resample = pd.DataFrame()

    if len(min_numeric_cols):
        min_numeric_resample = df[min_numeric_cols].resample('1W').min()
    else:
        min_numeric_resample = pd.DataFrame()

    if len(min_string_cols):
        min_string_resample = df[min_string_cols].resample('1W').min()
    else:
        min_string_resample = pd.DataFrame()

    if len(max_cols):
        max_resample = df[max_cols].resample('1W').max()
    else:
        max_resample = pd.DataFrame()

    if len(first_cols):
        first_resample = df[first_cols].resample('1W').first()
    else:
        first_resample = pd.DataFrame()

    if len(last_cols):
        last_resample = df[last_cols].resample('1W').last()
    else:
        last_resample = pd.DataFrame()

        
    return pd.concat([median_resample, sum_resample, min_numeric_resample, min_string_resample, max_resample, 
                      first_resample, last_resample], axis=1)


In [6]:
def split_df(df, train_proportion, validation_test_proportion):
#     num_recs = df.shape[0]
#     train_num = int(train_proportion * num_recs)
#     validation_test = num_recs - train_num
#     validation_num = int(validation_test_proportion * validation_test) + train_num
#     test_num = num_recs - validation_num    

#     train_df = df.iloc[0:train_num,:]
#     validation_df = df.iloc[train_num:validation_num,:]
    print('Data split.  Total recs:', df.shape)
    train_df = df.sample(frac=train_proportion)
    print('Train recs:', train_df.shape)
    
    df.drop(train_df.index, inplace=True)
    print('Remaining data recs:', df.shape)
    
    if validation_test_proportion < 1:
        validation_df = df.sample(frac=validation_test_proportion)
        print('Validation recs:', validation_df.shape)
        df.drop(validation_df.index, inplace=True)
        print('Remaining data recs:', df.shape)
        test_df = df
        print('Test recs:', test_df.shape)
    else:
        validation_df = df
        test_df = None
        
    return train_df, validation_df, test_df

In [7]:
from optimise_dataframe import optimise_df
from processing_constants import WHOLE_MARKET_COLUMNS

WHOLE_MARKET_TA = ['allordpreviousclose', 'asxpreviousclose','640106_A3597525W', 'FIRMMCRT', 'FXRUSD', 
                   'GRCPAIAD', 'GRCPAISAD', 'GRCPBCAD', 'GRCPBCSAD', 'GRCPBMAD', 'GRCPNRAD', 'GRCPRCAD', 
                   'H01_GGDPCVGDP', 'H01_GGDPCVGDPFY', 'H05_GLFSEPTPOP']

def add_ta_values_to_df(df, feature_map):
    # Calculate ta features

    return add_all_ta_features(df, ta_open, ta_high, ta_low, ta_close, ta_volume, 
                               fillna=ta_fillna, colprefix=ta_colprefix)

def retrieve_and_calculate_weekly_symbol_dfs(path, run_str=None, load_list=None):
    """
        Retrieves the individual dataframes saved during pre-processing, calulates weekly values,
          adds Technical Analysis values.  It then divides the data into 70 / 15 / 15 as train / 
          validation / test and returns the three data sets
    """
    train_symbol_dfs = []
    validation_symbol_dfs = []
    test_symbol_dfs = []
    all_dfs = []
    whole_market_df = pd.DataFrame()
    
    # Create list of files to load
    if load_list:
        file_list = [(path + 'ml-symbol-' + item + '-' + run_str + '.pkl.gz') for item in load_list]
    else:
        print('Checking for files from', path)
        # Return files in path
        file_list = glob.glob(path + 'ml-weekly-symbol-*' + run_str + '.pkl.gz')
        print('Found', len(file_list),'weekly symbol files')
        
    

    # Pre-defined files to look for
    for file in file_list:
        daily_symbol_df = pd.read_pickle(file, compression='gzip')
        whole_market_df = whole_market_df.append(daily_symbol_df[WHOLE_MARKET_COLUMNS])
        symbol = daily_symbol_df.iloc[0,:]['symbol']
        GICSSector = daily_symbol_df.iloc[0,:]['GICSSector']
        GICSIndustryGroup = daily_symbol_df.iloc[0,:]['GICSIndustryGroup']
        GICSIndustry = daily_symbol_df.iloc[0,:]['GICSIndustry']

        weekly_symbol_df = return_week_summary_symbol(daily_symbol_df)
        weekly_symbol_df = fix_duplicate_columns(weekly_symbol_df)
        weekly_symbol_df['symbol'] = symbol
        weekly_symbol_df['GICSSector'] = GICSSector
        weekly_symbol_df['GICSIndustryGroup'] = GICSIndustryGroup
        weekly_symbol_df['GICSIndustry'] = GICSIndustry

        complete_weekly_df = add_all_ta_features(weekly_symbol_df, 'weekOpen', 'weekHigh', 'weekLow', 
                                                 'weekClose', 'totalVolume', fillna=True, colprefix='ta_')
#         complete_weekly_df = add_ta_values_to_df(weekly_symbol_df)
        # Create target column - 8 weeks in the future
        complete_weekly_df['target'] = (complete_weekly_df['adjustedPrice'].shift(-8) - complete_weekly_df['adjustedPrice'])  / complete_weekly_df['adjustedPrice'].clip(lower=0.1) * 100

        complete_weekly_df.dropna(subset=['target'], inplace=True)
        complete_weekly_df = optimise_df(complete_weekly_df)
        train_df, validation_df, test_df = split_df(complete_weekly_df, 0.7, 0.5)

        all_dfs.append(complete_weekly_df)
        train_symbol_dfs.append(train_df)
        validation_symbol_dfs.append(validation_df) 
        test_symbol_dfs.append(test_df) 

    print('Consolidating whole market data')
    whole_market_df = whole_market_df.drop_duplicates()
    # Ensure there is only one record per day
    whole_market_df = whole_market_df.groupby('quoteDate').first()
    whole_market_df['quoteDate'] = whole_market_df.index

    # Combine into weekly data
    print('Combining into weekly data and adding ta')

    # convert into weekly vals for each col and add ta
    whole_market_weekly_dfs = []
    for col in WHOLE_MARKET_TA:
        print('Calculating weekly data for', col)
        resample = pd.DataFrame()
        resample[col + '_low'] = whole_market_df[col].resample('1W').min()
        resample[col + '_high'] = whole_market_df[col].resample('1W').max()
        resample[col + '_open'] = whole_market_df[col].resample('1W').first()
        resample[col + '_close'] = whole_market_df[col].resample('1W').last()
        resample[col + '_volume'] = 0

#         print('Resample shape for', col, resample.shape)
        print('Adding ta data for', col)
        resample = add_all_ta_features(resample, col + '_open', col + '_high', col + '_low', 
                                       col + '_close', col + '_volume', fillna=True, colprefix= col + '_ta_')
        
#         print('Resample shape after ta for', col, resample.shape)
        resample.index.names = ['week_starting']
        whole_market_weekly_dfs.append(resample)

    print('Concatenating whole market data')
    whole_market_weekly_df = pd.concat(whole_market_weekly_dfs, axis=1)
    whole_market_weekly_df.index.names = ['week_starting']
    print('Concatenated whole market shape', whole_market_weekly_df.shape)
        
    print('Concatenating symbol dfs')
    # Create empty data frame
    all_df = pd.concat(all_dfs)
    all_df.index.names = ['week_starting']
    train_df = pd.concat(train_symbol_dfs)
    train_df.index.names = ['week_starting']
    validation_df = pd.concat(validation_symbol_dfs)
    validation_df.index.names = ['week_starting']
    test_df = pd.concat(test_symbol_dfs)
    test_df.index.names = ['week_starting']
    
    print('Adding whole market data')
    all_df = all_df.merge(whole_market_weekly_df, how='left', left_on='week_starting', right_on='week_starting')
    train_df = train_df.merge(whole_market_weekly_df, how='left', left_on='week_starting', right_on='week_starting')
    validation_df = validation_df.merge(whole_market_weekly_df, how='left', left_on='week_starting', right_on='week_starting')
    test_df = test_df.merge(whole_market_weekly_df, how='left', left_on='week_starting', right_on='week_starting')
    
    print('Optimising symbol dfs')
    all_df = optimise_df(all_df)
    train_df = optimise_df(train_df)
    validation_df = optimise_df(validation_df)
    test_df = optimise_df(test_df)

    return all_df, train_df, validation_df, test_df


In [8]:
# experiment_symbols = ['WAX']

# experiment_symbols = ['WAX', 'IVV', 'WESN', 'IAA', 'XRO', 'MTB', 'MXR', 'RCP', 'ISX', 'IMI']

# experiment_symbols = ['GSW','VGAD','CUA','TIX','FLT','PTN','OVN','IRU','FDM','HDF','SVW','BWX',
#                       'FPH','AGL','JHC','HIN','TRS','HGO','SGP','AVN','DOW','NVL','AOG','BGL',
#                       'HSN','CSS','SWM','RIC','CMI','MKE','BCI','IBG','SWJ','IXP','MYX','RNY',
#                       'CZA','SRV','VMT','CDM','CLQ','BPS','CSE','ORR','OOK','EOS','BRL','BWP',
#                       'ENC','FCT','SIO','SZG','ECG','SXA','MHC','PHK','ICT','CLH','NTM','TAS',
#                       'REV','CII','FND','ZGM','GOE','SSI','QMN','EAL','DSB','PNX','KRS','RXH',
#                       'LAU','CNW','GSZ','IBC','SAS','EAS','OCC','AIS','INK','AIK','EVM','MSV',
#                       'XST','GMR','WWI','JYC','BSM','VRX','TKL','WFE','ZMI','SHK','DGO','BD1',
#                       'ALT','SES','RNO','MXC','IRC','GTR','AYM','RLC','IVG','MDG','MPE','MOT',
#                       'OAR','HGL','DSE','VII','KTL']

experiment_symbols = ['VGS', 'ALB', 'CUA', '1PG', 'MBN', 'UGL', 'TTS', 'MGE', 'ALQ', 'CDC', 'TFC', 
                      'ASL', 'BAL', 'CCV', 'DRM', 'TPM', 'EOS', 'SCP', 'CZA', 'SLR', 'CKF', 'NAB', 
                      'KCN', 'SMR', 'FPH', 'EWC', 'OTR', 'ASY', 'SLX', 'CCP', 'FUN', 'NWS', 'CSS', 
                      'DJW', 'WHC', 'LHC', 'OGX', 'VOC', 'HPI', 'VLW', 'LYC', 'RWD', 'MGR', 'CSR', 
                      'RDF', 'AGS', 'MZM', 'IAU', 'CLV', 'HAV', 'MAT', 'MNY', 'RMP', 'DFM', 'EOC', 
                      'CKA', 'WPP', 'EAR', 'CLH', 'PDZ', 'CTM', 'MVP', 'CAP', 'SHM', 'RHP', 'GEG', 
                      'AWV', 'APL', 'CAS', 'YPB', 'HUO', 'NCK', 'DKM', 'BSN', 'UBP', 'TPP', 'MYO', 
                      'CIN', 'MMI', 'SWJ', 'EVS', 'AZV', 'MOD', 'BAR', 'RBO', 'GSC', 'EXC', 'OSP', 
                      'PLA', 'IQE', 'QTM', 'LCY', 'CAV', 'TRL', 'NIO', 'EXR', 'FRC', 'BMZ', 'AEE', 
                      'AUK', 'SSN', 'REF', 'RDS', 'SDL', 'DVA', 'FEL', 'SMP', 'TMM', 'WBT', 'VEI', 
                      'VXR', 'APG', 'SPI', 'CNL', 'IHL', 'SMN', 'CAF', 'RXH', 'AAX', 'MMR', 'PIO', 
                      'PAG', 'DRG', 'AOU', 'MGL', 'AQX', 'SBM', 'SSG', 'PIL', 'DGX', 'BGG', 'MGV', 
                      'AZH', 'DRX', 'ESI', 'ADV', 'USA', 'FRN', 'PAA', 'MSM', 'GPP', 'AOP', 'TKL', 
                      'MGP', 'FDV', 'CBD', 'WDR', 'CCA', 'NAG', 'DRK', 'MCS', 'CTR', 'NWE', 'UIL', 
                      'OBS', 'NCR', 'MUS', 'BCC', 'CZR', 'LCD', 'VKA', 'ICU', 'SGO', 'GLY', 'CNC', 
                      'TPD', 'CIO', 'GLG', 'QBE', 'CPK', 'ADA', 'DME', 'QNL', 'SBI', 'RVY', 'SXI', 
                      'AUZ', 'WFE', 'QUB', 'SCG', 'GBX', 'CXD', 'DIG', 'CGN', 'CJC', 'SST', 'IOT', 
                      'FAC', 'SLE', 'KMC', 'SRV', 'CCZ', 'LNU', 'LNK', 'LSH', 'ECO', 'LEX', 'EHE', 
                      'IFM', 'MLC', 'SRA', 'AFT', 'LGM', 'OHE', 'AMP', 'BAS', 'TFS', 'GSZ', 'EPY', 
                      'BUE', 'BDI', 'PAW', 'APD', 'SDF', 'MBD', 'GIA', 'CLT', 'COJ', 'ONT', 'GLB', 
                      'SKT', 'SVW', 'BEN', 'ROG', 'CMI']

all_df, train_df, validation_df, test_df = retrieve_and_calculate_weekly_symbol_dfs('../data/symbols/', '20190416', experiment_symbols)

print('All data:', all_df.shape)
print('Training data:', train_df.shape)
print('Validation data:', validation_df.shape)
print('Test data:', test_df.shape)

all_df.to_pickle('../data/ml-ta-all-data-20190416.pkl.gz', compression='gzip')
train_df.to_pickle('../data/ml-ta-train-data-20190416.pkl.gz', compression='gzip')
validation_df.to_pickle('../data/ml-ta-validation-data-20190416.pkl.gz', compression='gzip')
test_df.to_pickle('../data/ml-ta-test-data-20190416.pkl.gz', compression='gzip')



  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (173, 283)
Train recs: (121, 283)
Remaining data recs: (52, 283)
Validation recs: (26, 283)
Remaining data recs: (26, 283)
Test recs: (26, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (210, 283)
Train recs: (147, 283)
Remaining data recs: (63, 283)
Validation recs: (32, 283)
Remaining data recs: (31, 283)
Test recs: (31, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (156, 283)
Train recs: (109, 283)
Remaining data recs: (47, 283)
Validation recs: (24, 283)
Remaining data recs: (23, 283)
Test recs: (23, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (494, 283)
Train recs: (346, 283)
Remaining data recs: (148, 283)
Validation recs: (74, 283)
Remaining data recs: (74, 283)
Test recs: (74, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (418, 283)
Train recs: (293, 283)
Remaining data recs: (125, 283)
Validation recs: (62, 283)
Remaining data recs: (63, 283)
Test recs: (63, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (488, 283)
Train recs: (342, 283)
Remaining data recs: (146, 283)
Validation recs: (73, 283)
Remaining data recs: (73, 283)
Test recs: (73, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (538, 283)
Train recs: (377, 283)
Remaining data recs: (161, 283)
Validation recs: (80, 283)
Remaining data recs: (81, 283)
Test recs: (81, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (153, 283)
Train recs: (107, 283)
Remaining data recs: (46, 283)
Validation recs: (23, 283)
Remaining data recs: (23, 283)
Test recs: (23, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (80, 283)
Train recs: (56, 283)
Remaining data recs: (24, 283)
Validation recs: (12, 283)
Remaining data recs: (12, 283)
Test recs: (12, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (496, 283)
Train recs: (347, 283)
Remaining data recs: (149, 283)
Validation recs: (74, 283)
Remaining data recs: (75, 283)
Test recs: (75, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (238, 283)
Train recs: (167, 283)
Remaining data recs: (71, 283)
Validation recs: (36, 283)
Remaining data recs: (35, 283)
Test recs: (35, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (469, 283)
Train recs: (328, 283)
Remaining data recs: (141, 283)
Validation recs: (70, 283)
Remaining data recs: (71, 283)
Test recs: (71, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (326, 283)
Train recs: (228, 283)
Remaining data recs: (98, 283)
Validation recs: (49, 283)
Remaining data recs: (49, 283)
Test recs: (49, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (536, 283)
Train recs: (375, 283)
Remaining data recs: (161, 283)
Validation recs: (80, 283)
Remaining data recs: (81, 283)
Test recs: (81, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (589, 283)
Train recs: (412, 283)
Remaining data recs: (177, 283)
Validation recs: (88, 283)
Remaining data recs: (89, 283)
Test recs: (89, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (395, 283)
Train recs: (276, 283)
Remaining data recs: (119, 283)
Validation recs: (60, 283)
Remaining data recs: (59, 283)
Test recs: (59, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (464, 283)
Train recs: (325, 283)
Remaining data recs: (139, 283)
Validation recs: (70, 283)
Remaining data recs: (69, 283)
Test recs: (69, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (493, 283)
Train recs: (345, 283)
Remaining data recs: (148, 283)
Validation recs: (74, 283)
Remaining data recs: (74, 283)
Test recs: (74, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (497, 283)
Train recs: (348, 283)
Remaining data recs: (149, 283)
Validation recs: (74, 283)
Remaining data recs: (75, 283)
Test recs: (75, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (601, 283)
Train recs: (421, 283)
Remaining data recs: (180, 283)
Validation recs: (90, 283)
Remaining data recs: (90, 283)
Test recs: (90, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (297, 283)
Train recs: (208, 283)
Remaining data recs: (89, 283)
Validation recs: (44, 283)
Remaining data recs: (45, 283)
Test recs: (45, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (224, 283)
Train recs: (157, 283)
Remaining data recs: (67, 283)
Validation recs: (34, 283)
Remaining data recs: (33, 283)
Test recs: (33, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (388, 283)
Train recs: (272, 283)
Remaining data recs: (116, 283)
Validation recs: (58, 283)
Remaining data recs: (58, 283)
Test recs: (58, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (272, 283)
Train recs: (190, 283)
Remaining data recs: (82, 283)
Validation recs: (41, 283)
Remaining data recs: (41, 283)
Test recs: (41, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (560, 283)
Train recs: (392, 283)
Remaining data recs: (168, 283)
Validation recs: (84, 283)
Remaining data recs: (84, 283)
Test recs: (84, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (606, 283)
Train recs: (424, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (599, 283)
Train recs: (419, 283)
Remaining data recs: (180, 283)
Validation recs: (90, 283)
Remaining data recs: (90, 283)
Test recs: (90, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (175, 283)
Train recs: (122, 283)
Remaining data recs: (53, 283)
Validation recs: (26, 283)
Remaining data recs: (27, 283)
Test recs: (27, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (149, 283)
Train recs: (104, 283)
Remaining data recs: (45, 283)
Validation recs: (22, 283)
Remaining data recs: (23, 283)
Test recs: (23, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (581, 283)
Train recs: (407, 283)
Remaining data recs: (174, 283)
Validation recs: (87, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (589, 283)
Train recs: (412, 283)
Remaining data recs: (177, 283)
Validation recs: (88, 283)
Remaining data recs: (89, 283)
Test recs: (89, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (192, 283)
Train recs: (134, 283)
Remaining data recs: (58, 283)
Validation recs: (29, 283)
Remaining data recs: (29, 283)
Test recs: (29, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (515, 283)
Train recs: (360, 283)
Remaining data recs: (155, 283)
Validation recs: (78, 283)
Remaining data recs: (77, 283)
Test recs: (77, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (406, 283)
Train recs: (284, 283)
Remaining data recs: (122, 283)
Validation recs: (61, 283)
Remaining data recs: (61, 283)
Test recs: (61, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (123, 283)
Train recs: (86, 283)
Remaining data recs: (37, 283)
Validation recs: (18, 283)
Remaining data recs: (19, 283)
Test recs: (19, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (579, 283)
Train recs: (405, 283)
Remaining data recs: (174, 283)
Validation recs: (87, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (606, 283)
Train recs: (424, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (227, 283)
Train recs: (159, 283)
Remaining data recs: (68, 283)
Validation recs: (34, 283)
Remaining data recs: (34, 283)
Test recs: (34, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (237, 283)
Train recs: (166, 283)
Remaining data recs: (71, 283)
Validation recs: (36, 283)
Remaining data recs: (35, 283)
Test recs: (35, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (524, 283)
Train recs: (367, 283)
Remaining data recs: (157, 283)
Validation recs: (78, 283)
Remaining data recs: (79, 283)
Test recs: (79, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (101, 283)
Train recs: (71, 283)
Remaining data recs: (30, 283)
Validation recs: (15, 283)
Remaining data recs: (15, 283)
Test recs: (15, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (199, 283)
Train recs: (139, 283)
Remaining data recs: (60, 283)
Validation recs: (30, 283)
Remaining data recs: (30, 283)
Test recs: (30, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (482, 283)
Train recs: (337, 283)
Remaining data recs: (145, 283)
Validation recs: (72, 283)
Remaining data recs: (73, 283)
Test recs: (73, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (395, 283)
Train recs: (276, 283)
Remaining data recs: (119, 283)
Validation recs: (60, 283)
Remaining data recs: (59, 283)
Test recs: (59, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (572, 283)
Train recs: (400, 283)
Remaining data recs: (172, 283)
Validation recs: (86, 283)
Remaining data recs: (86, 283)
Test recs: (86, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (581, 283)
Train recs: (407, 283)
Remaining data recs: (174, 283)
Validation recs: (87, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (561, 283)
Train recs: (393, 283)
Remaining data recs: (168, 283)
Validation recs: (84, 283)
Remaining data recs: (84, 283)
Test recs: (84, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (323, 283)
Train recs: (226, 283)
Remaining data recs: (97, 283)
Validation recs: (48, 283)
Remaining data recs: (49, 283)
Test recs: (49, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (356, 283)
Train recs: (249, 283)
Remaining data recs: (107, 283)
Validation recs: (54, 283)
Remaining data recs: (53, 283)
Test recs: (53, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (252, 283)
Train recs: (176, 283)
Remaining data recs: (76, 283)
Validation recs: (38, 283)
Remaining data recs: (38, 283)
Test recs: (38, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (131, 283)
Train recs: (92, 283)
Remaining data recs: (39, 283)
Validation recs: (20, 283)
Remaining data recs: (19, 283)
Test recs: (19, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (606, 283)
Train recs: (424, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (552, 283)
Train recs: (386, 283)
Remaining data recs: (166, 283)
Validation recs: (83, 283)
Remaining data recs: (83, 283)
Test recs: (83, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (416, 283)
Train recs: (291, 283)
Remaining data recs: (125, 283)
Validation recs: (62, 283)
Remaining data recs: (63, 283)
Test recs: (63, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (465, 283)
Train recs: (326, 283)
Remaining data recs: (139, 283)
Validation recs: (70, 283)
Remaining data recs: (69, 283)
Test recs: (69, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (369, 283)
Train recs: (258, 283)
Remaining data recs: (111, 283)
Validation recs: (56, 283)
Remaining data recs: (55, 283)
Test recs: (55, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (376, 283)
Train recs: (263, 283)
Remaining data recs: (113, 283)
Validation recs: (56, 283)
Remaining data recs: (57, 283)
Test recs: (57, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (490, 283)
Train recs: (343, 283)
Remaining data recs: (147, 283)
Validation recs: (74, 283)
Remaining data recs: (73, 283)
Test recs: (73, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (555, 283)
Train recs: (388, 283)
Remaining data recs: (167, 283)
Validation recs: (84, 283)
Remaining data recs: (83, 283)
Test recs: (83, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (576, 283)
Train recs: (403, 283)
Remaining data recs: (173, 283)
Validation recs: (86, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (475, 283)
Train recs: (332, 283)
Remaining data recs: (143, 283)
Validation recs: (72, 283)
Remaining data recs: (71, 283)
Test recs: (71, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (285, 283)
Train recs: (200, 283)
Remaining data recs: (85, 283)
Validation recs: (42, 283)
Remaining data recs: (43, 283)
Test recs: (43, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (499, 283)
Train recs: (349, 283)
Remaining data recs: (150, 283)
Validation recs: (75, 283)
Remaining data recs: (75, 283)
Test recs: (75, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (427, 283)
Train recs: (299, 283)
Remaining data recs: (128, 283)
Validation recs: (64, 283)
Remaining data recs: (64, 283)
Test recs: (64, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (429, 283)
Train recs: (300, 283)
Remaining data recs: (129, 283)
Validation recs: (64, 283)
Remaining data recs: (65, 283)
Test recs: (65, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (565, 283)
Train recs: (396, 283)
Remaining data recs: (169, 283)
Validation recs: (84, 283)
Remaining data recs: (85, 283)
Test recs: (85, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (479, 283)
Train recs: (335, 283)
Remaining data recs: (144, 283)
Validation recs: (72, 283)
Remaining data recs: (72, 283)
Test recs: (72, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (229, 283)
Train recs: (160, 283)
Remaining data recs: (69, 283)
Validation recs: (34, 283)
Remaining data recs: (35, 283)
Test recs: (35, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (471, 283)
Train recs: (330, 283)
Remaining data recs: (141, 283)
Validation recs: (70, 283)
Remaining data recs: (71, 283)
Test recs: (71, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (297, 283)
Train recs: (208, 283)
Remaining data recs: (89, 283)
Validation recs: (44, 283)
Remaining data recs: (45, 283)
Test recs: (45, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (568, 283)
Train recs: (398, 283)
Remaining data recs: (170, 283)
Validation recs: (85, 283)
Remaining data recs: (85, 283)
Test recs: (85, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (390, 283)
Train recs: (273, 283)
Remaining data recs: (117, 283)
Validation recs: (58, 283)
Remaining data recs: (59, 283)
Test recs: (59, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (139, 283)
Train recs: (97, 283)
Remaining data recs: (42, 283)
Validation recs: (21, 283)
Remaining data recs: (21, 283)
Test recs: (21, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (549, 283)
Train recs: (384, 283)
Remaining data recs: (165, 283)
Validation recs: (82, 283)
Remaining data recs: (83, 283)
Test recs: (83, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (449, 283)
Train recs: (314, 283)
Remaining data recs: (135, 283)
Validation recs: (68, 283)
Remaining data recs: (67, 283)
Test recs: (67, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (324, 283)
Train recs: (227, 283)
Remaining data recs: (97, 283)
Validation recs: (48, 283)
Remaining data recs: (49, 283)
Test recs: (49, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (409, 283)
Train recs: (286, 283)
Remaining data recs: (123, 283)
Validation recs: (62, 283)
Remaining data recs: (61, 283)
Test recs: (61, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (308, 283)
Train recs: (216, 283)
Remaining data recs: (92, 283)
Validation recs: (46, 283)
Remaining data recs: (46, 283)
Test recs: (46, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (567, 283)
Train recs: (397, 283)
Remaining data recs: (170, 283)
Validation recs: (85, 283)
Remaining data recs: (85, 283)
Test recs: (85, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (571, 283)
Train recs: (400, 283)
Remaining data recs: (171, 283)
Validation recs: (86, 283)
Remaining data recs: (85, 283)
Test recs: (85, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (578, 283)
Train recs: (405, 283)
Remaining data recs: (173, 283)
Validation recs: (86, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (573, 283)
Train recs: (401, 283)
Remaining data recs: (172, 283)
Validation recs: (86, 283)
Remaining data recs: (86, 283)
Test recs: (86, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (244, 283)
Train recs: (171, 283)
Remaining data recs: (73, 283)
Validation recs: (36, 283)
Remaining data recs: (37, 283)
Test recs: (37, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (130, 283)
Train recs: (91, 283)
Remaining data recs: (39, 283)
Validation recs: (20, 283)
Remaining data recs: (19, 283)
Test recs: (19, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (336, 283)
Train recs: (235, 283)
Remaining data recs: (101, 283)
Validation recs: (50, 283)
Remaining data recs: (51, 283)
Test recs: (51, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (364, 283)
Train recs: (255, 283)
Remaining data recs: (109, 283)
Validation recs: (54, 283)
Remaining data recs: (55, 283)
Test recs: (55, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (139, 283)
Train recs: (97, 283)
Remaining data recs: (42, 283)
Validation recs: (21, 283)
Remaining data recs: (21, 283)
Test recs: (21, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (587, 283)
Train recs: (411, 283)
Remaining data recs: (176, 283)
Validation recs: (88, 283)
Remaining data recs: (88, 283)
Test recs: (88, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (136, 283)
Train recs: (95, 283)
Remaining data recs: (41, 283)
Validation recs: (20, 283)
Remaining data recs: (21, 283)
Test recs: (21, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (443, 283)
Train recs: (310, 283)
Remaining data recs: (133, 283)
Validation recs: (66, 283)
Remaining data recs: (67, 283)
Test recs: (67, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


Data split.  Total recs: (207, 283)
Train recs: (145, 283)
Remaining data recs: (62, 283)
Validation recs: (31, 283)
Remaining data recs: (31, 283)
Test recs: (31, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (335, 283)
Train recs: (234, 283)
Remaining data recs: (101, 283)
Validation recs: (50, 283)
Remaining data recs: (51, 283)
Test recs: (51, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (471, 283)
Train recs: (330, 283)
Remaining data recs: (141, 283)
Validation recs: (70, 283)
Remaining data recs: (71, 283)
Test recs: (71, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (580, 283)
Train recs: (406, 283)
Remaining data recs: (174, 283)
Validation recs: (87, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (318, 283)
Train recs: (223, 283)
Remaining data recs: (95, 283)
Validation recs: (48, 283)
Remaining data recs: (47, 283)
Test recs: (47, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (606, 283)
Train recs: (424, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (458, 283)
Train recs: (321, 283)
Remaining data recs: (137, 283)
Validation recs: (68, 283)
Remaining data recs: (69, 283)
Test recs: (69, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (153, 283)
Train recs: (107, 283)
Remaining data recs: (46, 283)
Validation recs: (23, 283)
Remaining data recs: (23, 283)
Test recs: (23, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (108, 283)
Train recs: (76, 283)
Remaining data recs: (32, 283)
Validation recs: (16, 283)
Remaining data recs: (16, 283)
Test recs: (16, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (311, 283)
Train recs: (218, 283)
Remaining data recs: (93, 283)
Validation recs: (46, 283)
Remaining data recs: (47, 283)
Test recs: (47, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (102, 283)
Train recs: (71, 283)
Remaining data recs: (31, 283)
Validation recs: (16, 283)
Remaining data recs: (15, 283)
Test recs: (15, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (118, 283)
Train recs: (83, 283)
Remaining data recs: (35, 283)
Validation recs: (18, 283)
Remaining data recs: (17, 283)
Test recs: (17, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (101, 283)
Train recs: (71, 283)
Remaining data recs: (30, 283)
Validation recs: (15, 283)
Remaining data recs: (15, 283)
Test recs: (15, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (277, 283)
Train recs: (194, 283)
Remaining data recs: (83, 283)
Validation recs: (42, 283)
Remaining data recs: (41, 283)
Test recs: (41, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (543, 283)
Train recs: (380, 283)
Remaining data recs: (163, 283)
Validation recs: (82, 283)
Remaining data recs: (81, 283)
Test recs: (81, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (587, 283)
Train recs: (411, 283)
Remaining data recs: (176, 283)
Validation recs: (88, 283)
Remaining data recs: (88, 283)
Test recs: (88, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (554, 283)
Train recs: (388, 283)
Remaining data recs: (166, 283)
Validation recs: (83, 283)
Remaining data recs: (83, 283)
Test recs: (83, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (370, 283)
Train recs: (259, 283)
Remaining data recs: (111, 283)
Validation recs: (56, 283)
Remaining data recs: (55, 283)
Test recs: (55, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (371, 283)
Train recs: (260, 283)
Remaining data recs: (111, 283)
Validation recs: (56, 283)
Remaining data recs: (55, 283)
Test recs: (55, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (428, 283)
Train recs: (300, 283)
Remaining data recs: (128, 283)
Validation recs: (64, 283)
Remaining data recs: (64, 283)
Test recs: (64, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (526, 283)
Train recs: (368, 283)
Remaining data recs: (158, 283)
Validation recs: (79, 283)
Remaining data recs: (79, 283)
Test recs: (79, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (278, 283)
Train recs: (195, 283)
Remaining data recs: (83, 283)
Validation recs: (42, 283)
Remaining data recs: (41, 283)
Test recs: (41, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


Data split.  Total recs: (573, 283)
Train recs: (401, 283)
Remaining data recs: (172, 283)
Validation recs: (86, 283)
Remaining data recs: (86, 283)
Test recs: (86, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (493, 283)
Train recs: (345, 283)
Remaining data recs: (148, 283)
Validation recs: (74, 283)
Remaining data recs: (74, 283)
Test recs: (74, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (599, 283)
Train recs: (419, 283)
Remaining data recs: (180, 283)
Validation recs: (90, 283)
Remaining data recs: (90, 283)
Test recs: (90, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (416, 283)
Train recs: (291, 283)
Remaining data recs: (125, 283)
Validation recs: (62, 283)
Remaining data recs: (63, 283)
Test recs: (63, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (329, 283)
Train recs: (230, 283)
Remaining data recs: (99, 283)
Validation recs: (50, 283)
Remaining data recs: (49, 283)
Test recs: (49, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (314, 283)
Train recs: (220, 283)
Remaining data recs: (94, 283)
Validation recs: (47, 283)
Remaining data recs: (47, 283)
Test recs: (47, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (313, 283)
Train recs: (219, 283)
Remaining data recs: (94, 283)
Validation recs: (47, 283)
Remaining data recs: (47, 283)
Test recs: (47, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (403, 283)
Train recs: (282, 283)
Remaining data recs: (121, 283)
Validation recs: (60, 283)
Remaining data recs: (61, 283)
Test recs: (61, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (396, 283)
Train recs: (277, 283)
Remaining data recs: (119, 283)
Validation recs: (60, 283)
Remaining data recs: (59, 283)
Test recs: (59, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (174, 283)
Train recs: (122, 283)
Remaining data recs: (52, 283)
Validation recs: (26, 283)
Remaining data recs: (26, 283)
Test recs: (26, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (109, 283)
Train recs: (76, 283)
Remaining data recs: (33, 283)
Validation recs: (16, 283)
Remaining data recs: (17, 283)
Test recs: (17, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (468, 283)
Train recs: (328, 283)
Remaining data recs: (140, 283)
Validation recs: (70, 283)
Remaining data recs: (70, 283)
Test recs: (70, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (577, 283)
Train recs: (404, 283)
Remaining data recs: (173, 283)
Validation recs: (86, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (221, 283)
Train recs: (155, 283)
Remaining data recs: (66, 283)
Validation recs: (33, 283)
Remaining data recs: (33, 283)
Test recs: (33, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (266, 283)
Train recs: (186, 283)
Remaining data recs: (80, 283)
Validation recs: (40, 283)
Remaining data recs: (40, 283)
Test recs: (40, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (211, 283)
Train recs: (148, 283)
Remaining data recs: (63, 283)
Validation recs: (32, 283)
Remaining data recs: (31, 283)
Test recs: (31, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (576, 283)
Train recs: (403, 283)
Remaining data recs: (173, 283)
Validation recs: (86, 283)
Remaining data recs: (87, 283)
Test recs: (87, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (259, 283)
Train recs: (181, 283)
Remaining data recs: (78, 283)
Validation recs: (39, 283)
Remaining data recs: (39, 283)
Test recs: (39, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (218, 283)
Train recs: (153, 283)
Remaining data recs: (65, 283)
Validation recs: (32, 283)
Remaining data recs: (33, 283)
Test recs: (33, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (337, 283)
Train recs: (236, 283)
Remaining data recs: (101, 283)
Validation recs: (50, 283)
Remaining data recs: (51, 283)
Test recs: (51, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (363, 283)
Train recs: (254, 283)
Remaining data recs: (109, 283)
Validation recs: (54, 283)
Remaining data recs: (55, 283)
Test recs: (55, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (339, 283)
Train recs: (237, 283)
Remaining data recs: (102, 283)
Validation recs: (51, 283)
Remaining data recs: (51, 283)
Test recs: (51, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (335, 283)
Train recs: (234, 283)
Remaining data recs: (101, 283)
Validation recs: (50, 283)
Remaining data recs: (51, 283)
Test recs: (51, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (606, 283)
Train recs: (424, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (303, 283)
Train recs: (212, 283)
Remaining data recs: (91, 283)
Validation recs: (46, 283)
Remaining data recs: (45, 283)
Test recs: (45, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (291, 283)
Train recs: (204, 283)
Remaining data recs: (87, 283)
Validation recs: (44, 283)
Remaining data recs: (43, 283)
Test recs: (43, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (362, 283)
Train recs: (253, 283)
Remaining data recs: (109, 283)
Validation recs: (54, 283)
Remaining data recs: (55, 283)
Test recs: (55, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (183, 283)
Train recs: (128, 283)
Remaining data recs: (55, 283)
Validation recs: (28, 283)
Remaining data recs: (27, 283)
Test recs: (27, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (607, 283)
Train recs: (425, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


Data split.  Total recs: (383, 283)
Train recs: (268, 283)
Remaining data recs: (115, 283)
Validation recs: (58, 283)
Remaining data recs: (57, 283)
Test recs: (57, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (608, 283)
Train recs: (426, 283)
Remaining data recs: (182, 283)
Validation recs: (91, 283)
Remaining data recs: (91, 283)
Test recs: (91, 283)


  dip[i] = 100 * (dip_mio[i]/trs[i])
  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  dx = 100 * np.abs((dip - din) / (dip + din))


Data split.  Total recs: (595, 283)
Train recs: (416, 283)
Remaining data recs: (179, 283)
Validation recs: (90, 283)
Remaining data recs: (89, 283)
Test recs: (89, 283)
Consolidating whole market data
Combining into weekly data and adding ta
Calculating weekly data for allordpreviousclose
Adding ta data for allordpreviousclose
Calculating weekly data for asxpreviousclose
Adding ta data for asxpreviousclose
Calculating weekly data for 640106_A3597525W
Adding ta data for 640106_A3597525W
Calculating weekly data for FIRMMCRT
Adding ta data for FIRMMCRT
Calculating weekly data for FXRUSD
Adding ta data for FXRUSD
Calculating weekly data for GRCPAIAD
Adding ta data for GRCPAIAD
Calculating weekly data for GRCPAISAD
Adding ta data for GRCPAISAD
Calculating weekly data for GRCPBCAD
Adding ta data for GRCPBCAD
Calculating weekly data for GRCPBCSAD
Adding ta data for GRCPBCSAD
Calculating weekly data for GRCPBMAD
Adding ta data for GRCPBMAD
Calculating weekly data for GRCPNRAD
Adding ta data f

  dr = np.log(close).diff()


Calculating weekly data for H05_GLFSEPTPOP
Adding ta data for H05_GLFSEPTPOP
Concatenating whole market data
Concatenated whole market shape (616, 960)
Concatenating symbol dfs
Adding whole market data
Optimising symbol dfs
All data: (15780, 1243)
Training data: (73697, 1243)
Validation data: (15769, 1243)
Test data: (15780, 1243)


In [None]:
train_df

In [None]:
validation_df

In [None]:
test_df

In [None]:
test_cols = ['symbol', 'GICSSector', 'GICSIndustryGroup', 'GICSIndustry', 'totalVolume', 'weekLow', 
             'weekHigh', 'weekOpen', 'weekClose', 'ta_volume_adi', 'ta_volume_obv', 'ta_volume_obvm', 
             'ta_volume_cmf', 'ta_volume_fi', 'ta_volume_em', 'ta_volume_vpt', 'ta_volume_nvi', 
             'ta_volatility_atr', 'ta_volatility_bbh', 'ta_volatility_bbl', 'ta_volatility_bbm',
             'ta_volatility_bbhi', 'ta_volatility_bbli', 'ta_volatility_kcc', 'ta_volatility_kch',
             'ta_volatility_kcl', 'ta_volatility_kchi', 'ta_volatility_kcli', 'ta_volatility_dch', 
             'ta_volatility_dcl', 'ta_volatility_dchi', 'ta_volatility_dcli', 'ta_trend_macd', 
             'ta_trend_macd_signal', 'ta_trend_macd_diff', 'ta_trend_ema_fast', 'ta_trend_ema_slow',
             'ta_trend_adx', 'ta_trend_adx_pos', 'ta_trend_adx_neg', 'ta_trend_vortex_ind_pos', 
             'ta_trend_vortex_ind_neg', 'ta_trend_vortex_diff', 'ta_trend_trix', 'ta_trend_mass_index',
             'ta_trend_cci', 'ta_trend_dpo', 'ta_trend_kst', 'ta_trend_kst_sig', 'ta_trend_kst_diff', 
             'ta_trend_ichimoku_a', 'ta_trend_ichimoku_b', 'ta_trend_visual_ichimoku_a', 
             'ta_trend_visual_ichimoku_b', 'ta_trend_aroon_up', 'ta_trend_aroon_down', 'ta_trend_aroon_ind',
             'ta_momentum_rsi', 'ta_momentum_mfi', 'ta_momentum_tsi', 'ta_momentum_uo', 'ta_momentum_stoch', 
             'ta_momentum_stoch_signal', 'ta_momentum_wr', 'ta_momentum_ao', 'ta_others_dr', 'ta_others_dlr',
             'ta_others_cr']

target_col = 'target'

In [None]:
train_X = train_df.drop(target_col, axis=1)
train_y = train_df[target_col]

validate_X = validation_df.drop(target_col, axis=1)
validate_y = validation_df[target_col]

test_X = test_df.drop(target_col, axis=1)
test_y = test_df[target_col]


print('Number of columns:', train_X.shape[1])

stats = pd.DataFrame()    
stats["Mean"] = train_X.mean()
stats["Std.Dev"] = train_X.std()
stats["Var"] = train_X.var()
stats["NaNs"] = train_X.isnull().sum()
stats["NaN.Percent"] = stats["NaNs"] / train_X.shape[0] * 100


In [None]:
# train_X.var()
# train_X.isnull().sum()
stats

In [None]:
cols_to_remove = stats[(stats['Mean']==0) & (stats['Std.Dev']==0) & (stats['Var']==0)]
nan_cols_to_remove = stats[(stats['NaN.Percent'] > 75)]

print(cols_to_remove.index.values)
print(nan_cols_to_remove.index.values)

train_X.drop(cols_to_remove.index.values, axis=1, inplace=True)
train_X.drop(nan_cols_to_remove.index.values, axis=1, inplace=True)
validate_X.drop(cols_to_remove.index.values, axis=1, inplace=True)
validate_X.drop(nan_cols_to_remove.index.values, axis=1, inplace=True)
test_X.drop(cols_to_remove.index.values, axis=1, inplace=True)
test_X.drop(nan_cols_to_remove.index.values, axis=1, inplace=True)
train_X.shape

In [None]:
from scipy import stats

col_stats = pd.DataFrame()

for col in train_X.columns:
    current_col_result = stats.kendalltau(train_X[col].values, train_y.values)
    current_df = pd.DataFrame({"name": [col],
                               "correlation": [current_col_result[0]],
                               "p-value": [current_col_result[1]]})
    
    col_stats = col_stats.append(current_df)
    

In [None]:
col_stats.sort_values('p-value')

In [None]:
# Train encoder
from sklearn.preprocessing import LabelEncoder 

symbol_encoder = LabelEncoder()
train_X['symbol'] = symbol_encoder.fit_transform(train_X['symbol'].values)
validate_X['symbol'] = symbol_encoder.transform(validate_X['symbol'].values)
test_X['symbol'] = symbol_encoder.transform(test_X['symbol'].values)

GICSSector_encoder = LabelEncoder()
train_X['GICSSector'] = GICSSector_encoder.fit_transform(train_X['GICSSector'].values)
validate_X['GICSSector'] = GICSSector_encoder.transform(validate_X['GICSSector'].values)
test_X['GICSSector'] = GICSSector_encoder.transform(test_X['GICSSector'].values)

GICSIndustryGroup_encoder = LabelEncoder()
train_X['GICSIndustryGroup'] = GICSIndustryGroup_encoder.fit_transform(train_X['GICSIndustryGroup'].values)
validate_X['GICSIndustryGroup'] = GICSIndustryGroup_encoder.transform(validate_X['GICSIndustryGroup'].values)
test_X['GICSIndustryGroup'] = GICSIndustryGroup_encoder.transform(test_X['GICSIndustryGroup'].values)

GICSIndustry_encoder = LabelEncoder()
train_X['GICSIndustry'] = GICSIndustry_encoder.fit_transform(train_X['GICSIndustry'].values)
validate_X['GICSIndustry'] = GICSIndustry_encoder.transform(validate_X['GICSIndustry'].values)
test_X['GICSIndustry'] = GICSIndustry_encoder.transform(test_X['GICSIndustry'].values)


In [None]:
# Execute one hot encoding for bolinger vals

BOLLINGER_PREDICTION_CATEGORIES = ['Steady', 'Rising', 'Falling']
BOLLINGER_VALUE_CATEGORIES = ['Within', 'Below', 'Above']


def one_hot_encode_field(df, column_name, categories):
    new_cols = pd.get_dummies(df[column_name])
    new_cols = new_cols.astype('int8', errors='ignore')

    new_cols.T.reindex(categories).T.fillna(0)
    
    name_map = {}
    # rename the categories
    for val in categories:
        name_map[val] = column_name + '_' + val

    new_cols.rename(name_map, axis=1, inplace=True)
    
    # Remove the original column
    df.drop([column_name], axis=1, inplace=True)
    
    # Return df with new cols 
    return pd.concat([df, new_cols], axis=1)

for col in BOLLINGER_PREDICTION_COLUMNS:
    train_X = one_hot_encode_field(train_X, col, BOLLINGER_PREDICTION_CATEGORIES)
    validate_X = one_hot_encode_field(validate_X, col, BOLLINGER_PREDICTION_CATEGORIES)
    test_X = one_hot_encode_field(test_X, col, BOLLINGER_PREDICTION_CATEGORIES)

for col in BOLLINGER_VALUE_COLUMNS:
    train_X = one_hot_encode_field(train_X, col, BOLLINGER_VALUE_CATEGORIES)
    validate_X = one_hot_encode_field(validate_X, col, BOLLINGER_VALUE_CATEGORIES)
    test_X = one_hot_encode_field(test_X, col, BOLLINGER_VALUE_CATEGORIES)



In [None]:
# Fix NaNs

train_X.fillna(0, inplace=True)
validate_X.fillna(0, inplace=True)
test_X.fillna(0, inplace=True)

In [None]:
print(train_y.describe())
print(validate_y.describe())
print(test_y.describe())

In [None]:
# Apply standard scaler
from sklearn import preprocessing

# norm_data = preprocessing.normalize(train_X, axis=0, norm="l2")
# train_X = pd.DataFrame(norm_data, index=train_X.index, columns=list(train_X.columns.values))

# norm_data = preprocessing.normalize(validate_X, axis=0, norm="l2")
# validate_X = pd.DataFrame(norm_data, index=validate_X.index, columns=list(validate_X.columns.values))

# norm_data = preprocessing.normalize(test_X, axis=0, norm="l2")
# test_X = pd.DataFrame(norm_data, index=test_X.index, columns=list(test_X.columns.values))

scaler = preprocessing.RobustScaler()

train_X[train_X.columns] = scaler.fit_transform(train_X[train_X.columns])
validate_X[validate_X.columns] = scaler.transform(validate_X[validate_X.columns])
test_X[test_X.columns] = scaler.transform(test_X[test_X.columns])

In [None]:
validate_y.isna().sum()

In [None]:
# Look at feature importance using an xgb model
import xgboost as xgb
from xgboost import plot_importance
from stats_operations import safe_log


importance_model = xgb.XGBRegressor(nthread=8, tree_method='auto', predictor='cpu_predictor', learning_rate = 0.2,
                                   n_estimators=500)
#                                    n_estimators=150, max_depth=70, base_score=0.1, colsample_bylevel=0.7,
#                                    colsample_bytree=1.0, gamma=0, learning_rate=0.1, min_child_weight=3)


print('Training xgboost importance model...')

eval_set = [(validate_X.values, safe_log(validate_y.values))]
importance_model.fit(train_X.values, safe_log(train_y.values), early_stopping_rounds=50, 
                     eval_metric='mae', eval_set=eval_set, verbose=True)


In [None]:
def add_selected_features(df, feature_list):
    # if empty, initialise with first array
    if df.shape[0] == 0:
        df['feature'] = feature_list
        df['count'] = 1
    else:
        for feature in feature_list:
            # If it exists, increment, otherwise append and set to 1
            if len(df[df['feature']==feature]) > 0:
                df.loc[df['feature']==feature, 'count'] = df.loc[df['feature']==feature, 'count'] + 1
            else:
                df = df.append({'feature': feature, 'count': 1}, ignore_index=True)
            
    return df

In [None]:
feature_importance_df = pd.DataFrame({
    'column_name': train_X.columns,
    'importance': importance_model.feature_importances_
})

# feature_importance_df.sort_values('importance', ascending=False)
top_150 = feature_importance_df[feature_importance_df['importance'] > 0].sort_values('importance', ascending=False).head(150)
top_150['column_name'].values

feature_df = pd.DataFrame()
feature_df = add_selected_features(feature_df, top_150['column_name'].values)


In [None]:
print(feature_df.shape[0])
feature_df.sort_values('count', ascending=False)

In [None]:
## Use sklearn RFE to detrmine best n features

from sklearn.feature_selection import RFE
from sklearn.svm import SVR
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import LinearRegression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

numberOfRequiredFeatures = 150
step = 0.05
attributes = train_X
target = safe_log(train_y.values)

def return_selected_features(attributes, selected_msk):
    selected_attributes = attributes.copy()
    index = 0
    indexNames = list(attributes.columns.values)
    for name in indexNames:
        if not selected_msk[index]:
            selected_attributes = selected_attributes.drop(name, axis=1)
        index = index + 1

    return selected_attributes.columns.values

estimator = SVR(kernel="linear")
selector = RFE(estimator, numberOfRequiredFeatures, step=step, verbose=1)
selector = selector.fit(attributes, target)
svr_support = selector.support_
svr_ranking = selector.ranking_
svr_features = return_selected_features(attributes, selector.support_)
print('SVR')
print(svr_features)
feature_df = add_selected_features(feature_df, svr_features)


# estimator = BayesianRidge()
# selector = RFE(estimator, numberOfRequiredFeatures, step=step, verbose=1)
# selector = selector.fit(attributes, target)
# br_support = selector.support_
# br_ranking = selector.ranking_
# br_features = return_selected_features(attributes, selector.support_)
# print('Bayesian Ridge')
# print(br_features)
# feature_df = add_selected_features(feature_df, br_features)

# estimator = LinearRegression()
# selector = RFE(estimator, numberOfRequiredFeatures, step=step, verbose=1)
# selector = selector.fit(attributes, target)
# lr_support = selector.support_
# lr_ranking = selector.ranking_
# lr_features = return_selected_features(attributes, selector.support_)
# print('Linear Regression')
# print(lr_features)
# feature_df = add_selected_features(feature_df, lr_features)


# kernel = DotProduct() + WhiteKernel()
# estimator = GaussianProcessRegressor(kernel=kernel, random_state=0)
# selector = RFE(estimator, numberOfRequiredFeatures, step=step, verbose=1)
# selector = selector.fit(attributes, target)
# gpr_support = selector.support_
# gpr_ranking = selector.ranking_
# gpr_features = return_selected_features(attributes, selector.support_)
# print('Gaussian Process Regressor')
# print(gpr_features)
# feature_df = add_selected_features(feature_df, gpr_features)




In [None]:
pca_train = PCAForPandas(n_components=100)
X_train_pca = pca_train.fit_transform(train_X, train_y.values)

In [None]:
X_train_pca

In [None]:
# Look at feature importance using an xgb model
import xgboost as xgb
from xgboost import plot_importance
from sklearn.model_selection import train_test_split


pca_importance_model = xgb.XGBRegressor(nthread=8, tree_method='auto', predictor='cpu_predictor', 
                                        n_estimators=250, max_depth=70, base_score=0.1, 
                                        colsample_bylevel=0.7, colsample_bytree=1.0, 
                                        gamma=0, learning_rate=0.05, min_child_weight=3)


print('Training xgboost pca importance model...')
x_train, x_test, y_train, y_test = train_test_split(X_train_pca.values, shift_target['eight_week_return'].values, 
                                                    test_size=0.15)

eval_set = [(x_test, y_test)]
pca_importance_model.fit(x_train, y_train, early_stopping_rounds=10, eval_metric='mae', eval_set=eval_set, verbose=True)


In [None]:
pca_feature_importance_df = pd.DataFrame({
    'column_name': X_train_pca.columns,
    'importance': pca_importance_model.feature_importances_
})

pca_feature_importance_df.sort_values('importance', ascending=False)
# feature_importance_df[feature_importance_df['importance'] > 0]

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd


class PCAForPandas(PCA):
    """This class is just a small wrapper around the PCA estimator of sklearn including normalization to make it 
    compatible with pandas DataFrames.
    """

    def __init__(self, **kwargs):
        self._z_scaler = StandardScaler()
        super(self.__class__, self).__init__(**kwargs)

        self._X_columns = None

    def fit(self, X, y=None):
        """Normalize X and call the fit method of the base class with numpy arrays instead of pandas data frames."""

        X = self._prepare(X)

        self._z_scaler.fit(X.values, y)
        z_data = self._z_scaler.transform(X.values, y)

        return super(self.__class__, self).fit(z_data, y)

    def fit_transform(self, X, y=None):
        """Call the fit and the transform method of this class."""

        X = self._prepare(X)

        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X, y=None):
        """Normalize X and call the transform method of the base class with numpy arrays instead of pandas data frames."""

        X = self._prepare(X)

        z_data = self._z_scaler.transform(X.values, y)

        transformed_ndarray = super(self.__class__, self).transform(z_data)

        pandas_df = pd.DataFrame(transformed_ndarray)
        pandas_df.columns = ["pca_{}".format(i) for i in range(len(pandas_df.columns))]

        return pandas_df

    def _prepare(self, X):
        """Check if the data is a pandas DataFrame and sorts the column names.

        :raise AttributeError: if pandas is not a DataFrame or the columns of the new X is not compatible with the 
                               columns from the previous X data
        """
        if not isinstance(X, pd.DataFrame):
            raise AttributeError("X is not a pandas DataFrame")

        X.sort_index(axis=1, inplace=True)

        if self._X_columns is not None:
            if self._X_columns != list(X.columns):
                raise AttributeError("The columns of the new X is not compatible with the columns from the previous X data")
        else:
            self._X_columns = list(X.columns)

        return X