In [1]:
%load_ext autoreload

In [12]:
%autoreload 2
%matplotlib inline

In [3]:
import pandas as pd
import numpy as np
import yaml
import json
import os
from flatten_dict import unflatten, flatten
from filter_utils import *
from pandas.api.types import is_numeric_dtype
import seaborn as sns
from sklearn.preprocessing import minmax_scale, LabelEncoder




import multiprocessing as mp
print("Number of processors: ", mp.cpu_count())
os.getcwd()

Number of processors:  8


'/Users/morarica/Developer/SepMe/Jupyters'

## 1. Getting the human annotations

In [4]:
def underscore_reducer(k1, k2):
    if k1 is None:
        return str(k2)
    else:
        return str(k1) + "_" + str(k2)

In [5]:
folder = '/Users/morarica/Developer/SepMe/data/SPLOM_data/SepMe/Data/Human_judgment/'
print(len(sorted(os.listdir(folder))))
df_a = []
df_m = []
for i, file in enumerate(sorted(os.listdir(folder))):
    if file.endswith('csv'):
        #print(file)
        df = pd.read_csv(folder+file, header=None)
        df.columns = np.array(df.columns)+1
        
        if len(df)>4:
            #print(df)
            df = df.head(4)
        
        df.index = ['PCA', 'RobPCA', 'GlimmerMDS', 'tSNE']
        
        person = file.split('.csv')[0].split('-')[-1]
        dd ={}
        dd[file.split('-')[0]] = df.to_dict('index')
        df1 = pd.DataFrame.from_dict(flatten(dd, underscore_reducer), orient='index', columns = [person])
        
        #print(person + '  ' + file)
        if person =='M':
            df_m.append(df1)
        else:
            df_a.append(df1)

dfmm = pd.concat(df_m, axis=0)
dfaa = pd.concat(df_a, axis=0)

df = pd.concat([dfmm, dfaa], axis=1)


df['class'] = [ii.split('_')[-1] for ii in df.index]
df.dropna(axis=0, inplace=True)

df = df.astype({'A': 'int32', 'M': 'int32'})
print(df.shape)

153
(1669, 3)


In [6]:
df['dim_x'] = 1
df['dim_y'] = 2
df = df[df.A.between(1,5) & df.M.between(1,5)]
df = df.reset_index()
print(df.shape)
df.head()


names = ['_'.join(row.split('_')[:-1]) for row in list(df['index'])]
#names = list(set(names))

df['fileName'] = names

(1648, 6)


In [7]:
df.to_csv('/Users/morarica/Developer/SepMe/data/orig_data/human_reduced_results.csv', index=False)

## 2. Plot inputs

In [8]:
def remove_outlier(df, dims = [.05,.95]):
    low = dims[0]
    high = dims[1]
    quant_df = df.quantile([low, high])
    #print(quant_df)
    for name in list(df.columns):
        if name in ['x','y']:
            df = df[(df[name] > quant_df.loc[low, name]) 
               & (df[name] < quant_df.loc[high, name])]
        #print(len(df))
    return df

def preprocess_df(df, dims, size = 0.5, occlusion = 0.1, save=None, sort=True):
    
    ddf = df[dims + ['class']].copy()
    #print(ddf.head())
    
    #print(ddf.shape)
    classes = list(set(ddf['class']))
    if len(classes)>10:
        print('Classes: {}'.format(len(classes)))
        ddf.columns = ['x', 'y', 'orig_class'] #rename cols
        merge_classes = list(ddf[['orig_class', 'x']].groupby(['orig_class']).count()
                             .sort_values(['x'], ascending=True).head(len(classes)-9).index)
        
        ddf['class'] = [row if row not in merge_classes else -1 for i,row in ddf['orig_class'].items()]
        
        le = LabelEncoder()
        ddf['class'] = le.fit_transform(ddf['class'])
        
    else: 
        ddf.columns = ['x', 'y', 'class'] #rename cols
        le = LabelEncoder()
        ddf['orig_class'] = ddf['class']
        ddf['class'] = le.fit_transform(ddf['orig_class'])
        
        
    ddf = remove_outlier(ddf, [0.05, 0.95]) #remove outliers
    ddf.iloc[:,:2] = minmax_scale(ddf.iloc[:, :2]) #scale between 0 and 1
    
    if sort == True:
        ddf = ddf.sort_values(['x', 'y']) # sort
        
    #print(ddf.shape)   
    
    ddf.reset_index(inplace=True, drop=True) #reset_index 
    
    # filter occluded circles
    buff = ((max(ddf.x) - min(ddf.x)) / (np.sqrt(len(ddf.x))*2))*size
    circles = get_circles(ddf, buff)
    rem_indexes = remove_circles_by_partition(ddf, circles, 2*buff, occlusion)
    ddf = ddf.drop(rem_indexes)
    circles = circles.drop(rem_indexes)
    
    if save is not None:
        ddf.to_csv(save, index=False)
    
    return ddf, circles


def plot_colored_circles(ax, df, circles, class_name = 'class'):
    
    for i, row in df.iterrows():
        ax.add_patch(PolygonPatch(circles[i], fc = sns.color_palette("colorblind")[int(row[class_name])], ec = 'none', alpha = 1))
            
    ax.autoscale()
    ax.set_aspect('equal', 'datalim')

    return ax

def get_dimred_data(df, input_folder, save_folder, fig_folder):
    found = 0
    not_found = []
    names = ['_'.join(row.split('_')[:-1]) for row in list(df['index'])]
    names = list(set(names))
    
    for i, nn in enumerate(names):
        
        print('Processing file {}/{}. Name: {}'.format(i, len(names), nn))
        
        method = nn.split('_')[-1]
        #print(method)
        if method in {'PCA', 'RobPCA'}:
            name = nn + '_data.csv'
        else: 
            name = nn + '_2.csv'
            
        #print(name)
            
        try:
            data = pd.read_csv(input_folder+method+'/'+name)
            found += 1
            
            dim_x = str(df.loc[i, 'dim_x'])
            dim_y = str(df.loc[i, 'dim_y'])
            
            file_name = save_folder + nn + '_' + dim_x +'-' + dim_y +'.csv'
            
            if os.path.exists(file_name) is True:
                print('File was already processed. Skipping: {}'.format(file_name))
                continue

            ddf, circles = preprocess_df(data, [dim_x, dim_y], size = 0.5, occlusion = 0.1, save=file_name, sort=True)


            fig, ax = plt.subplots(figsize = (10,10))
            plot_colored_circles(ax, ddf, circles)
            fig.savefig(fig_folder + names[i] + '.pdf')
            plt.close('all')


        except FileNotFoundError:
            #print(nn)
            print('File not found: {}'.format(name))
            not_found.append(name)
            
    return 



In [9]:
input_folder = '/Users/morarica/Developer/SepMe/data/orig_data/input_data/Reduced_orig_data/'
save_folder = '/Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/'
fig_folder = '/Users/morarica/Developer/SepMe/data/orig_data/figures/reduced_data/'

get_dimred_data(df, input_folder, save_folder, fig_folder)

Processing file 0/216. Name: boston_GlimmerMDS
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/boston_GlimmerMDS_1-2.csv
Processing file 1/216. Name: bbdm13_GlimmerMDS
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/bbdm13_GlimmerMDS_1-2.csv
Processing file 2/216. Name: cars03Cropped_d3_RobPCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/cars03Cropped_d3_RobPCA_1-2.csv
Processing file 3/216. Name: ms_interleaved_120_240_3d_25_centeredClusters_tSNE
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/ms_interleaved_120_240_3d_25_centeredClusters_tSNE_1-2.csv
Processing file 4/216. Name: fisheries_clusteredByHarvestRule_PCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/fisheries_clus

File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/ecoliproteins_GlimmerMDS_1-2.csv
Processing file 46/216. Name: ms_interleaved_60_120_3d_0_centeredClusters_PCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/ms_interleaved_60_120_3d_0_centeredClusters_PCA_1-2.csv
Processing file 47/216. Name: interleaved_100_200_4d_0_notcramped_notrotated_tSNE
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/interleaved_100_200_4d_0_notcramped_notrotated_tSNE_1-2.csv
Processing file 48/216. Name: interleaved_100_200_4d_25_cramped_rotated_RobPCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/interleaved_100_200_4d_25_cramped_rotated_RobPCA_1-2.csv
Processing file 49/216. Name: iris_tSNE
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/

File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/cars03Cropped_d1_PCA_1-2.csv
Processing file 110/216. Name: n100_PCA
File not found: n100_PCA_data.csv
Processing file 111/216. Name: grid10_3d_GlimmerMDS
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/grid10_3d_GlimmerMDS_1-2.csv
Processing file 112/216. Name: hiv_GlimmerMDS
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/hiv_GlimmerMDS_1-2.csv
Processing file 113/216. Name: ms_interleaved_60_120_3d_25_centeredClusters_tSNE
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/ms_interleaved_60_120_3d_25_centeredClusters_tSNE_1-2.csv
Processing file 114/216. Name: white_ballance_RobPCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/white

File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/shuttle_big_PCA_1-2.csv
Processing file 165/216. Name: ms_interleaved_40_80_3d_0_GlimmerMDS
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/ms_interleaved_40_80_3d_0_GlimmerMDS_1-2.csv
Processing file 166/216. Name: spambase_PCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/spambase_PCA_1-2.csv
Processing file 167/216. Name: efashion_GlimmerMDS
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/efashion_GlimmerMDS_1-2.csv
Processing file 168/216. Name: ms_interleaved_60_120_3d_25_centeredClusters_PCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/ms_interleaved_60_120_3d_25_centeredClusters_PCA_1-2.csv
Processing file 169/216. Name: f

File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/cars03Cropped_d1_RobPCA_1-2.csv
Processing file 215/216. Name: grid10_3d_PCA
File was already processed. Skipping: /Users/morarica/Developer/SepMe/data/orig_data/input_data/reduced_clean/grid10_3d_PCA_1-2.csv


## 3. Plot dimensions in original data

In [10]:
def save_and_plot_all_dimensions(orig_dir, save_dir, fig_dir, class_cols):
    for i, file in enumerate(os.listdir(orig_dir)):
        if file.endswith('.csv'):
            print('Processing file {}/{}. Name: {}'.format(i, len(os.listdir(orig_dir)), file))
            if file in ['shuttle_small_origClassLabels.csv', 'shuttle_big.csv']:
                continue
            
            df = pd.read_csv(orig_dir + file)

            c1s = []
            for c1 in df.columns:
                if not np.issubdtype(df[c1].dtype, np.number) or (c1 in class_cols):
                    continue

                c1s.append(c1)
                for c2 in df.columns[:-1]:
                    if (not np.issubdtype(df[c1].dtype, np.number)) or (c2 in c1s) or (c2 in class_cols):
                        continue

                    file_name = save_dir + file.split('.csv')[0] + '_{}-{}'.format(c1,c2) + '.csv'
                    fig_name = fig_dir + file.split('.csv')[0] + '_{}-{}'.format(c1,c2) + '.pdf'
                    
                    if os.path.exists(file_name) is True:
                        print('File was already processed. Skipping: {}'.format(file_name.split('/')[-1]))
                        continue

                    try: 
                        print('{}-{}'.format(c1,c2))
                        ddf, circles = preprocess_df(df, [c1, c2], size = 0.5, 
                                                 occlusion = 0.1, save=file_name, sort=True)
                        fig, ax = plt.subplots(figsize = (10,10))
                        plot_colored_circles(ax, ddf, circles)
                        fig.savefig(fig_name)
                        plt.close('all')
                        
                    except Exception as e: 
                        print('File {} was not processed for columns ({},{}).'.format(file,c1,c2))
                        print(e)
                        print('')


In [11]:
orig_dir = "/Users/morarica/Developer/SepMe/data/orig_data/input_data/Orig_data/"
save_dir = "/Users/morarica/Developer/SepMe/data/orig_data/input_data/dims_clean/"
fig_dir = "/Users/morarica/Developer/SepMe/data/orig_data/figures/orig_dim_data/"
class_cols = ['class']

save_and_plot_all_dimensions(orig_dir, save_dir, fig_dir, class_cols)

Processing file 0/104. Name: grid10_3d.csv
File was already processed. Skipping: grid10_3d_d1-d2.csv
File was already processed. Skipping: grid10_3d_d1-d3.csv
File was already processed. Skipping: grid10_3d_d2-d3.csv
Processing file 1/104. Name: interleaved_100_200_4d_0_notcramped_notrotated.csv
File was already processed. Skipping: interleaved_100_200_4d_0_notcramped_notrotated_d1-d2.csv
File was already processed. Skipping: interleaved_100_200_4d_0_notcramped_notrotated_d1-d3.csv
File was already processed. Skipping: interleaved_100_200_4d_0_notcramped_notrotated_d1-d4.csv
File was already processed. Skipping: interleaved_100_200_4d_0_notcramped_notrotated_d2-d3.csv
File was already processed. Skipping: interleaved_100_200_4d_0_notcramped_notrotated_d2-d4.csv
File was already processed. Skipping: interleaved_100_200_4d_0_notcramped_notrotated_d3-d4.csv
Processing file 2/104. Name: abalone.csv
File was already processed. Skipping: abalone_x1-x2.csv
File was already processed. Skipping

File ionosphere.csv was not processed for columns (D2,D4).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D2-D5
File ionosphere.csv was not processed for columns (D2,D5).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D2-D6
File ionosphere.csv was not processed for columns (D2,D6).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D2-D7
File ionosphere.csv was not processed for columns (D2,D7).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D2-D8
File ionosphere.csv was not processed for columns (D2,D8).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D2-D9
File ionosphere.csv was not processed for columns (D2,D9).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D2-D10
File ionosphere.csv was not processed for columns (D2,D10).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is require

File was already processed. Skipping: ionosphere_D14-D30.csv
File was already processed. Skipping: ionosphere_D14-D31.csv
File was already processed. Skipping: ionosphere_D14-D32.csv
File was already processed. Skipping: ionosphere_D14-D33.csv
File was already processed. Skipping: ionosphere_D14-D34.csv
File was already processed. Skipping: ionosphere_D15-D16.csv
File was already processed. Skipping: ionosphere_D15-D17.csv
File was already processed. Skipping: ionosphere_D15-D18.csv
File was already processed. Skipping: ionosphere_D15-D19.csv
File was already processed. Skipping: ionosphere_D15-D20.csv
File was already processed. Skipping: ionosphere_D15-D21.csv
File was already processed. Skipping: ionosphere_D15-D22.csv
File was already processed. Skipping: ionosphere_D15-D23.csv
File was already processed. Skipping: ionosphere_D15-D24.csv
File was already processed. Skipping: ionosphere_D15-D25.csv
File was already processed. Skipping: ionosphere_D15-D26.csv
File was already process

File bbdm13_origClassLabels.csv was not processed for columns (V10,V12).
float division by zero

V10-V13
File bbdm13_origClassLabels.csv was not processed for columns (V10,V13).
float division by zero

File was already processed. Skipping: bbdm13_origClassLabels_V11-V12.csv
File was already processed. Skipping: bbdm13_origClassLabels_V11-V13.csv
File was already processed. Skipping: bbdm13_origClassLabels_V12-V13.csv
Processing file 9/104. Name: interleaved_100_200_5d_25_cramped_rotated.csv
File was already processed. Skipping: interleaved_100_200_5d_25_cramped_rotated_d1-d2.csv
File was already processed. Skipping: interleaved_100_200_5d_25_cramped_rotated_d1-d3.csv
File was already processed. Skipping: interleaved_100_200_5d_25_cramped_rotated_d1-d4.csv
File was already processed. Skipping: interleaved_100_200_5d_25_cramped_rotated_d1-d5.csv
File was already processed. Skipping: interleaved_100_200_5d_25_cramped_rotated_d2-d3.csv
File was already processed. Skipping: interleaved_100_

File ionosphere_origClassLabels.csv was not processed for columns (D1,D27).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D1-D28
File ionosphere_origClassLabels.csv was not processed for columns (D1,D28).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D1-D29
File ionosphere_origClassLabels.csv was not processed for columns (D1,D29).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D1-D30
File ionosphere_origClassLabels.csv was not processed for columns (D1,D30).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D1-D31
File ionosphere_origClassLabels.csv was not processed for columns (D1,D31).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D1-D32
File ionosphere_origClassLabels.csv was not processed for columns (D1,D32).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

D1-D33
File ionosphere_origClassLab

File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x2.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x3.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x4.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x5.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x6.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x7.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x8.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x1-x9.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x2-x3.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x2-x4.csv
File was already processed. Skipping: breast-cancer-wisconsin_origClassLabels_x2-x5.csv
File was already processed. Skip

File bbdm13.csv was not processed for columns (V10,V12).
float division by zero

V10-V13
File bbdm13.csv was not processed for columns (V10,V13).
float division by zero

File was already processed. Skipping: bbdm13_V11-V12.csv
File was already processed. Skipping: bbdm13_V11-V13.csv
File was already processed. Skipping: bbdm13_V12-V13.csv
Processing file 26/104. Name: ecoliproteins.csv
File was already processed. Skipping: ecoliproteins_d1-d2.csv
d1-d3
File ecoliproteins.csv was not processed for columns (d1,d3).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d1-d4
File ecoliproteins.csv was not processed for columns (d1,d4).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: ecoliproteins_d1-d5.csv
File was already processed. Skipping: ecoliproteins_d1-d6.csv
File was already processed. Skipping: ecoliproteins_d1-d7.csv
d2-d3
File ecoliproteins.csv was not processed for columns (d2,d3).
F

File was already processed. Skipping: hiv_d6-d7.csv
File was already processed. Skipping: hiv_d6-d8.csv
File was already processed. Skipping: hiv_d6-d9.csv
File was already processed. Skipping: hiv_d6-d10.csv
File was already processed. Skipping: hiv_d6-d11.csv
File was already processed. Skipping: hiv_d6-d12.csv
File was already processed. Skipping: hiv_d6-d13.csv
File was already processed. Skipping: hiv_d6-d14.csv
File was already processed. Skipping: hiv_d6-d15.csv
File was already processed. Skipping: hiv_d6-d16.csv
File was already processed. Skipping: hiv_d6-d17.csv
File was already processed. Skipping: hiv_d6-d18.csv
File was already processed. Skipping: hiv_d6-d19.csv
File was already processed. Skipping: hiv_d6-d20.csv
File was already processed. Skipping: hiv_d6-d21.csv
File was already processed. Skipping: hiv_d6-d22.csv
File was already processed. Skipping: hiv_d6-d23.csv
File was already processed. Skipping: hiv_d6-d24.csv
File was already processed. Skipping: hiv_d6-d25.

File was already processed. Skipping: hiv_d16-d49.csv
File was already processed. Skipping: hiv_d16-d50.csv
File was already processed. Skipping: hiv_d16-d51.csv
File was already processed. Skipping: hiv_d16-d52.csv
File was already processed. Skipping: hiv_d16-d53.csv
File was already processed. Skipping: hiv_d16-d54.csv
File was already processed. Skipping: hiv_d16-d55.csv
File was already processed. Skipping: hiv_d16-d56.csv
File was already processed. Skipping: hiv_d16-d57.csv
File was already processed. Skipping: hiv_d16-d58.csv
File was already processed. Skipping: hiv_d16-d59.csv
File was already processed. Skipping: hiv_d16-d60.csv
File was already processed. Skipping: hiv_d16-d61.csv
File was already processed. Skipping: hiv_d16-d62.csv
File was already processed. Skipping: hiv_d16-d63.csv
File was already processed. Skipping: hiv_d16-d64.csv
File was already processed. Skipping: hiv_d16-d65.csv
File was already processed. Skipping: hiv_d16-d66.csv
File was already processed. 

File was already processed. Skipping: hiv_d26-d43.csv
File was already processed. Skipping: hiv_d26-d44.csv
File was already processed. Skipping: hiv_d26-d45.csv
File was already processed. Skipping: hiv_d26-d46.csv
File was already processed. Skipping: hiv_d26-d47.csv
File was already processed. Skipping: hiv_d26-d48.csv
File was already processed. Skipping: hiv_d26-d49.csv
File was already processed. Skipping: hiv_d26-d50.csv
File was already processed. Skipping: hiv_d26-d51.csv
File was already processed. Skipping: hiv_d26-d52.csv
File was already processed. Skipping: hiv_d26-d53.csv
File was already processed. Skipping: hiv_d26-d54.csv
File was already processed. Skipping: hiv_d26-d55.csv
File was already processed. Skipping: hiv_d26-d56.csv
File was already processed. Skipping: hiv_d26-d57.csv
File was already processed. Skipping: hiv_d26-d58.csv
File was already processed. Skipping: hiv_d26-d59.csv
File was already processed. Skipping: hiv_d26-d60.csv
File was already processed. 

File was already processed. Skipping: hiv_d37-d148.csv
File was already processed. Skipping: hiv_d37-d149.csv
File was already processed. Skipping: hiv_d37-d150.csv
File was already processed. Skipping: hiv_d37-d151.csv
File was already processed. Skipping: hiv_d37-d152.csv
File was already processed. Skipping: hiv_d37-d153.csv
File was already processed. Skipping: hiv_d37-d154.csv
File was already processed. Skipping: hiv_d37-d155.csv
File was already processed. Skipping: hiv_d37-d156.csv
File was already processed. Skipping: hiv_d37-d157.csv
File was already processed. Skipping: hiv_d37-d158.csv
File was already processed. Skipping: hiv_d37-d159.csv
File was already processed. Skipping: hiv_d38-d39.csv
File was already processed. Skipping: hiv_d38-d40.csv
File was already processed. Skipping: hiv_d38-d41.csv
File was already processed. Skipping: hiv_d38-d42.csv
File was already processed. Skipping: hiv_d38-d43.csv
File was already processed. Skipping: hiv_d38-d44.csv
File was already

File was already processed. Skipping: hiv_d49-d139.csv
File was already processed. Skipping: hiv_d49-d140.csv
File was already processed. Skipping: hiv_d49-d141.csv
File was already processed. Skipping: hiv_d49-d142.csv
File was already processed. Skipping: hiv_d49-d143.csv
File was already processed. Skipping: hiv_d49-d144.csv
File was already processed. Skipping: hiv_d49-d145.csv
File was already processed. Skipping: hiv_d49-d146.csv
File was already processed. Skipping: hiv_d49-d147.csv
File was already processed. Skipping: hiv_d49-d148.csv
File was already processed. Skipping: hiv_d49-d149.csv
File was already processed. Skipping: hiv_d49-d150.csv
File was already processed. Skipping: hiv_d49-d151.csv
File was already processed. Skipping: hiv_d49-d152.csv
File was already processed. Skipping: hiv_d49-d153.csv
File was already processed. Skipping: hiv_d49-d154.csv
File was already processed. Skipping: hiv_d49-d155.csv
File was already processed. Skipping: hiv_d49-d156.csv
File was a

File was already processed. Skipping: hiv_d63-d150.csv
File was already processed. Skipping: hiv_d63-d151.csv
File was already processed. Skipping: hiv_d63-d152.csv
File was already processed. Skipping: hiv_d63-d153.csv
File was already processed. Skipping: hiv_d63-d154.csv
File was already processed. Skipping: hiv_d63-d155.csv
File was already processed. Skipping: hiv_d63-d156.csv
File was already processed. Skipping: hiv_d63-d157.csv
File was already processed. Skipping: hiv_d63-d158.csv
File was already processed. Skipping: hiv_d63-d159.csv
File was already processed. Skipping: hiv_d64-d65.csv
File was already processed. Skipping: hiv_d64-d66.csv
File was already processed. Skipping: hiv_d64-d67.csv
File was already processed. Skipping: hiv_d64-d68.csv
File was already processed. Skipping: hiv_d64-d69.csv
File was already processed. Skipping: hiv_d64-d70.csv
File was already processed. Skipping: hiv_d64-d71.csv
File was already processed. Skipping: hiv_d64-d72.csv
File was already p

File was already processed. Skipping: hiv_d79-d87.csv
File was already processed. Skipping: hiv_d79-d88.csv
File was already processed. Skipping: hiv_d79-d89.csv
File was already processed. Skipping: hiv_d79-d90.csv
File was already processed. Skipping: hiv_d79-d91.csv
File was already processed. Skipping: hiv_d79-d92.csv
File was already processed. Skipping: hiv_d79-d93.csv
File was already processed. Skipping: hiv_d79-d94.csv
File was already processed. Skipping: hiv_d79-d95.csv
File was already processed. Skipping: hiv_d79-d96.csv
File was already processed. Skipping: hiv_d79-d97.csv
File was already processed. Skipping: hiv_d79-d98.csv
File was already processed. Skipping: hiv_d79-d99.csv
File was already processed. Skipping: hiv_d79-d100.csv
File was already processed. Skipping: hiv_d79-d101.csv
File was already processed. Skipping: hiv_d79-d102.csv
File was already processed. Skipping: hiv_d79-d103.csv
File was already processed. Skipping: hiv_d79-d104.csv
File was already proces

File was already processed. Skipping: hiv_d98-d124.csv
File was already processed. Skipping: hiv_d98-d125.csv
File was already processed. Skipping: hiv_d98-d126.csv
File was already processed. Skipping: hiv_d98-d127.csv
File was already processed. Skipping: hiv_d98-d128.csv
File was already processed. Skipping: hiv_d98-d129.csv
File was already processed. Skipping: hiv_d98-d130.csv
File was already processed. Skipping: hiv_d98-d131.csv
File was already processed. Skipping: hiv_d98-d132.csv
File was already processed. Skipping: hiv_d98-d133.csv
File was already processed. Skipping: hiv_d98-d134.csv
File was already processed. Skipping: hiv_d98-d135.csv
File was already processed. Skipping: hiv_d98-d136.csv
File was already processed. Skipping: hiv_d98-d137.csv
File was already processed. Skipping: hiv_d98-d138.csv
File was already processed. Skipping: hiv_d98-d139.csv
File was already processed. Skipping: hiv_d98-d140.csv
File was already processed. Skipping: hiv_d98-d141.csv
File was a

File was already processed. Skipping: hiv_d122-d135.csv
File was already processed. Skipping: hiv_d122-d136.csv
File was already processed. Skipping: hiv_d122-d137.csv
File was already processed. Skipping: hiv_d122-d138.csv
File was already processed. Skipping: hiv_d122-d139.csv
File was already processed. Skipping: hiv_d122-d140.csv
File was already processed. Skipping: hiv_d122-d141.csv
File was already processed. Skipping: hiv_d122-d142.csv
File was already processed. Skipping: hiv_d122-d143.csv
File was already processed. Skipping: hiv_d122-d144.csv
File was already processed. Skipping: hiv_d122-d145.csv
File was already processed. Skipping: hiv_d122-d146.csv
File was already processed. Skipping: hiv_d122-d147.csv
File was already processed. Skipping: hiv_d122-d148.csv
File was already processed. Skipping: hiv_d122-d149.csv
File was already processed. Skipping: hiv_d122-d150.csv
File was already processed. Skipping: hiv_d122-d151.csv
File was already processed. Skipping: hiv_d122-d

File cereal_origClassLabels.csv was not processed for columns (Fibre,Complex_carbohydrates).
float division by zero

Fibre-Sugars
File cereal_origClassLabels.csv was not processed for columns (Fibre,Sugars).
float division by zero

Fibre-Display_shelf
File cereal_origClassLabels.csv was not processed for columns (Fibre,Display_shelf).
float division by zero

Fibre-Potassium
File cereal_origClassLabels.csv was not processed for columns (Fibre,Potassium).
float division by zero

File was already processed. Skipping: cereal_origClassLabels_Complex_carbohydrates-Sugars.csv
File was already processed. Skipping: cereal_origClassLabels_Complex_carbohydrates-Display_shelf.csv
File was already processed. Skipping: cereal_origClassLabels_Complex_carbohydrates-Potassium.csv
Sugars-Display_shelf
File cereal_origClassLabels.csv was not processed for columns (Sugars,Display_shelf).
float division by zero

Sugars-Potassium
File cereal_origClassLabels.csv was not processed for columns (Sugars,Potassiu

File spambase.csv was not processed for columns (d1,d34).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d1-d35.csv
File was already processed. Skipping: spambase_d1-d36.csv
File was already processed. Skipping: spambase_d1-d37.csv
d1-d38
File spambase.csv was not processed for columns (d1,d38).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d1-d39.csv
File was already processed. Skipping: spambase_d1-d40.csv
d1-d41
File spambase.csv was not processed for columns (d1,d41).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d1-d42.csv
File was already processed. Skipping: spambase_d1-d43.csv
File was already processed. Skipping: spambase_d1-d44.csv
File was already processed. Skipping: spambase_d1-d45.csv
File was already processed. Skipping: spambase_d1-d46

File spambase.csv was not processed for columns (d4,d13).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d4-d14
File spambase.csv was not processed for columns (d4,d14).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d4-d15
File spambase.csv was not processed for columns (d4,d15).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d4-d16
File spambase.csv was not processed for columns (d4,d16).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d4-d17
File spambase.csv was not processed for columns (d4,d17).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d4-d18
File spambase.csv was not processed for columns (d4,d18).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d4-d19
File spambase.csv was not processed for columns (d4,d19).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.


File spambase.csv was not processed for columns (d7,d34).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d7-d35.csv
File was already processed. Skipping: spambase_d7-d36.csv
File was already processed. Skipping: spambase_d7-d37.csv
d7-d38
File spambase.csv was not processed for columns (d7,d38).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d7-d39.csv
File was already processed. Skipping: spambase_d7-d40.csv
d7-d41
File spambase.csv was not processed for columns (d7,d41).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d7-d42.csv
File was already processed. Skipping: spambase_d7-d43.csv
File was already processed. Skipping: spambase_d7-d44.csv
File was already processed. Skipping: spambase_d7-d45.csv
File was already processed. Skipping: spambase_d7-d46

File spambase.csv was not processed for columns (d11,d34).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d11-d35.csv
File was already processed. Skipping: spambase_d11-d36.csv
File was already processed. Skipping: spambase_d11-d37.csv
d11-d38
File spambase.csv was not processed for columns (d11,d38).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d11-d39.csv
File was already processed. Skipping: spambase_d11-d40.csv
d11-d41
File spambase.csv was not processed for columns (d11,d41).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d11-d42.csv
File was already processed. Skipping: spambase_d11-d43.csv
File was already processed. Skipping: spambase_d11-d44.csv
File was already processed. Skipping: spambase_d11-d45.csv
File was already processed. Skipping: s

File spambase.csv was not processed for columns (d15,d22).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d15-d23.csv
File was already processed. Skipping: spambase_d15-d24.csv
File was already processed. Skipping: spambase_d15-d25.csv
File was already processed. Skipping: spambase_d15-d26.csv
File was already processed. Skipping: spambase_d15-d27.csv
File was already processed. Skipping: spambase_d15-d28.csv
d15-d29
File spambase.csv was not processed for columns (d15,d29).
float division by zero

File was already processed. Skipping: spambase_d15-d30.csv
File was already processed. Skipping: spambase_d15-d31.csv
d15-d32
File spambase.csv was not processed for columns (d15,d32).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d15-d33.csv
d15-d34
File spambase.csv was not processed for columns (d15,d34).
Found array with 0 sample(s

File spambase.csv was not processed for columns (d18,d48).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d18-d49.csv
File was already processed. Skipping: spambase_d18-d50.csv
File was already processed. Skipping: spambase_d18-d51.csv
File was already processed. Skipping: spambase_d18-d52.csv
File was already processed. Skipping: spambase_d18-d53.csv
File was already processed. Skipping: spambase_d18-d54.csv
File was already processed. Skipping: spambase_d18-d55.csv
File was already processed. Skipping: spambase_d18-d56.csv
File was already processed. Skipping: spambase_d18-d57.csv
File was already processed. Skipping: spambase_d19-d20.csv
File was already processed. Skipping: spambase_d19-d21.csv
d19-d22
File spambase.csv was not processed for columns (d19,d22).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d19-d23.csv
File was

File spambase.csv was not processed for columns (d22,d30).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d22-d31
File spambase.csv was not processed for columns (d22,d31).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d22-d32
File spambase.csv was not processed for columns (d22,d32).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d22-d33
File spambase.csv was not processed for columns (d22,d33).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d22-d34
File spambase.csv was not processed for columns (d22,d34).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d22-d35
File spambase.csv was not processed for columns (d22,d35).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d22-d36
File spambase.csv was not processed for columns (d22,d36).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 

File spambase.csv was not processed for columns (d28,d41).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d28-d42.csv
File was already processed. Skipping: spambase_d28-d43.csv
File was already processed. Skipping: spambase_d28-d44.csv
File was already processed. Skipping: spambase_d28-d45.csv
File was already processed. Skipping: spambase_d28-d46.csv
d28-d47
File spambase.csv was not processed for columns (d28,d47).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d28-d48
File spambase.csv was not processed for columns (d28,d48).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: spambase_d28-d49.csv
File was already processed. Skipping: spambase_d28-d50.csv
File was already processed. Skipping: spambase_d28-d51.csv
File was already processed. Skipping: spambase_d28-d52.csv
File was already processed. Skipping: s

File spambase.csv was not processed for columns (d32,d44).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d32-d45
File spambase.csv was not processed for columns (d32,d45).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d32-d46
File spambase.csv was not processed for columns (d32,d46).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d32-d47
File spambase.csv was not processed for columns (d32,d47).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d32-d48
File spambase.csv was not processed for columns (d32,d48).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d32-d49
File spambase.csv was not processed for columns (d32,d49).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d32-d50
File spambase.csv was not processed for columns (d32,d50).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 

File spambase.csv was not processed for columns (d38,d49).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d38-d50
File spambase.csv was not processed for columns (d38,d50).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d38-d51
File spambase.csv was not processed for columns (d38,d51).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d38-d52
File spambase.csv was not processed for columns (d38,d52).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d38-d53
File spambase.csv was not processed for columns (d38,d53).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d38-d54
File spambase.csv was not processed for columns (d38,d54).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d38-d55
File spambase.csv was not processed for columns (d38,d55).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 

year-d2
File efashion_origClassLabels.csv was not processed for columns (year,d2).
float division by zero

year-d3
File efashion_origClassLabels.csv was not processed for columns (year,d3).
float division by zero

year-d4
File efashion_origClassLabels.csv was not processed for columns (year,d4).
float division by zero

File was already processed. Skipping: efashion_origClassLabels_d2-d3.csv
File was already processed. Skipping: efashion_origClassLabels_d2-d4.csv
File was already processed. Skipping: efashion_origClassLabels_d3-d4.csv
Processing file 51/104. Name: white_ballance_origClassLabels.csv
File was already processed. Skipping: white_ballance_origClassLabels_height-lengthe.csv
File was already processed. Skipping: white_ballance_origClassLabels_height-area.csv
File was already processed. Skipping: white_ballance_origClassLabels_height-eccen.csv
File was already processed. Skipping: white_ballance_origClassLabels_height-p_black.csv
File was already processed. Skipping: white_ball

File cereal.csv was not processed for columns (Fibre,Display_shelf).
float division by zero

Fibre-Potassium
File cereal.csv was not processed for columns (Fibre,Potassium).
float division by zero

File was already processed. Skipping: cereal_Complex_carbohydrates-Sugars.csv
File was already processed. Skipping: cereal_Complex_carbohydrates-Display_shelf.csv
File was already processed. Skipping: cereal_Complex_carbohydrates-Potassium.csv
Sugars-Display_shelf
File cereal.csv was not processed for columns (Sugars,Display_shelf).
float division by zero

Sugars-Potassium
File cereal.csv was not processed for columns (Sugars,Potassium).
float division by zero

File was already processed. Skipping: cereal_Display_shelf-Potassium.csv
Processing file 69/104. Name: cars03Cropped_d3.csv
d1-d2
Classes: 12
File cars03Cropped_d3.csv was not processed for columns (d1,d2).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

d1-d3
Classes: 12
File cars03Cropped_d3.csv was no

Processing file 75/104. Name: world_11d.csv
File was already processed. Skipping: world_11d_d1-d2.csv
File was already processed. Skipping: world_11d_d1-d3.csv
File was already processed. Skipping: world_11d_d1-d4.csv
File was already processed. Skipping: world_11d_d1-d5.csv
File was already processed. Skipping: world_11d_d1-d6.csv
File was already processed. Skipping: world_11d_d1-d7.csv
File was already processed. Skipping: world_11d_d1-d8.csv
File was already processed. Skipping: world_11d_d1-d9.csv
File was already processed. Skipping: world_11d_d1-d10.csv
File was already processed. Skipping: world_11d_d1-d11.csv
File was already processed. Skipping: world_11d_d1-d12.csv
File was already processed. Skipping: world_11d_d2-d3.csv
File was already processed. Skipping: world_11d_d2-d4.csv
File was already processed. Skipping: world_11d_d2-d5.csv
File was already processed. Skipping: world_11d_d2-d6.csv
File was already processed. Skipping: world_11d_d2-d7.csv
File was already processe

File cars03Cropped_d2.csv was not processed for columns (d1,d22).
Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

File was already processed. Skipping: cars03Cropped_d2_d2-d3.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d4.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d5.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d6.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d7.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d8.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d9.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d19.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d11.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d12.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d13.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d14.csv
File was already processed. Skipping: cars03Cropped_d2_d2-d15.csv
File

KeyboardInterrupt: 

In [None]:
pd.read_csv()