In [1]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from missingpy import KNNImputer
import copy
from sklearn.preprocessing import LabelEncoder;
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from hyperopt import tpe, hp, fmin
from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score,f1_score


In [2]:
df=pd.read_csv('train.csv')

In [3]:
df.drop('Name', axis=1, inplace=True)#must be taken care in EDA
df.drop('PassengerId', axis=1, inplace=True)  # must be taken care in EDA

In [4]:
# 80% of counts are 

class columnTypeIdentification:
    def __init__(self, df):
        self.dtypes={}
        self.df=df
        for i in self.df.columns:
            self.dtypes[i]=(self.df[i].dtypes)
#         print(self.dtypes)
        self.colTypes={'Categorical': [], 'Text':[], 'Numeric': []}
        self.detectingColTypes()
        target_type=''
        
        for i in self.colTypes.keys():
            if y in self.colTypes[i]:
                target_type=i
                break
        self.target_type=target_type
    
    def detectingColTypes(self):
        for i in self.dtypes.keys():
            if self.dtypes[i]=='O':
                if (df[i].fillna('',axis=0).apply(lambda x: len(x))).quantile(q=0.95)<20:
                    self.colTypes['Categorical'].append(i)
                else:
                    self.colTypes['Text'].append(i)
            else: 
                distinctValues = self.df[i].nunique()
                if distinctValues < int((self.df[i].shape[0])*0.05):
                    self.colTypes['Categorical'].append(i)
                else:
                    self.colTypes['Numeric'].append(i)
    

In [5]:
class NullHandling:
  
    def __init__(self, df,colTypes,y):
        self.dict_isnull = (df.isnull().sum() / len(df)).to_dict()
        self.df=df
        self.y=y
        self.colTypes=copy.deepcopy(colTypes)
        self.remove_columns()
 
    #Removing columns which have more than 75 percent nulls
    def remove_columns(self):
        cols_remove=[]
        for key in self.dict_isnull:
            if(self.dict_isnull[key]>0.75):
                cols_remove.append(key)        
        if not cols_remove:
            return self.colTypes
        else:
            for i in cols_remove:
                for j in self.colTypes.keys():
                    if i in self.colTypes[j]:
                        self.colTypes[j].remove(i)
        if self.y in cols_remove:
            cols_remove.remove(self.y)
        
        self.df.drop(cols_remove,axis=1, inplace=True)
        
#         return self.colTypes
        

    #Imputing the null values with the mean value of the column
    def continuous_impute_mean(self):
        df_temp=self.df.copy()
        imputer = SimpleImputer(strategy='mean')
        df_temp[self.colTypes['Numeric']] = imputer.fit_transform(df_temp[self.colTypes['Numeric']])
        return df_temp

    #Imputing the null values using KNN
    def continuous_impute_knn(self):
        df_temp=self.df.copy()
        imputer = KNNImputer(n_neighbors=5) 
        df_temp[self.colTypes['Numeric']] = imputer.fit_transform(df_temp[self.colTypes['Numeric']])
        return df_temp
    
     #Common method calling all the impute functions   
    def impute(self,strategy,fill_value = 0, fill_categorical = '-1'):
        df_temp=self.df
        
        #Dealing with Continuous cols
        if strategy is None:
            df_temp[self.colTypes['Numeric']]=df_temp[self.colTypes['Numeric']].fillna(fill_value)
        elif strategy == 'mean':
            self.continuous_impute_mean()
        elif strategy == 'knn':
            self.continuous_impute_knn()
            
        #dealing with categorical Cols 
        df_temp[self.colTypes['Categorical']]=df_temp[self.colTypes['Categorical']].fillna(fill_categorical)
        
        return df_temp


In [6]:
class FeatureReduction:

    def __init__(self,df,colTypes,y,target_type):
        self.df=df
        self.colTypes=copy.deepcopy(colTypes)
        self.y=y
        self.target_type=target_type
        self.colTypes[self.target_type].remove(self.y)
        #self.all_dfs=[]
        self.pearson_corr()

    #removing the continuous column with a correlation of above 0.8
    def pearson_corr(self):
        self.colTypes['Numeric']=set(self.colTypes['Numeric']).intersection(set(self.df.columns))
        corr=self.df[list(self.colTypes['Numeric'])].corr(method="pearson").abs()
        upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(np.bool))
        to_drop = [column for column in upper.columns if any(upper[column] > 0.80)]
        self.df.drop(to_drop, axis=1,inplace=True)
        self.colTypes['Numeric'] = [x for x in self.colTypes['Numeric'] if x not in to_drop]
        #self.all_dfs.append(self.df)
        return self.df
    
    
    def select_method(self,chooser_):
        if(chooser_=='pearson'):
            df = self.pearson_corr()
            return df
            
        

   # def return_dfs(self):
        #return self.all_dfs
        


In [7]:

class OutlierHandling:
    def __init__(self, df,colTypes,y,target_type):
        self.df = df.copy()
        self.y=y
        self.target_type=target_type
        self.colTypes = copy.deepcopy(colTypes)
        self.colTypes[self.target_type].remove(self.y)

        self.colTypes['Numeric'] = set(colTypes['Numeric']).intersection(set(df.columns))

        #self.all_dfs = []
        self.capping_outlier()
        self.remove_outlier()
        self.zscore_outlier()

    # capping the values to the predefined lower and upper percentiles
    def capping_outlier(self, lowerperc=0.01, higherperc=0.99):
        df = self.df[list(self.colTypes['Numeric'])].copy()
        df_out = self.df
        for col in df.columns:
            percentiles = df[col].quantile([lowerperc, higherperc]).values
            df[col][df[col] <= percentiles[0]] = percentiles[0]
            df[col][df[col] >= percentiles[1]] = percentiles[1]
            df_out[list(self.colTypes['Numeric'])] = df
        #self.all_dfs.append(df_out)
        
        return df_out

    # removing the values which are not within iqr range
    def remove_outlier(self):
        df_out = self.df[list(self.colTypes['Numeric'])].copy()
        df_in = self.df.copy(deep=True)
        df_in.drop(list(self.colTypes['Numeric']), axis=1, inplace=True)
        for col_name in df_out.columns:
            q1 = df_out[col_name].quantile(0.25)
            q3 = df_out[col_name].quantile(0.75)
            iqr = q3 - q1
            lower = q1 - 1.5 * iqr
            upper = q3 + 1.5 * iqr
            df_out = df_out.loc[(df_out[col_name] > lower) & (df_out[col_name] < upper)]
        df_final = pd.concat([df_in, df_out], axis=1, join='inner')
        #self.all_dfs.append(df_final)
        
        return df_final

    # removing values which are less than predefined zscore value
    def zscore_outlier(self, threshold=3):
        l = []
        df1 = self.df.copy(deep=True)
        df = self.df[list(self.colTypes['Numeric'])].copy(deep=True)
        df1.drop(self.colTypes['Numeric'], axis=1, inplace=True)
        df_out = pd.DataFrame()
        for i in df.columns:
            temp = []
            mean_1 = np.mean(df[i])
            std_1 = np.std(df[i])
            for y in df[i]:
                z_score = (y - mean_1) / std_1
                if np.abs(z_score) < threshold:
                    temp.append(y)
            df_temp = pd.DataFrame(temp)
            l.append(df_temp)
        df_out = pd.concat(l, axis=1, join='inner')
        df_out.columns = df.columns
        df_final = pd.concat([df1, df_out], axis=1, join='inner')
        #self.all_dfs.append(df_final)
        
        return df_final
    
    def select_method(self,chooser_):
        if(chooser_=='removing'):
            df=self.remove_outlier()
            return df
        if(chooser_=='capping'):
            df=self.capping_outlier()
            return df
        if(chooser_=='zscore'):
            df=self.zscore_outlier()
            return df
    
    ##def return_dfs(self):
        #return self.all_dfs

In [8]:
class Encoding:
    def __init__(self,df,colTypes,y):
        self.y=y
        self.colTypes = copy.deepcopy(colTypes)
        # fetching column types for the columns in the current df
        self.colTypes['Categorical'] = set(df.columns).intersection(set(colTypes['Categorical']))
        # removing the target column for the list of categorical columns
        if self.y in self.colTypes['Categorical']:
            self.colTypes['Categorical'].remove(self.y)

        self.df = df.copy()
        #self.all_dfs = []
        self.one_hot_encoding()
        self.label_encode()

    # encoding the categorical columns excluding the target column
    def one_hot_encoding(self):
        df1 = self.df.copy(deep=True)
        df_y = pd.DataFrame()
        df1 = pd.get_dummies(df1, drop_first=True, columns=list(self.colTypes['Categorical']))
        #self.all_dfs.append(self.target_encode(df1))
        
        return self.target_encode(df1)

    # encoding the categorical columns excluding the target column
    def label_encode(self):
        df1 = self.df.copy(deep=True)
        df_y = pd.DataFrame()
        for x in self.colTypes['Categorical']:
            df1[x] = LabelEncoder.fit_transform(df1, y=df1[x])
        #self.all_dfs.append(self.target_encode(df1))
        
        return self.target_encode(df1)

    # encoding the categorical target column
    def target_encode(self, t_df):
        if self.y not in self.colTypes['Numeric']:
            t_df[self.y] = LabelEncoder.fit_transform(t_df, y=t_df[self.y])
        return t_df
       
        
    def select_method(self,chooser_):
        if(chooser_=='one_hot'):
            df=self.one_hot_encode()
            return df
        if(chooser_=='label_encode'):
            df = self.label_encode()
            return df

        
    ##def return_dfs(self):
       # return self.all_dfs


In [9]:
from functools import partial
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from hyperopt import tpe, hp, fmin, space_eval, STATUS_OK, Trials
from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score, f1_score
import copy
import pandas as pd
import numpy as np

#Parameters
#List of dfs, targetCol and the metric under consideration

class Classification:
    def __init__(self, df, targetCol, metric, colTypes, test_size=0.2):
        self.df = df
        self.y = targetCol
        self.metric = metric
        self.colTypes=colTypes
        self.test_size=test_size
        # self.test_score = {}
        self.final_results={}

        self.execute()

    #Required to be passed in fmin of hyperopt
    def objective_func(self, args):
        clf=None

        if args['model'] == RandomForestClassifier:
            n_estimators = args['param']['n_estimators']
            max_depth = args['param']['max_depth']
            max_features = args['param']['max_features']
            clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features)
        elif args['model'] == LogisticRegression:
            penalty = args['param']['penalty']
            C = args['param']['C']
            solver = args['param']['solver']
            clf = LogisticRegression(penalty=penalty, C=C, solver=solver)

        clf.fit(self.x_train, self.y_train)
        y_pred_train = clf.predict(self.x_train)
        y_pred_test = clf.predict(self.x_test)
        loss = log_loss(self.y_train, y_pred_train)
        score = self.metric(y_pred_test, self.y_test)
        # print("Test Score:", self.metric(y_pred_test, self.y_test))
        # print("Train Score:", self.metric(y_pred_train, self.y_train))
        # print("\n===============")
        # return {'loss': score,'status': STATUS_OK,'model': clf}
        return (-score)

    def execute(self):
        #using Hyperopt for parameter tuning
        self.space = hp.choice('classifier', [
                                            {'model': RandomForestClassifier,
                                            'param': {'max_depth': hp.choice('max_depth', range(1, 20)),
                                            'max_features': hp.choice('max_features', range(1, 5)),
                                            'n_estimators': hp.choice('n_estimators', range(1, 20)),
                                             'criterion': hp.choice('criterion', ["gini", "entropy"])
                                                    }
                                            },
                                            {'model': LogisticRegression,
                                             'param': {'penalty': hp.choice('penalty', ['l2']),
                                                       'C': hp.lognormal('C', 0, 1),
                                                       'solver': hp.choice('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])}
                                             }
                                        ])

        score_key = str(self.metric) + '_score'


            #df.drop(self.colTypes['Text'], axis=1, inplace=True)#must be taken care in EDA
            #df.drop('PassengerId', axis=1, inplace=True)  # must be taken care in EDA
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(self.df.drop(self.y, axis=1), self.df[self.y], test_size=self.test_size)
        trials = Trials()

        hyperparam = space_eval(self.space,fmin(self.objective_func, self.space, trials=trials, algo=tpe.suggest, max_evals=100))
        score = -min(trials.losses())


        self.final_results['Hyperparameter']=hyperparam
        self.final_results[score_key]=score
        #self.final_results['Data']=self.df


    def return_results(self):
        return self.final_results


In [139]:
import itertools
import threading
s=[ [True], ['none','mean'], ['e', 'f'] ]

#s=[[True],[None,'mean','knn'],['pearson'],['capping','removing','zscore'],['one_hot','label_encode','target_encode']]
x=list(itertools.product(*s))

In [143]:
from functools import partial

def print_(queue,a,b,c):
    print(a)
    print(b)
    print(c)

    


cnt_=0
for list_ in x:
    #cnt_=cnt_+1
    #a, b, c = [list_[i] for i in range(len(list_))] 
    #func_=partial(threading_,df)
    #t1 = threading.Thread(target=func_,args=([list_[i] for i in range(len(list_))]))
    #z=1
    func_=partial(print_,queue)
    t1 = threading.Thread(target=func_,args=([list_[i] for i in range(len(list_))]))  
    t1.start()

True
TruenoneTrue


noneTruee
mean


meanf
e

f


In [15]:
metric=accuracy_score

class FlowClass:
    def __init__(self,df,y,cnt_):
        self.df=df
        self.y=y
        self.cnt_=cnt_

    def threading_(self,queue,col_identification,null_handling,feature_reduction,outlier_handling,encoding,modelling):
        df=self.df.copy()
        y=self.y
        #cnt_=self.cnt_
        path=[]
        d_ = {}
        
        if(col_identification):
            colIdentObj = columnTypeIdentification(df)
            colTypes = colIdentObj.colTypes
            target_type = colIdentObj.target_type
            z='col_identification'
            path.append(z)

            
        if(null_handling!='Null'):
            nullHndlngObj = NullHandling(df,colTypes,y)
            df = nullHndlngObj.impute(null_handling)
            #z='null_handling_df'+str(cnt_)+'.csv'
            #df.to_csv('null_handling_df.csv')
            z=null_handling
            path.append(z)
            
            
        if(feature_reduction!='Null'):
            fRdctionObj = FeatureReduction(df,colTypes,y,target_type)
            df=fRdctionObj.select_method(feature_reduction)
            #df.to_csv('feature_reduction_df.csv')
            z=feature_reduction
            path.append(z)
        
        
        if(outlier_handling!= 'Null'):
            OH = OutlierHandling(df,colTypes,y,target_type)
            df=OH.select_method(outlier_handling)
            #df.to_csv('outlier_handling_df.csv')
            z=outlier_handling
            path.append(z)
            
        
        if(encoding!= 'Null'):
            en = Encoding(df,colTypes,y)
            df=en.select_method(encoding)
            #df.to_csv('encoding_df.csv')
            z=encoding
            path.append(z)
            
            
        if(modelling!='Null'):
            metric = modelling
            clf = Classification(df,y,metric,colTypes,test_size = 0.2)
            param=clf.return_results()
            z=modelling
            path.append(z)
        
        d_['df']=df
        d_['path']=path
        d_['model_results'] = param
        queue.put(d_)
        
    



In [16]:
import itertools
import threading
import queue
from functools import partial

#s=[[True],[None,'mean','knn'],['pearson'],['capping','removing','zscore'],['one_hot','label_encode']]
s=[[True],[None,'mean'],['Null'],['removing'],['label_encode'],[accuracy_score]]
x=list(itertools.product(*s))


y = 'Survived'
cnt_=0
queue = queue.Queue()
thread_list = []
f_list = []
for list_ in x:
    cnt_ = cnt_+1
    obj = FlowClass(df,y,cnt_)
    func_=partial(obj.threading_,queue)
    t1 = threading.Thread(target=func_,args=([list_[i] for i in range(len(list_))]))
    t1.start()
    thread_list.append(t1)


for t in thread_list:
    t.join()
    #df = queue.get()
    #z = 'final_df'+str(ct_)+'.csv'
    #df.to_csv(z)
    f_list.append(queue.get())
    


  self.classes_, y = _encode(y, encode=True)


  0%|                                                                          | 0/100 [00:00<?, ?trial/s, best loss=?]
                                                                                                                       
  1%|▍                                              | 1/100 [00:00<00:14,  6.66trial/s, best loss: -0.8516129032258064]




  2%|9                                              | 2/100 [00:00<00:10,  9.60trial/s, best loss: -0.7786885245901639]
                                                                                                                       
  2%|▉                                              | 2/100 [00:00<00:14,  6.66trial/s, best loss: -0.8516129032258064]





  4%|#8                                             | 4/100 [00:00<00:10,  9.39trial/s, best loss: -0.7786885245901639]
                                                                                                                       
  2%|▉                                              | 2/100 [00:00<00:14,  6.66trial/s, best loss: -0.8516129032258064]




  5%|##3                                            | 5/100 [00:00<00:11,  8.35trial/s, best loss: -0.7786885245901639]
                                                                                                                       
  3%|█▍                                             | 3/100 [00:00<00:16,  5.83trial/s, best loss: -0.8516129032258064]




  6%|##8                                            | 6/100 [00:00<00:13,  7.02trial/s, best loss: -0.7786885245901639]
                                                                                                                       
  5%|██▎                                            | 5/100 [00:00<00:16,  5.89trial/s, best loss: -0.8516129032258064]




  7%|###2                                           | 7/100 [00:00<00:14,  6.50trial/s, best loss: -0.7786885245901639]
                                                                                                                       
  7%|███▎                                           | 7/100 [00:01<00:13,  6.68trial/s, best loss: -0.8516129032258064]




  9%|####2                                          | 9/100 [00:01<00:12,  7.54trial/s, best loss: -0.7786885245901639]
                                                                                                                       
  8%|███▊                                           | 8/100 [00:01<00:13,  6.77trial/s, best loss: -0.8516129032258064]




 10%|####6                                         | 10/100 [00:01<00:11,  7.71trial/s, best loss: -0.7786885245901639]
                                                                                                                       
  8%|███▊                                           | 8/100 [00:01<00:13,  6.77trial/s, best loss: -0.8516129032258064]




 11%|#####                                         | 11/100 [00:01<00:13,  6.82trial/s, best loss: -0.7786885245901639]
                                                                                                                       
 13%|######1                                        | 13/100 [00:01<00:10,  8.12trial/s, best loss: -0.819672131147541]
                                                                                                                       
  8%|███▊                                           | 8/100 [00:01<00:13,  6.77trial/s, best loss: -0.8516129032258064]




 12%|█████▌                                        | 12/100 [00:01<00:16,  5.45trial/s, best loss: -0.8516129032258064]




 14%|######5                                        | 14/100 [00:01<00:16,  5.14trial/s, best loss: -0.819672131147541]
                                                                                                                       
 14%|██████▍                                       | 14/100 [00:02<00:12,  6.79trial/s, best loss: -0.8516129032258064]




 15%|#######                                        | 15/100 [00:02<00:15,  5.40trial/s, best loss: -0.819672131147541]
                                                                                                                       
 14%|██████▍                                       | 14/100 [00:02<00:12,  6.79trial/s, best loss: -0.8516129032258064]




 17%|#######9                                       | 17/100 [00:02<00:12,  6.49trial/s, best loss: -0.819672131147541]
                                                                                                                       
 14%|██████▍                                       | 14/100 [00:02<00:12,  6.79trial/s, best loss: -0.8516129032258064]




 19%|########9                                      | 19/100 [00:02<00:10,  7.69trial/s, best loss: -0.819672131147541]
                                                                                                                       
 21%|#########8                                     | 21/100 [00:02<00:08,  8.95trial/s, best loss: -0.819672131147541]
                                                                                                                       
 23%|##########5                                   | 23/100 [00:02<00:07, 10.17trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 25%|###########5                                  | 25/100 [00:02<00:06, 11.17trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 27%|############4                      




 29%|#############3                                | 29/100 [00:03<00:06, 11.50trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 31%|##############2                               | 31/100 [00:03<00:06, 11.25trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 33%|###############1                              | 33/100 [00:03<00:05, 12.30trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 35%|################                              | 35/100 [00:03<00:05, 11.68trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 37%|#################                  




 39%|#################9                            | 39/100 [00:04<00:06,  9.74trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 41%|##################8                           | 41/100 [00:04<00:05, 10.03trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 43%|###################7                          | 43/100 [00:04<00:05, 10.47trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 16%|███████▎                                      | 16/100 [00:04<00:34,  2.42trial/s, best loss: -0.8516129032258064]




 17%|███████▊                                      | 17/100 [00:04<00:38,  2.15trial/s, best loss: -0.8516129032258064]




 19%|████████▋                                     | 19/100 [00:04<00:28,  2.85trial/s, best loss: -0.8516129032258064]




 45%|####################7                         | 45/100 [00:05<00:08,  6.65trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 47%|#####################6                        | 47/100 [00:05<00:08,  6.57trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 48%|######################                        | 48/100 [00:05<00:09,  5.41trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 49%|######################5                       | 49/100 [00:05<00:08,  6.11trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 30%|█████████████▊                     




 50%|#######################                       | 50/100 [00:06<00:11,  4.33trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 51%|#######################4                      | 51/100 [00:06<00:11,  4.41trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 52%|#######################9                      | 52/100 [00:06<00:11,  4.13trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 38%|█████████████████▊                             | 38/100 [00:06<00:06,  9.24trial/s, best loss: -0.864516129032258]




 53%|########################3                     | 53/100 [00:06<00:12,  3.72trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 54%|########################8                     | 54/100 [00:07<00:11,  4.05trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 56%|#########################7                    | 56/100 [00:07<00:08,  4.98trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 57%|##########################2                   | 57/100 [00:07<00:11,  3.75trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 58%|##########################6        




 65%|#############################9                | 65/100 [00:09<00:07,  4.47trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 66%|##############################3               | 66/100 [00:09<00:06,  5.25trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 50%|███████████████████████▌                       | 50/100 [00:09<00:07,  6.38trial/s, best loss: -0.864516129032258]




 67%|##############################8               | 67/100 [00:09<00:05,  5.73trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 68%|###############################2              | 68/100 [00:09<00:05,  6.09trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 70%|################################1             | 70/100 [00:09<00:04,  6.61trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 71%|################################6             | 71/100 [00:09<00:04,  6.70trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 55%|█████████████████████████▊         




 72%|#################################1            | 72/100 [00:10<00:05,  5.33trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 73%|#################################5            | 73/100 [00:10<00:05,  5.33trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 74%|##################################            | 74/100 [00:10<00:04,  5.91trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 75%|##################################5           | 75/100 [00:10<00:03,  6.69trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 76%|##################################9




 67%|███████████████████████████████▍               | 67/100 [00:11<00:05,  6.41trial/s, best loss: -0.864516129032258]




 82%|#####################################7        | 82/100 [00:11<00:02,  6.09trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 83%|######################################1       | 83/100 [00:11<00:02,  5.90trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 84%|######################################6       | 84/100 [00:11<00:02,  6.12trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 86%|#######################################5      | 86/100 [00:12<00:02,  5.81trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 87%|###################################




 95%|###########################################6  | 95/100 [00:14<00:01,  3.20trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 96%|############################################1 | 96/100 [00:14<00:01,  3.79trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 97%|############################################6 | 97/100 [00:14<00:00,  4.27trial/s, best loss: -0.8360655737704918]
                                                                                                                       
 99%|#############################################5| 99/100 [00:14<00:00,  5.47trial/s, best loss: -0.8360655737704918]
                                                                                                                       
100%|###################################




 92%|███████████████████████████████████████████▏   | 92/100 [00:15<00:00,  9.17trial/s, best loss: -0.864516129032258]




 98%|██████████████████████████████████████████████ | 98/100 [00:15<00:00, 12.77trial/s, best loss: -0.864516129032258]




100%|██████████████████████████████████████████████| 100/100 [00:15<00:00, 13.01trial/s, best loss: -0.864516129032258]


In [17]:
f_list

[{'df':      Survived  Pclass  Sex  SibSp  Parch  Ticket  Embarked   Age     Fare
  0           0       2    1      1      0     375         2  22.0   7.2500
  2           1       2    0      0      0     479         2  26.0   7.9250
  3           1       0    0      1      0      32         2  35.0  53.1000
  4           0       2    1      0      0     340         2  35.0   8.0500
  6           0       0    1      0      0      52         2  54.0  51.8625
  7           0       2    1      3      1     294         2   2.0  21.0750
  8           1       2    0      0      2     258         2  27.0  11.1333
  9           1       1    0      1      0      91         0  14.0  30.0708
  10          1       2    0      1      1     436         2   4.0  16.7000
  11          1       0    0      0      0      22         2  58.0  26.5500
  12          0       2    1      0      0     385         2  20.0   8.0500
  13          0       2    1      1      5     247         2  39.0  31.2750
  14  