In [1]:
# make imports
import pandas as pd

import numpy as np

from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import extract_relevant_features

# import xgboost

# from sklearn.preprocessing import StandardScaler
# from sklearn.metrics import explained_variance_score
# from sklearn.metrics import mean_absolute_error
# from sklearn.feature_selection import SelectFromModel
# from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

import matplotlib.pyplot as plt
%matplotlib

Using matplotlib backend: Qt5Agg


In [2]:
read_dtypes = {'days_from': 'float32',
 'fatalities': 'float32',
 'id_col': 'object',
 'max_rainfall': 'float32',
 'mean_rainfall': 'float32',
 'min_rainfall': 'float32',
 'neighbor_fatalities': 'float32',
 'neighbor_max_rainfall': 'float32',
 'neighbor_mean_rainfall': 'float32',
 'neighbor_min_rainfall': 'float32',
 'neighbor_new_cases': 'float32',
 'neighbor_new_deaths': 'float32',
 'new_cases': 'float32',
 'new_deaths': 'float32'}

In [8]:
# rolled time series for different time intervals in the format tsfresh requires

#week_1_rolled_timeseries = pd.read_csv('/Users/Rohil/Documents/iGEM/yemen/feature_engineering/exhaustive_feature_extraction_prep/week_1_rolled_timeseries.csv', dtype=read_dtypes)

week_2_rolled_timeseries = pd.read_csv('/Users/Rohil/Documents/iGEM/yemen/feature_engineering/exhaustive_feature_extraction_prep/week_2_rolled_timeseries.csv' )

week_4_rolled_timeseries = pd.read_csv('/Users/Rohil/Documents/iGEM/yemen/feature_engineering/exhaustive_feature_extraction_prep/week_4_rolled_timeseries.csv' )



In [14]:
week_6_rolled_timeseries = pd.read_csv('/Users/Rohil/Documents/iGEM/yemen/feature_engineering/exhaustive_feature_extraction_prep/week_6_rolled_timeseries.csv' )

In [4]:
week_2_rolled_timeseries.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 83475 entries, 0 to 83474
Data columns (total 15 columns):
date                      83475 non-null object
days_from                 83475 non-null float64
fatalities                83475 non-null float64
id_col                    83475 non-null object
max_rainfall              83475 non-null float64
mean_rainfall             83475 non-null float64
min_rainfall              83475 non-null float64
neighbor_fatalities       83475 non-null float64
neighbor_max_rainfall     83475 non-null float64
neighbor_mean_rainfall    83475 non-null float64
neighbor_min_rainfall     83475 non-null float64
neighbor_new_cases        83475 non-null float64
neighbor_new_deaths       83475 non-null float64
new_cases                 83475 non-null float64
new_deaths                83475 non-null float64
dtypes: float64(13), object(2)
memory usage: 19.4 MB


In [5]:
week_1_rolled_timeseries.shape

(45108, 15)

In [97]:
week_2_rolled_timeseries.shape

(83475, 15)

In [6]:
week_4_rolled_timeseries.shape

(157122, 16)

In [7]:
week_6_rolled_timeseries.shape

(226653, 16)

In [7]:
feature_timeframe_dict = {1 : week_1_rolled_timeseries}#, 2 : week_2_rolled_timeseries}, 
                          # 4 : week_4_rolled_timeseries, 6 : week_6_rolled_timeseries}

In [8]:
def process_extracted_features(extracted_features, weeks_back):
    
    rename_dict = {val:(val + '_week_' + str(weeks_back)) for val in extracted_features.columns}

    extracted_features = extracted_features.rename(rename_dict)
    
    index_reset = extracted_features.reset_index().id.apply(pd.Series)
    
    extracted_features.reset_index(drop = True, inplace=True)
    
    extracted_features['days_from'] = index_reset[0].map(int)
    extracted_features['gov_iso'] = index_reset[1]
    
    extracted_features.days_from = extracted_features.days_from.map(int)
    
    extracted_features = extracted_features.merge(feature_timeseries[['days_from', 'date']].drop_duplicates(), on = ['days_from']).drop('days_from', axis=1)
    
    return (extracted_features)

In [9]:
def extract_and_process_features(my_rolled_timeseries, weeks_back, column_id = 'id_col', column_sort = 'days_from'):
    
    rolled_timeseries = my_rolled_timeseries.drop(['date'], axis=1)
        
    extracted_features = extract_features(rolled_timeseries, column_id = column_id, column_sort = column_sort)
    
    impute(extracted_features)
    
    extracted_features = extracted_features.apply(pd.to_numeric, downcast = 'float')
    
    print (extracted_features.info(memory_usage='deep'))
    
    extracted_features.to_csv('/Users/Rohil/Documents/iGEM/yemen/exhaustive_extracted_features_week_' + str(weeks_back) + '_unprocessed.csv.gz', compression='gzip', encoding='utf-8')
    
    processed_extracted_features = process_extracted_features(extracted_features, weeks_back)
    
    return (processed_extracted_features)

In [10]:
def extract_all_features(feature_timeframe_dict):
    
    extracted_feature_timeframe_list = []
    
    for key, val in feature_timeframe_dict.items():
        
        timeframe_extracted_features = extract_and_process_features(val, key, column_id = 'id_col', column_sort = 'days_from')
        
        extracted_feature_timeframe_list.append(timeframe_extracted_features)
        
        print ("%d timeframes extracted" % (len(extracted_feature_timeframe_list)))
    
    all_extracted_features = pd.concat(extracted_feature_timeframe_list, axis=1)
    
    return (all_extracted_features)

In [11]:
all_extracted_features = extract_all_features(feature_timeframe_dict)

Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 20/20 [28:05<00:00, 84.29s/it]
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_10__attr_"rvalue"'
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_10__attr_"slope"'
 ... 'new_deaths__partial_autocorrelation__lag_9'
 'new_deaths__spkt_welch_density__coeff_5'
 'new_deaths__spkt_welch_density__coeff_8'] did not have any finite values. Filling with zeros.


<class 'pandas.core.frame.DataFrame'>
Index: 5712 entries, ('0', 'YE-AB') to ('99', 'YE-TA')
Columns: 9528 entries, fatalities__abs_energy to new_deaths__variance_larger_than_standard_deviation
dtypes: float32(9528)
memory usage: 208.0 MB
None


NameError: name 'weeks_back' is not defined

In [None]:
all_extracted_features.to_csv('/Users/Rohil/Documents/iGEM/yemen/exhaustive_extracted_features_week_1.csv', index = False)

In [None]:
#extracted_features_week_1 = extract_features(week_1_rolled_timeseries.drop(columns=['date']), column_id = 'id_col', column_sort = 'days_from')

Feature Extraction:   0%|                                                                       | 0/20 [00:00<?, ?it/s]

In [6]:
extracted_features_week_2 = extract_features(week_2_rolled_timeseries.drop(columns=['date']), column_id = 'id_col', column_sort = 'days_from')

impute(extracted_features_week_2)

extracted_features_week_2 = extracted_features_week_2.apply(pd.to_numeric, downcast = 'float')

print (extracted_features_week_2.info(memory_usage='deep'))

extracted_features_week_2.to_csv('/Users/Rohil/Documents/iGEM/yemen/exhaustive_extracted_features_week_2_unprocessed.csv.gz', compression='gzip', encoding='utf-8')

Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 20/20 [28:05<00:00, 84.30s/it]
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"rvalue"'
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"slope"'
 ... 'new_deaths__friedrich_coefficients__m_3__r_30__coeff_3'
 'new_deaths__max_langevin_fixed_point__m_3__r_30'
 'new_deaths__spkt_welch_density__coeff_8'] did not have any finite values. Filling with zeros.


<class 'pandas.core.frame.DataFrame'>
Index: 5712 entries, ('0', 'YE-AB') to ('99', 'YE-TA')
Columns: 9528 entries, fatalities__abs_energy to new_deaths__variance_larger_than_standard_deviation
dtypes: float32(9528)
memory usage: 208.0 MB
None


In [7]:
extracted_features_week_2

variable,fatalities__abs_energy,fatalities__absolute_sum_of_changes,"fatalities__agg_autocorrelation__f_agg_""mean""","fatalities__agg_autocorrelation__f_agg_""median""","fatalities__agg_autocorrelation__f_agg_""var""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""intercept""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""rvalue""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""slope""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""stderr""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_50__attr_""intercept""",...,new_deaths__time_reversal_asymmetry_statistic__lag_1,new_deaths__time_reversal_asymmetry_statistic__lag_2,new_deaths__time_reversal_asymmetry_statistic__lag_3,new_deaths__value_count__value_-inf,new_deaths__value_count__value_0,new_deaths__value_count__value_1,new_deaths__value_count__value_inf,new_deaths__value_count__value_nan,new_deaths__variance,new_deaths__variance_larger_than_standard_deviation
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"('0', 'YE-AB')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-AD')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,1.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-AM')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-BA')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-DA')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-DH')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-HD-AL')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,1.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-HJ')",1.773776e-07,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-HU')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
"('0', 'YE-IB')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000125,0.0,0.000000,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0


In [9]:
extracted_features_week_4 = extract_features(week_4_rolled_timeseries.drop(columns=['date']), column_id = 'id_col', column_sort = 'days_from')

impute(extracted_features_week_4)

extracted_features_week_4 = extracted_features_week_4.apply(pd.to_numeric, downcast = 'float')

print (extracted_features_week_4.info(memory_usage='deep'))

extracted_features_week_4.to_csv('/Users/Rohil/Documents/iGEM/yemen/exhaustive_extracted_features_week_4_unprocessed.csv.gz', compression='gzip', encoding='utf-8')

Feature Extraction: 100%|█████████████████████████████████████████████████████████████| 20/20 [41:54<00:00, 125.72s/it]
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"rvalue"'
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"slope"'
 ... 'new_deaths__friedrich_coefficients__m_3__r_30__coeff_2'
 'new_deaths__friedrich_coefficients__m_3__r_30__coeff_3'
 'new_deaths__max_langevin_fixed_point__m_3__r_30'] did not have any finite values. Filling with zeros.


<class 'pandas.core.frame.DataFrame'>
Index: 5712 entries, ('0', 'YE-AB') to ('99', 'YE-TA')
Columns: 9528 entries, fatalities__abs_energy to new_deaths__variance_larger_than_standard_deviation
dtypes: float32(9528)
memory usage: 208.0 MB
None


In [15]:
extracted_features_week_6 = extract_features(week_6_rolled_timeseries.drop(columns=['date']), column_id = 'id_col', column_sort = 'days_from')

impute(extracted_features_week_6)

extracted_features_week_6 = extracted_features_week_6.apply(pd.to_numeric, downcast = 'float')

print (extracted_features_week_6.info(memory_usage='deep'))

extracted_features_week_6.to_csv('/Users/Rohil/Documents/iGEM/yemen/exhaustive_extracted_features_week_6_unprocessed.csv.gz', compression='gzip', encoding='utf-8')

Feature Extraction: 100%|█████████████████████████████████████████████████████████████| 20/20 [44:29<00:00, 133.49s/it]
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"rvalue"'
 'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"slope"'
 ... 'new_deaths__friedrich_coefficients__m_3__r_30__coeff_2'
 'new_deaths__friedrich_coefficients__m_3__r_30__coeff_3'
 'new_deaths__max_langevin_fixed_point__m_3__r_30'] did not have any finite values. Filling with zeros.


<class 'pandas.core.frame.DataFrame'>
Index: 5712 entries, ('0', 'YE-AB') to ('99', 'YE-TA')
Columns: 9528 entries, fatalities__abs_energy to new_deaths__variance_larger_than_standard_deviation
dtypes: float32(9528)
memory usage: 208.0 MB
None


In [16]:
extracted_features_week_1 = pd.read_csv('/Users/Rohil/Documents/iGEM/yemen/exhaustive_extracted_features_week_1_unprocessed.csv.gz')

In [17]:
extracted_features_week_1

Unnamed: 0,id,fatalities__abs_energy,fatalities__absolute_sum_of_changes,"fatalities__agg_autocorrelation__f_agg_""mean""","fatalities__agg_autocorrelation__f_agg_""median""","fatalities__agg_autocorrelation__f_agg_""var""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""intercept""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""rvalue""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""slope""","fatalities__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""stderr""",...,new_deaths__time_reversal_asymmetry_statistic__lag_1,new_deaths__time_reversal_asymmetry_statistic__lag_2,new_deaths__time_reversal_asymmetry_statistic__lag_3,new_deaths__value_count__value_-inf,new_deaths__value_count__value_0,new_deaths__value_count__value_1,new_deaths__value_count__value_inf,new_deaths__value_count__value_nan,new_deaths__variance,new_deaths__variance_larger_than_standard_deviation
0,"('0', 'YE-AB')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
1,"('0', 'YE-AD')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,1.0,0.0,0.0,0.0,0.000000e+00,0.0
2,"('0', 'YE-AM')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
3,"('0', 'YE-BA')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
4,"('0', 'YE-DA')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
5,"('0', 'YE-DH')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
6,"('0', 'YE-HD-AL')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,1.0,0.0,0.0,0.0,0.000000e+00,0.0
7,"('0', 'YE-HJ')",1.773776e-07,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
8,"('0', 'YE-HU')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0
9,"('0', 'YE-IB')",0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0


In [21]:
week_1_rename_dict = {val:(val + '_week_1') for val in extracted_features_week_1.columns}

extracted_features_week_1 = extracted_features_week_1.rename(columns=week_1_rename_dict)

In [22]:
week_2_rename_dict = {val:(val + '_week_2') for val in extracted_features_week_2.columns}

extracted_features_week_2 = extracted_features_week_2.rename(columns=week_2_rename_dict)

In [23]:
week_4_rename_dict = {val:(val + '_week_4') for val in extracted_features_week_4.columns}

extracted_features_week_4 = extracted_features_week_4.rename(columns=week_4_rename_dict)

In [24]:
week_6_rename_dict = {val:(val + '_week_6') for val in extracted_features_week_6.columns}

extracted_features_week_6 = extracted_features_week_6.rename(columns=week_6_rename_dict)

In [35]:
extracted_features_week_1 = extracted_features_week_1.rename(columns = {'id_week_1':'id'}).set_index('id')

In [123]:
extracted_features_week_1 = extracted_features_week_1.apply(pd.to_numeric, downcast = 'float')

In [124]:
full_extracted_features = extracted_features_week_1.merge(extracted_features_week_2, left_index = True, right_index = True).merge(extracted_features_week_4, left_index = True, right_index = True).merge(extracted_features_week_6, left_index = True, right_index = True)

In [125]:
full_extracted_features['days_from'] = full_extracted_features.reset_index().id.apply(literal_eval).apply(pd.Series)[0]

In [126]:
full_extracted_features['gov_iso'] = full_extracted_features.reset_index().id.apply(literal_eval).apply(pd.Series)[1]

In [127]:
full_extracted_features_processed = full_extracted_features.reset_index(drop = True)

In [128]:
full_extracted_features_processed[['days_from', 'gov_iso']] = full_extracted_features.reset_index().id.apply(literal_eval).apply(pd.Series)

In [129]:
y_df = pd.read_csv('/Users/Rohil/Documents/iGEM/yemen/cholera_epi_data/y_normalized_groupby_week_df.csv')

In [130]:
full_extracted_features_processed.days_from = full_extracted_features_processed.days_from.map(int)

In [131]:
full_extracted_features_processed = full_extracted_features_processed.merge(week_2_rolled_timeseries[['days_from', 'date']].drop_duplicates(), on = 'days_from', how = 'left')

In [132]:
full_extracted_features_processed.date = pd.to_datetime(full_extracted_features_processed.date , format = '%Y-%m-%d')

In [133]:
y_df.date = pd.to_datetime(y_df.date , format = '%Y-%m-%d')

In [134]:
full_extracted_features_processed.columns

Index(['fatalities__abs_energy_week_1',
       'fatalities__absolute_sum_of_changes_week_1',
       'fatalities__agg_autocorrelation__f_agg_"mean"_week_1',
       'fatalities__agg_autocorrelation__f_agg_"median"_week_1',
       'fatalities__agg_autocorrelation__f_agg_"var"_week_1',
       'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_10__attr_"intercept"_week_1',
       'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_10__attr_"rvalue"_week_1',
       'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_10__attr_"slope"_week_1',
       'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_10__attr_"stderr"_week_1',
       'fatalities__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"intercept"_week_1',
       ...
       'new_deaths__value_count__value_-inf_week_6',
       'new_deaths__value_count__value_0_week_6',
       'new_deaths__value_count__value_1_week_6',
       'new_deaths__value_count__value_inf_week_6',
       'new_deaths__value_count__value_nan_week_6',
    

In [135]:
full_features_final = full_extracted_features_processed.merge(y_df, how = 'left', on = ['date', 'gov_iso'])[full_extracted_features_processed.columns]

In [136]:
y_df = full_extracted_features_processed.merge(y_df, how = 'left', on = ['date', 'gov_iso'])[y_df.columns]

In [137]:
full_features_final.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5712 entries, 0 to 5711
Columns: 38115 entries, fatalities__abs_energy_week_1 to date
dtypes: datetime64[ns](1), float32(38112), int64(1), object(1)
memory usage: 830.9 MB


In [139]:
full_features_final.to_pickle('/Users/Rohil/Documents/iGEM/yemen/exhaustive_extracted_features.pkl')

In [140]:
y_df.to_pickle('/Users/Rohil/Documents/iGEM/yemen/y_df_for_feature_selection.pkl')