In [0]:
!pip install tqdm
!pip install xlrd



In [0]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

num_partitions = cpu_count() * 2 #number of partitions to split dataframe
num_cores = cpu_count() #number of cores on your machine

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)

In [0]:
cpu_count()

8

In [0]:
%%time
data = pd.read_excel('dataset/Cleaned_transactions_data.xlsx', converters={'Cardnum':str})

CPU times: user 9.75 s, sys: 40.2 ms, total: 9.79 s
Wall time: 9.79 s


In [0]:
data = data.merge(data.groupby('Date')['Recnum'].first().rename('Recnum_start_day').reset_index().drop_duplicates(), on = 'Date', how = 'left')
data['Recnum_start_day'] = data['Recnum'] - data['Recnum_start_day']
data['Date'] = pd.to_timedelta(data['Recnum_start_day'], unit = 's') + data['Date']
data = data.drop(columns = ['Recnum_start_day'])

In [0]:
data.head()

Unnamed: 0,Recnum,Cardnum,Date,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud
0,1,5142190439,2010-01-01 00:00:00,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118,P,3.62,0
1,2,5142183973,2010-01-01 00:00:01,61003026333,SERVICE MERCHANDISE #81,MA,1803,P,31.42,0
2,3,5142131721,2010-01-01 00:00:02,4503082993600,OFFICE DEPOT #191,MD,20706,P,178.49,0
3,4,5142148452,2010-01-01 00:00:03,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118,P,3.62,0
4,5,5142190439,2010-01-01 00:00:04,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118,P,3.62,0


In [0]:
%%time
for keys in [['Cardnum', 'Merchnum'], ['Cardnum', 'Merch zip'], ['Cardnum', 'Merch state']]:
    data['_'.join(keys)] = data[keys[0]].apply(str) + '_' + data[keys[1]].apply(str)

CPU times: user 123 ms, sys: 15.6 ms, total: 139 ms
Wall time: 138 ms


In [0]:
def parallelize_dataframe(df, func):
    df_split = np.array_split(df, num_partitions)
    pool = Pool(num_cores)
    df = pd.concat(pool.map(func, df_split))
    pool.close()
    pool.join()
    return df

In [0]:
def get_features(data):
    attributes = ['Cardnum', 'Merchnum', 'Cardnum_Merchnum', 'Cardnum_Merch zip', 'Cardnum_Merch state']
    days = ['86400s', '172800s', '345600s', '691200s', '1296000s', '2678400s']
    fns = ['mean', 'max', 'median','sum']
    for attribute in attributes:
        attribute_groupby = data.set_index('Date').groupby(attribute)
        for day in tqdm(days):
            day_text = str(int(day[:-1])/86400)+'d'
            attribute_groupby_amt_day = attribute_groupby.rolling(day)['Amount']
            for fn in fns:    
                data['Amount_'+attribute+'_'+fn+'_'+day_text] = data[['Date', attribute]].merge(attribute_groupby_amt_day.agg(fn).reset_index(), on = ['Date', attribute], how = 'left')['Amount']
                data['Actual_Amount/'+attribute+'_'+fn+'_'+day_text] = data['Amount'] / data['Amount_'+attribute+'_'+fn+'_'+day_text]
            data[attribute+'_count_'+day_text] = data[['Date', attribute]].merge(attribute_groupby_amt_day.count().reset_index(), on = ['Date', attribute], how = 'left')['Amount']
        data['Days_since_'+attribute] = data.groupby(attribute)['Date'].diff() / np.timedelta64(1, 'D')

    for attribute in attributes[:2]:
        for day1 in tqdm(days[:2]):
            day1_text = str(int(day[:-1])/86400)+'d'
            for day2 in days[-3:]:
                day2_text = str(int(int(day[:-1])/86400)-1)+'d'
                for fn1 in ['mean', 'count']:
                    for fn2 in ['mean', 'count']:
                        numerator = data[[attribute, 'Date']].merge(data.set_index('Date').groupby(attribute).rolling(day1)['Amount'].agg(fn1).reset_index(), on = [attribute, 'Date'])['Amount']
                        denominator = data[[attribute, 'Date']].merge(data.set_index('Date').groupby(attribute).rolling(day2)['Amount'].agg(fn2).reset_index(level = 0).groupby(attribute).rolling('86400s')['Amount'].mean().reset_index(), on = [attribute, 'Date'])['Amount']
                        data[fn1+'_'+attribute+'_'+day1_text+'/'+'mean_'+fn2+'_'+attribute+'_'+day2_text] = numerator / denominator
    return data

In [0]:
%%time
data = parallelize_dataframe(data, get_features)

100%|██████████| 6/6 [00:24<00:00,  4.17s/it]
100%|██████████| 6/6 [00:25<00:00,  4.21s/it]
100%|██████████| 6/6 [00:25<00:00,  4.29s/it]
100%|██████████| 6/6 [00:26<00:00,  4.34s/it]
100%|██████████| 6/6 [00:26<00:00,  4.41s/it]
100%|██████████| 6/6 [00:26<00:00,  4.40s/it]
100%|██████████| 6/6 [00:26<00:00,  4.44s/it]
100%|██████████| 6/6 [00:26<00:00,  4.48s/it]
100%|██████████| 6/6 [00:49<00:00,  8.26s/it]
100%|██████████| 6/6 [00:49<00:00,  8.27s/it]
100%|██████████| 6/6 [00:51<00:00,  8.55s/it]
100%|██████████| 6/6 [00:52<00:00,  8.79s/it]
100%|██████████| 6/6 [00:51<00:00,  8.57s/it]
100%|██████████| 6/6 [00:53<00:00,  8.97s/it]
100%|██████████| 6/6 [00:54<00:00,  9.07s/it]
100%|██████████| 6/6 [00:54<00:00,  9.05s/it]
100%|██████████| 6/6 [01:31<00:00, 15.32s/it]
100%|██████████| 6/6 [01:36<00:00, 16.01s/it]
100%|██████████| 6/6 [01:39<00:00, 16.64s/it]
100%|██████████| 6/6 [01:39<00:00, 16.53s/it]
100%|██████████| 6/6 [01:38<00:00, 16.46s/it]
100%|██████████| 6/6 [01:41<00:00,

CPU times: user 1.79 s, sys: 922 ms, total: 2.71 s
Wall time: 18min 11s


In [0]:
data.head()

Unnamed: 0,Recnum,Cardnum,Date,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,Cardnum_Merchnum,Cardnum_Merch zip,Cardnum_Merch state,Amount_Cardnum_mean_1.0d,Actual_Amount/Cardnum_mean_1.0d,Amount_Cardnum_max_1.0d,Actual_Amount/Cardnum_max_1.0d,Amount_Cardnum_median_1.0d,Actual_Amount/Cardnum_median_1.0d,Amount_Cardnum_sum_1.0d,Actual_Amount/Cardnum_sum_1.0d,Cardnum_count_1.0d,Amount_Cardnum_mean_2.0d,Actual_Amount/Cardnum_mean_2.0d,Amount_Cardnum_max_2.0d,Actual_Amount/Cardnum_max_2.0d,Amount_Cardnum_median_2.0d,Actual_Amount/Cardnum_median_2.0d,Amount_Cardnum_sum_2.0d,Actual_Amount/Cardnum_sum_2.0d,Cardnum_count_2.0d,Amount_Cardnum_mean_4.0d,Actual_Amount/Cardnum_mean_4.0d,Amount_Cardnum_max_4.0d,Actual_Amount/Cardnum_max_4.0d,Amount_Cardnum_median_4.0d,Actual_Amount/Cardnum_median_4.0d,Amount_Cardnum_sum_4.0d,Actual_Amount/Cardnum_sum_4.0d,Cardnum_count_4.0d,Amount_Cardnum_mean_8.0d,Actual_Amount/Cardnum_mean_8.0d,Amount_Cardnum_max_8.0d,Actual_Amount/Cardnum_max_8.0d,Amount_Cardnum_median_8.0d,Actual_Amount/Cardnum_median_8.0d,Amount_Cardnum_sum_8.0d,Actual_Amount/Cardnum_sum_8.0d,Cardnum_count_8.0d,Amount_Cardnum_mean_15.0d,Actual_Amount/Cardnum_mean_15.0d,Amount_Cardnum_max_15.0d,Actual_Amount/Cardnum_max_15.0d,Amount_Cardnum_median_15.0d,Actual_Amount/Cardnum_median_15.0d,Amount_Cardnum_sum_15.0d,Actual_Amount/Cardnum_sum_15.0d,Cardnum_count_15.0d,Amount_Cardnum_mean_31.0d,Actual_Amount/Cardnum_mean_31.0d,Amount_Cardnum_max_31.0d,Actual_Amount/Cardnum_max_31.0d,Amount_Cardnum_median_31.0d,Actual_Amount/Cardnum_median_31.0d,Amount_Cardnum_sum_31.0d,Actual_Amount/Cardnum_sum_31.0d,Cardnum_count_31.0d,Days_since_Cardnum,Amount_Merchnum_mean_1.0d,Actual_Amount/Merchnum_mean_1.0d,Amount_Merchnum_max_1.0d,Actual_Amount/Merchnum_max_1.0d,Amount_Merchnum_median_1.0d,Actual_Amount/Merchnum_median_1.0d,Amount_Merchnum_sum_1.0d,Actual_Amount/Merchnum_sum_1.0d,Merchnum_count_1.0d,Amount_Merchnum_mean_2.0d,Actual_Amount/Merchnum_mean_2.0d,Amount_Merchnum_max_2.0d,Actual_Amount/Merchnum_max_2.0d,Amount_Merchnum_median_2.0d,Actual_Amount/Merchnum_median_2.0d,Amount_Merchnum_sum_2.0d,Actual_Amount/Merchnum_sum_2.0d,Merchnum_count_2.0d,Amount_Merchnum_mean_4.0d,Actual_Amount/Merchnum_mean_4.0d,Amount_Merchnum_max_4.0d,Actual_Amount/Merchnum_max_4.0d,Amount_Merchnum_median_4.0d,Actual_Amount/Merchnum_median_4.0d,Amount_Merchnum_sum_4.0d,Actual_Amount/Merchnum_sum_4.0d,Merchnum_count_4.0d,Amount_Merchnum_mean_8.0d,Actual_Amount/Merchnum_mean_8.0d,Amount_Merchnum_max_8.0d,Actual_Amount/Merchnum_max_8.0d,Amount_Merchnum_median_8.0d,Actual_Amount/Merchnum_median_8.0d,Amount_Merchnum_sum_8.0d,Actual_Amount/Merchnum_sum_8.0d,Merchnum_count_8.0d,Amount_Merchnum_mean_15.0d,Actual_Amount/Merchnum_mean_15.0d,Amount_Merchnum_max_15.0d,Actual_Amount/Merchnum_max_15.0d,Amount_Merchnum_median_15.0d,Actual_Amount/Merchnum_median_15.0d,Amount_Merchnum_sum_15.0d,Actual_Amount/Merchnum_sum_15.0d,Merchnum_count_15.0d,Amount_Merchnum_mean_31.0d,Actual_Amount/Merchnum_mean_31.0d,Amount_Merchnum_max_31.0d,Actual_Amount/Merchnum_max_31.0d,Amount_Merchnum_median_31.0d,Actual_Amount/Merchnum_median_31.0d,Amount_Merchnum_sum_31.0d,Actual_Amount/Merchnum_sum_31.0d,Merchnum_count_31.0d,Days_since_Merchnum,Amount_Cardnum_Merchnum_mean_1.0d,Actual_Amount/Cardnum_Merchnum_mean_1.0d,Amount_Cardnum_Merchnum_max_1.0d,Actual_Amount/Cardnum_Merchnum_max_1.0d,Amount_Cardnum_Merchnum_median_1.0d,Actual_Amount/Cardnum_Merchnum_median_1.0d,Amount_Cardnum_Merchnum_sum_1.0d,Actual_Amount/Cardnum_Merchnum_sum_1.0d,Cardnum_Merchnum_count_1.0d,Amount_Cardnum_Merchnum_mean_2.0d,Actual_Amount/Cardnum_Merchnum_mean_2.0d,Amount_Cardnum_Merchnum_max_2.0d,Actual_Amount/Cardnum_Merchnum_max_2.0d,Amount_Cardnum_Merchnum_median_2.0d,Actual_Amount/Cardnum_Merchnum_median_2.0d,Amount_Cardnum_Merchnum_sum_2.0d,Actual_Amount/Cardnum_Merchnum_sum_2.0d,Cardnum_Merchnum_count_2.0d,Amount_Cardnum_Merchnum_mean_4.0d,Actual_Amount/Cardnum_Merchnum_mean_4.0d,Amount_Cardnum_Merchnum_max_4.0d,Actual_Amount/Cardnum_Merchnum_max_4.0d,Amount_Cardnum_Merchnum_median_4.0d,Actual_Amount/Cardnum_Merchnum_median_4.0d,Amount_Cardnum_Merchnum_sum_4.0d,Actual_Amount/Cardnum_Merchnum_sum_4.0d,Cardnum_Merchnum_count_4.0d,Amount_Cardnum_Merchnum_mean_8.0d,Actual_Amount/Cardnum_Merchnum_mean_8.0d,Amount_Cardnum_Merchnum_max_8.0d,Actual_Amount/Cardnum_Merchnum_max_8.0d,Amount_Cardnum_Merchnum_median_8.0d,Actual_Amount/Cardnum_Merchnum_median_8.0d,Amount_Cardnum_Merchnum_sum_8.0d,Actual_Amount/Cardnum_Merchnum_sum_8.0d,Cardnum_Merchnum_count_8.0d,Amount_Cardnum_Merchnum_mean_15.0d,Actual_Amount/Cardnum_Merchnum_mean_15.0d,Amount_Cardnum_Merchnum_max_15.0d,Actual_Amount/Cardnum_Merchnum_max_15.0d,Amount_Cardnum_Merchnum_median_15.0d,Actual_Amount/Cardnum_Merchnum_median_15.0d,Amount_Cardnum_Merchnum_sum_15.0d,Actual_Amount/Cardnum_Merchnum_sum_15.0d,Cardnum_Merchnum_count_15.0d,Amount_Cardnum_Merchnum_mean_31.0d,Actual_Amount/Cardnum_Merchnum_mean_31.0d,Amount_Cardnum_Merchnum_max_31.0d,Actual_Amount/Cardnum_Merchnum_max_31.0d,Amount_Cardnum_Merchnum_median_31.0d,Actual_Amount/Cardnum_Merchnum_median_31.0d,Amount_Cardnum_Merchnum_sum_31.0d,Actual_Amount/Cardnum_Merchnum_sum_31.0d,Cardnum_Merchnum_count_31.0d,Days_since_Cardnum_Merchnum,Amount_Cardnum_Merch zip_mean_1.0d,Actual_Amount/Cardnum_Merch zip_mean_1.0d,Amount_Cardnum_Merch zip_max_1.0d,Actual_Amount/Cardnum_Merch zip_max_1.0d,Amount_Cardnum_Merch zip_median_1.0d,Actual_Amount/Cardnum_Merch zip_median_1.0d,Amount_Cardnum_Merch zip_sum_1.0d,Actual_Amount/Cardnum_Merch zip_sum_1.0d,Cardnum_Merch zip_count_1.0d,Amount_Cardnum_Merch zip_mean_2.0d,Actual_Amount/Cardnum_Merch zip_mean_2.0d,Amount_Cardnum_Merch zip_max_2.0d,Actual_Amount/Cardnum_Merch zip_max_2.0d,Amount_Cardnum_Merch zip_median_2.0d,Actual_Amount/Cardnum_Merch zip_median_2.0d,Amount_Cardnum_Merch zip_sum_2.0d,Actual_Amount/Cardnum_Merch zip_sum_2.0d,Cardnum_Merch zip_count_2.0d,Amount_Cardnum_Merch zip_mean_4.0d,Actual_Amount/Cardnum_Merch zip_mean_4.0d,Amount_Cardnum_Merch zip_max_4.0d,Actual_Amount/Cardnum_Merch zip_max_4.0d,Amount_Cardnum_Merch zip_median_4.0d,Actual_Amount/Cardnum_Merch zip_median_4.0d,Amount_Cardnum_Merch zip_sum_4.0d,Actual_Amount/Cardnum_Merch zip_sum_4.0d,Cardnum_Merch zip_count_4.0d,Amount_Cardnum_Merch zip_mean_8.0d,Actual_Amount/Cardnum_Merch zip_mean_8.0d,Amount_Cardnum_Merch zip_max_8.0d,Actual_Amount/Cardnum_Merch zip_max_8.0d,Amount_Cardnum_Merch zip_median_8.0d,Actual_Amount/Cardnum_Merch zip_median_8.0d,Amount_Cardnum_Merch zip_sum_8.0d,Actual_Amount/Cardnum_Merch zip_sum_8.0d,Cardnum_Merch zip_count_8.0d,Amount_Cardnum_Merch zip_mean_15.0d,Actual_Amount/Cardnum_Merch zip_mean_15.0d,Amount_Cardnum_Merch zip_max_15.0d,Actual_Amount/Cardnum_Merch zip_max_15.0d,Amount_Cardnum_Merch zip_median_15.0d,Actual_Amount/Cardnum_Merch zip_median_15.0d,Amount_Cardnum_Merch zip_sum_15.0d,Actual_Amount/Cardnum_Merch zip_sum_15.0d,Cardnum_Merch zip_count_15.0d,Amount_Cardnum_Merch zip_mean_31.0d,Actual_Amount/Cardnum_Merch zip_mean_31.0d,Amount_Cardnum_Merch zip_max_31.0d,Actual_Amount/Cardnum_Merch zip_max_31.0d,Amount_Cardnum_Merch zip_median_31.0d,Actual_Amount/Cardnum_Merch zip_median_31.0d,Amount_Cardnum_Merch zip_sum_31.0d,Actual_Amount/Cardnum_Merch zip_sum_31.0d,Cardnum_Merch zip_count_31.0d,Days_since_Cardnum_Merch zip,Amount_Cardnum_Merch state_mean_1.0d,Actual_Amount/Cardnum_Merch state_mean_1.0d,Amount_Cardnum_Merch state_max_1.0d,Actual_Amount/Cardnum_Merch state_max_1.0d,Amount_Cardnum_Merch state_median_1.0d,Actual_Amount/Cardnum_Merch state_median_1.0d,Amount_Cardnum_Merch state_sum_1.0d,Actual_Amount/Cardnum_Merch state_sum_1.0d,Cardnum_Merch state_count_1.0d,Amount_Cardnum_Merch state_mean_2.0d,Actual_Amount/Cardnum_Merch state_mean_2.0d,Amount_Cardnum_Merch state_max_2.0d,Actual_Amount/Cardnum_Merch state_max_2.0d,Amount_Cardnum_Merch state_median_2.0d,Actual_Amount/Cardnum_Merch state_median_2.0d,Amount_Cardnum_Merch state_sum_2.0d,Actual_Amount/Cardnum_Merch state_sum_2.0d,Cardnum_Merch state_count_2.0d,Amount_Cardnum_Merch state_mean_4.0d,Actual_Amount/Cardnum_Merch state_mean_4.0d,Amount_Cardnum_Merch state_max_4.0d,Actual_Amount/Cardnum_Merch state_max_4.0d,Amount_Cardnum_Merch state_median_4.0d,Actual_Amount/Cardnum_Merch state_median_4.0d,Amount_Cardnum_Merch state_sum_4.0d,Actual_Amount/Cardnum_Merch state_sum_4.0d,Cardnum_Merch state_count_4.0d,Amount_Cardnum_Merch state_mean_8.0d,Actual_Amount/Cardnum_Merch state_mean_8.0d,Amount_Cardnum_Merch state_max_8.0d,Actual_Amount/Cardnum_Merch state_max_8.0d,Amount_Cardnum_Merch state_median_8.0d,Actual_Amount/Cardnum_Merch state_median_8.0d,Amount_Cardnum_Merch state_sum_8.0d,Actual_Amount/Cardnum_Merch state_sum_8.0d,Cardnum_Merch state_count_8.0d,Amount_Cardnum_Merch state_mean_15.0d,Actual_Amount/Cardnum_Merch state_mean_15.0d,Amount_Cardnum_Merch state_max_15.0d,Actual_Amount/Cardnum_Merch state_max_15.0d,Amount_Cardnum_Merch state_median_15.0d,Actual_Amount/Cardnum_Merch state_median_15.0d,Amount_Cardnum_Merch state_sum_15.0d,Actual_Amount/Cardnum_Merch state_sum_15.0d,Cardnum_Merch state_count_15.0d,Amount_Cardnum_Merch state_mean_31.0d,Actual_Amount/Cardnum_Merch state_mean_31.0d,Amount_Cardnum_Merch state_max_31.0d,Actual_Amount/Cardnum_Merch state_max_31.0d,Amount_Cardnum_Merch state_median_31.0d,Actual_Amount/Cardnum_Merch state_median_31.0d,Amount_Cardnum_Merch state_sum_31.0d,Actual_Amount/Cardnum_Merch state_sum_31.0d,Cardnum_Merch state_count_31.0d,Days_since_Cardnum_Merch state,mean_Cardnum_31.0d/mean_mean_Cardnum_30d,mean_Cardnum_31.0d/mean_count_Cardnum_30d,count_Cardnum_31.0d/mean_mean_Cardnum_30d,count_Cardnum_31.0d/mean_count_Cardnum_30d,mean_Merchnum_31.0d/mean_mean_Merchnum_30d,mean_Merchnum_31.0d/mean_count_Merchnum_30d,count_Merchnum_31.0d/mean_mean_Merchnum_30d,count_Merchnum_31.0d/mean_count_Merchnum_30d
0,1,5142190439,2010-01-01 00:00:00,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118,P,3.62,0,5142190439_5509006296254,5142190439_38118,5142190439_TN,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,1.0,3.62,0.276243,1.0,1.0,3.62,0.276243,1.0
1,2,5142183973,2010-01-01 00:00:01,61003026333,SERVICE MERCHANDISE #81,MA,1803,P,31.42,0,5142183973_61003026333,5142183973_1803,5142183973_MA,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,31.42,1.0,31.42,1.0,31.42,1.0,31.42,1.0,1.0,,1.0,31.42,0.031827,1.0,1.0,31.42,0.031827,1.0
2,3,5142131721,2010-01-01 00:00:02,4503082993600,OFFICE DEPOT #191,MD,20706,P,178.49,0,5142131721_4503082993600,5142131721_20706,5142131721_MD,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,178.49,1.0,178.49,1.0,178.49,1.0,178.49,1.0,1.0,,1.0,178.49,0.005603,1.0,1.0,178.49,0.005603,1.0
3,4,5142148452,2010-01-01 00:00:03,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118,P,3.62,0,5142148452_5509006296254,5142148452_38118,5142148452_TN,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.5e-05,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,3.62,1.0,3.62,1.0,3.62,1.0,3.62,1.0,1.0,,1.0,3.62,0.276243,1.0,1.0,2.413333,0.552486,1.333333
4,5,5142190439,2010-01-01 00:00:04,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118,P,3.62,0,5142190439_5509006296254,5142190439_38118,5142190439_TN,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,4.6e-05,3.62,1.0,3.62,1.0,3.62,1.0,10.86,0.333333,3.0,3.62,1.0,3.62,1.0,3.62,1.0,10.86,0.333333,3.0,3.62,1.0,3.62,1.0,3.62,1.0,10.86,0.333333,3.0,3.62,1.0,3.62,1.0,3.62,1.0,10.86,0.333333,3.0,3.62,1.0,3.62,1.0,3.62,1.0,10.86,0.333333,3.0,3.62,1.0,3.62,1.0,3.62,1.0,10.86,0.333333,3.0,1.2e-05,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,4.6e-05,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,4.6e-05,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,3.62,1.0,3.62,1.0,3.62,1.0,7.24,0.5,2.0,4.6e-05,1.0,2.413333,0.552486,1.333333,1.0,1.81,0.828729,1.5


In [0]:
data.shape

(96397, 296)

In [0]:
data.to_csv('dataset/features.csv')