In [1]:
import csv, sys, collections
import subprocess
from datetime import datetime
from utils.common import *
from tqdm import tqdm

# Configuration

In [2]:
# Raw input CSV files.
RAW_TRAIN_PATH = "../tr.r0.csv"
RAW_VAL_PATH = "../va.r0.csv"

# Files written by the feature-generation step (gen_data).
PROCESSED_TRAIN_PATH = "processed_train.csv"
PROCESSED_VAL_PATH = "processed_val.csv"

# Files written by the hashing step.
HASHED_TRAIN_PATH = "hashed_train.csv"
HASHED_VAL_PATH = "hashed_val.csv"

# Row cap applied to the training file by scan() and gen_data().
# Stored as an int because it is compared against integer row indices
# (and against the -1 sentinel); numerically equal to the former 4 * 1e6.
ROWS_FOR_TRAINING = 4 * 10**6
LEARNING_RATE = 0.03  # passed to the trainer as -r
EPOCHS = 13           # passed to the trainer as -t

# Raw columns copied into the processed files.
# 'pub_id', 'pub_domain', 'pub_category' were removed in favour of the
# separate app_* and site_* columns.
FIELDS = ['id','click','hour','banner_pos','device_id','device_ip','device_model','device_conn_type','C14','C17','C20','C21',
         'app_id', 'app_domain', 'app_category', 'site_id', 'site_domain', 'site_category']

# Header of the processed files: raw columns plus the generated features.
# NOTE: 'user_click_histroy' is misspelled, but gen_data() writes this exact
# column name, so it must not be corrected here in isolation.
NEW_FIELDS = FIELDS+['device_id_count','device_ip_count','user_count','smooth_user_hour_count','user_click_histroy']

# Calculate counting features

In [3]:
def scan(path, is_train):
    '''
    Accumulate device, IP, user and user-hour row counts from a CSV file.

    Adapted from base/util/gen_data.py.

    Each processed row increments the module-level counters ``id_cnt``,
    ``ip_cnt``, ``user_cnt`` and ``user_hour_cnt``; they must already exist
    when this function is called.

    Args:
        path: CSV file with a header row providing at least the
            ``device_id``, ``device_ip`` and ``hour`` columns (plus whatever
            ``def_user`` reads from the row).
        is_train: when truthy, stop as soon as the 1-based row index reaches
            ROWS_FOR_TRAINING; that row itself is not counted. When falsy,
            the whole file is counted.

    Returns:
        None. Results are left in the module-level counters.
    '''
    # newline='' is what the csv module expects for files it parses itself.
    # Both context managers guarantee that the file handle and the progress
    # bar are closed even when the loop is left through ``break``.
    with open(path, newline='') as src, \
            tqdm(enumerate(csv.DictReader(src), start=1)) as rows:
        for i, row in rows:
            if is_train and i >= ROWS_FOR_TRAINING:
                break
            user = def_user(row)
            id_cnt[row['device_id']] += 1
            ip_cnt[row['device_ip']] += 1
            user_cnt[user] += 1
            # Key format must match the lookup done in gen_data().
            user_hour_cnt[user+'-'+row['hour']] += 1

In [4]:
start = datetime.now()
print('Start: {0}'.format(str(start)))

# Module-level counters that scan() fills and gen_data() later reads.
# Re-creating them here makes the cell safe to re-run without double counting.
id_cnt = collections.defaultdict(int)
ip_cnt = collections.defaultdict(int)
user_cnt = collections.defaultdict(int)
user_hour_cnt = collections.defaultdict(int)

# Counts are accumulated over the (capped) training rows and all validation rows.
scan(RAW_TRAIN_PATH, True)
scan(RAW_VAL_PATH, False)

# Read the clock once so the printed End minus Start equals the printed elapsed time.
end = datetime.now()
print('End: {0}, Elapsed time: {1}'.format(str(end), str(end - start)))

2994it [00:00, 28625.04it/s]

Start: 2020-02-09 16:00:11.189515


3996164it [00:56, 48860.26it/s]
0it [00:00, ?it/s][A
4860it [00:00, 48599.70it/s][A
8939it [00:00, 45958.86it/s][A
16428it [00:00, 51982.38it/s][A
24601it [00:00, 58352.82it/s][A
31143it [00:00, 60305.27it/s][A
36693it [00:00, 57581.74it/s][A
44129it [00:00, 61761.40it/s][A
50851it [00:00, 63302.86it/s][A
57099it [00:00, 57961.75it/s][A
62925it [00:01, 52588.27it/s][A
68301it [00:01, 48985.63it/s][A
75535it [00:01, 54235.40it/s][A
82036it [00:01, 57071.20it/s][A
87970it [00:01, 51930.17it/s][A
93411it [00:01, 45686.99it/s][A
98290it [00:01, 42228.61it/s][A
103093it [00:01, 43815.58it/s][A
111000it [00:02, 50579.35it/s][A
116582it [00:02, 49837.51it/s][A
122321it [00:02, 51884.86it/s][A
127790it [00:02, 50026.52it/s][A
135819it [00:02, 56403.96it/s][A
144430it [00:02, 62914.33it/s][A
152117it [00:02, 66537.06it/s][A
160400it [00:02, 70709.51it/s][A
167843it [00:02, 66085.92it/s][A
174773it [00:03, 49292.05it/s][A
180553it [00:03, 50862.85it/s][A
188660it [00

1707353it [00:26, 67548.37it/s][A
1714147it [00:26, 66141.73it/s][A
1720877it [00:26, 66482.63it/s][A
1727550it [00:26, 65497.81it/s][A
1734620it [00:27, 66975.47it/s][A
1741340it [00:27, 67039.48it/s][A
1748264it [00:27, 67684.40it/s][A
1755045it [00:27, 67214.12it/s][A
1761776it [00:27, 62807.19it/s][A
1768122it [00:27, 59918.76it/s][A
1774188it [00:27, 57857.76it/s][A
1780041it [00:27, 57274.65it/s][A
1786020it [00:27, 58005.24it/s][A
1793970it [00:28, 63124.61it/s][A
1801980it [00:28, 67409.73it/s][A
1809825it [00:28, 70380.41it/s][A
1817586it [00:28, 72403.18it/s][A
1824959it [00:28, 72166.70it/s][A
1832269it [00:28, 70212.06it/s][A
1839367it [00:28, 69006.89it/s][A
1846386it [00:28, 69356.75it/s][A
1853625it [00:28, 70237.43it/s][A
1860791it [00:28, 70656.61it/s][A
1868638it [00:29, 72831.03it/s][A
1876201it [00:29, 73648.28it/s][A
1884101it [00:29, 75174.41it/s][A
1892115it [00:29, 76596.86it/s][A
1899799it [00:29, 72497.85it/s][A
1907112it [00:29, 72

3188994it [00:52, 65203.77it/s][A
3195518it [00:52, 61989.20it/s][A
3201753it [00:52, 59923.78it/s][A
3207786it [00:53, 56738.03it/s][A
3213523it [00:53, 52842.04it/s][A
3218906it [00:53, 51849.61it/s][A
3224164it [00:53, 51837.94it/s][A
3229399it [00:53, 50561.66it/s][A
3235157it [00:53, 52479.41it/s][A
3242214it [00:53, 56850.48it/s][A
3248039it [00:53, 55873.27it/s][A
3253796it [00:53, 56369.70it/s][A
3259505it [00:54, 54876.30it/s][A
3265052it [00:54, 53812.49it/s][A
3271271it [00:54, 56076.94it/s][A
3276936it [00:54, 44587.81it/s][A
3282562it [00:54, 47545.35it/s][A
3289055it [00:54, 51696.79it/s][A
3296056it [00:54, 56098.28it/s][A
3302335it [00:54, 57947.28it/s][A
3309062it [00:54, 60458.38it/s][A
3315319it [00:55, 55595.83it/s][A
3321104it [00:55, 52197.12it/s][A
3327762it [00:55, 55812.73it/s][A
3334946it [00:55, 59814.55it/s][A
3342199it [00:55, 63133.86it/s][A
3349438it [00:55, 65650.48it/s][A
3356655it [00:55, 67477.83it/s][A
3363807it [00:55, 68

End: 2020-02-09 16:02:23.858922, Elapsed time: 0:02:12.669485





# Add counting features & history features to new csv file 

In [5]:
def gen_data(src_path, dst_path, is_train):
    '''
    Write a copy of a raw CSV with counting and click-history features added.

    Adapted from base/util/gen_data.py.

    For every row read from ``src_path`` the FIELDS columns are copied and
    the extra NEW_FIELDS columns are filled in from the module-level counters
    (``id_cnt``, ``ip_cnt``, ``user_cnt``, ``user_hour_cnt``) and from the
    module-level ``history`` mapping, which this function mutates. All of
    them must exist before the call.

    Args:
        src_path: raw CSV to read (header row required).
        dst_path: CSV to create or overwrite; its header is NEW_FIELDS.
        is_train: when truthy, (a) stop as soon as the 1-based row index
            reaches ROWS_FOR_TRAINING, so that row is not written, and
            (b) for rows where ``has_id_info(row)`` is truthy, append the
            row's ``click`` value to the user's pending history buffer.
            When falsy, the whole file is processed and clicks are never
            recorded.

    Returns:
        None. The result is the file written at ``dst_path``.
    '''
    # newline='' is required by the csv module for both reading and writing
    # (otherwise rows end in '\r\r\n' on platforms that translate '\n').
    # The context managers flush/close both files deterministically instead
    # of relying on garbage collection.
    with open(src_path, newline='') as src, open(dst_path, 'w', newline='') as dst:
        reader = csv.DictReader(src)
        writer = csv.DictWriter(dst, NEW_FIELDS)
        writer.writeheader()

        # Closing the bar explicitly keeps it tidy when the loop breaks early.
        with tqdm(enumerate(reader, start=1)) as rows:
            for i, row in rows:
                if is_train and i >= ROWS_FOR_TRAINING:
                    break

                new_row = {field: row[field] for field in FIELDS}

                new_row['device_id_count'] = id_cnt[row['device_id']]
                new_row['device_ip_count'] = ip_cnt[row['device_ip']]

                user, hour = def_user(row), row['hour']
                new_row['user_count'] = user_cnt[user]
                new_row['smooth_user_hour_count'] = str(user_hour_cnt[user+'-'+hour])

                if has_id_info(row):
                    state = history[user]

                    # On the first row of a new hour, fold the clicks buffered
                    # during the previous hour into the visible history and
                    # keep only the last four characters.
                    if state['prev_hour'] != hour:
                        state['history'] = (state['history'] + state['buffer'])[-4:]
                        state['buffer'] = ''
                        state['prev_hour'] = hour

                    # The feature only exposes clicks from earlier hours; the
                    # current row's click goes to the buffer afterwards.
                    new_row['user_click_histroy'] = state['history']

                    if is_train:
                        state['buffer'] += row['click']
                else:
                    new_row['user_click_histroy'] = ''

                writer.writerow(new_row)

In [6]:
start = datetime.now()
print('Start: {0}'.format(str(start)))

# Per-user click-history state shared by both gen_data() calls below:
#   'history'   - clicks exposed as the feature value
#   'buffer'    - clicks seen during the current hour, not yet exposed
#   'prev_hour' - hour of the last row processed for this user
history = collections.defaultdict(lambda: {'history': '', 'buffer': '', 'prev_hour': ''})

# The training file is processed first so that the validation rows see the
# history accumulated from training clicks.
# NOTE(review): is_train is only False here when ROWS_FOR_TRAINING is -1,
# whereas the scan() cell always passes True - confirm the intended sentinel.
gen_data(src_path=RAW_TRAIN_PATH, dst_path=PROCESSED_TRAIN_PATH,is_train=ROWS_FOR_TRAINING != -1)
gen_data(src_path=RAW_VAL_PATH, dst_path=PROCESSED_VAL_PATH, is_train=False)

# Read the clock once so the printed End minus Start equals the printed elapsed time.
end = datetime.now()
print('End: {0}, Elapsed time: {1}'.format(str(end), str(end - start)))


0it [00:00, ?it/s][A
2564it [00:00, 25636.11it/s][A

Start: 2020-02-09 16:02:23.918442



4999it [00:00, 25234.75it/s][A
7370it [00:00, 24755.95it/s][A
10349it [00:00, 26077.37it/s][A
13291it [00:00, 26996.78it/s][A
16216it [00:00, 27635.12it/s][A
19299it [00:00, 28521.10it/s][A
22516it [00:00, 29524.58it/s][A
25611it [00:00, 29937.88it/s][A
28573it [00:01, 29840.01it/s][A
31498it [00:01, 28100.34it/s][A
34452it [00:01, 28514.56it/s][A
37291it [00:01, 28363.07it/s][A
40272it [00:01, 28780.00it/s][A
43291it [00:01, 29186.95it/s][A
46345it [00:01, 29579.76it/s][A
49445it [00:01, 29990.66it/s][A
52517it [00:01, 30205.70it/s][A
55539it [00:01, 30186.30it/s][A
58559it [00:02, 29777.38it/s][A
61539it [00:02, 29657.43it/s][A
64534it [00:02, 29743.53it/s][A
67510it [00:02, 25292.54it/s][A
70512it [00:02, 26546.28it/s][A
73265it [00:02, 26713.78it/s][A
76130it [00:02, 27266.36it/s][A
79039it [00:02, 27787.85it/s][A
82057it [00:02, 28463.59it/s][A
84935it [00:03, 27223.52it/s][A
87691it [00:03, 27046.73it/s][A
90512it [00:03, 27381.53it/s][A
93634it [00

748869it [00:25, 29168.33it/s][A
751976it [00:25, 29710.18it/s][A
755158it [00:25, 30311.94it/s][A
758346it [00:25, 30764.04it/s][A
761573it [00:25, 31198.62it/s][A
764742it [00:25, 31344.15it/s][A
767892it [00:25, 31057.96it/s][A
771010it [00:26, 30916.78it/s][A
774160it [00:26, 31087.28it/s][A
777288it [00:26, 31143.26it/s][A
780407it [00:26, 30962.33it/s][A
783653it [00:26, 31395.56it/s][A
786876it [00:26, 31638.88it/s][A
790043it [00:26, 31179.00it/s][A
793322it [00:26, 31644.61it/s][A
796491it [00:26, 30979.42it/s][A
799672it [00:26, 31223.62it/s][A
802835it [00:27, 31342.49it/s][A
806026it [00:27, 31509.42it/s][A
809226it [00:27, 31653.29it/s][A
812394it [00:27, 31473.98it/s][A
815553it [00:27, 31507.38it/s][A
818705it [00:27, 28197.57it/s][A
821591it [00:27, 21622.07it/s][A
824032it [00:27, 20284.92it/s][A
826272it [00:28, 17728.61it/s][A
828251it [00:28, 16376.62it/s][A
830054it [00:28, 14824.92it/s][A
831680it [00:28, 13476.51it/s][A
834399it [00:2

1476158it [00:50, 30439.61it/s][A
1479203it [00:50, 30415.98it/s][A
1482245it [00:50, 29598.83it/s][A
1485337it [00:50, 29980.81it/s][A
1488473it [00:50, 30381.65it/s][A
1491619it [00:50, 30696.42it/s][A
1494777it [00:50, 30951.99it/s][A
1497889it [00:50, 31000.71it/s][A
1501008it [00:50, 31056.80it/s][A
1504152it [00:51, 31170.39it/s][A
1507305it [00:51, 31275.37it/s][A
1510434it [00:51, 31085.51it/s][A
1513544it [00:51, 30640.69it/s][A
1516660it [00:51, 30793.64it/s][A
1519879it [00:51, 31199.22it/s][A
1523052it [00:51, 31355.92it/s][A
1526281it [00:51, 31628.10it/s][A
1529457it [00:51, 31665.58it/s][A
1532625it [00:51, 31205.14it/s][A
1535748it [00:52, 30234.39it/s][A
1538780it [00:52, 30024.14it/s][A
1541869it [00:52, 30276.60it/s][A
1544979it [00:52, 30517.58it/s][A
1548067it [00:52, 30625.14it/s][A
1551133it [00:52, 30188.36it/s][A
1554267it [00:52, 30524.54it/s][A
1557531it [00:52, 31129.63it/s][A
1560787it [00:52, 31544.35it/s][A
1563947it [00:52, 31

2210301it [01:13, 30953.80it/s][A
2213494it [01:14, 31234.35it/s][A
2216677it [01:14, 31409.88it/s][A
2219820it [01:14, 31349.25it/s][A
2222956it [01:14, 31337.80it/s][A
2226091it [01:14, 30980.11it/s][A
2229196it [01:14, 31000.30it/s][A
2232376it [01:14, 31232.37it/s][A
2235636it [01:14, 31628.53it/s][A
2238923it [01:14, 31989.44it/s][A
2242125it [01:15, 31473.43it/s][A
2245296it [01:15, 31541.86it/s][A
2248453it [01:15, 31470.67it/s][A
2251602it [01:15, 30900.75it/s][A
2254696it [01:15, 28780.17it/s][A
2257606it [01:15, 28448.31it/s][A
2260768it [01:15, 29329.95it/s][A
2263725it [01:15, 28786.99it/s][A
2266623it [01:15, 24527.06it/s][A
2269198it [01:16, 20650.10it/s][A
2271454it [01:16, 20305.40it/s][A
2273619it [01:16, 19663.05it/s][A
2276319it [01:16, 21408.04it/s][A
2278575it [01:16, 21662.92it/s][A
2281749it [01:16, 23942.25it/s][A
2285157it [01:16, 26288.05it/s][A
2288568it [01:16, 28229.09it/s][A
2291545it [01:16, 28468.85it/s][A
2294500it [01:17, 24

2892808it [01:38, 33277.08it/s][A
2896162it [01:38, 33353.04it/s][A
2899609it [01:38, 33679.38it/s][A
2903082it [01:38, 33986.63it/s][A
2906484it [01:38, 33380.97it/s][A
2909926it [01:38, 33684.86it/s][A
2913299it [01:39, 33668.81it/s][A
2916688it [01:39, 33732.64it/s][A
2920064it [01:39, 33518.52it/s][A
2923418it [01:39, 33433.61it/s][A
2926799it [01:39, 33543.97it/s][A
2930155it [01:39, 33307.16it/s][A
2933559it [01:39, 33521.68it/s][A
2937042it [01:39, 33903.53it/s][A
2940435it [01:39, 33635.47it/s][A
2943801it [01:39, 33269.21it/s][A
2947131it [01:40, 32907.37it/s][A
2950425it [01:40, 32628.72it/s][A
2953797it [01:40, 32945.98it/s][A
2957159it [01:40, 33145.24it/s][A
2960672it [01:40, 33716.63it/s][A
2964116it [01:40, 33927.12it/s][A
2967529it [01:40, 33985.88it/s][A
2970930it [01:40, 33929.74it/s][A
2974325it [01:40, 33747.48it/s][A
2977701it [01:40, 32894.30it/s][A
2980997it [01:41, 32888.25it/s][A
2984290it [01:41, 32718.19it/s][A
2987565it [01:41, 32

3691328it [02:02, 33471.47it/s][A
3694680it [02:02, 33460.75it/s][A
3698205it [02:02, 33977.77it/s][A
3701606it [02:02, 33649.37it/s][A
3704974it [02:02, 33204.99it/s][A
3708349it [02:02, 33364.75it/s][A
3711834it [02:02, 33795.89it/s][A
3715217it [02:02, 33647.57it/s][A
3718585it [02:03, 33553.28it/s][A
3722049it [02:03, 33871.87it/s][A
3725439it [02:03, 33721.24it/s][A
3728813it [02:03, 33504.74it/s][A
3732202it [02:03, 33618.19it/s][A
3735770it [02:03, 34209.10it/s][A
3739195it [02:03, 34040.25it/s][A
3742662it [02:03, 34226.75it/s][A
3746087it [02:03, 31389.08it/s][A
3749535it [02:04, 32256.29it/s][A
3752964it [02:04, 32840.51it/s][A
3756279it [02:04, 32901.97it/s][A
3759715it [02:04, 33324.65it/s][A
3763068it [02:04, 33383.70it/s][A
3766532it [02:04, 33750.90it/s][A
3769956it [02:04, 33893.84it/s][A
3773352it [02:04, 33673.39it/s][A
3776922it [02:04, 34256.00it/s][A
3780434it [02:04, 34510.50it/s][A
3783890it [02:05, 34469.73it/s][A
3787341it [02:05, 34

432333it [00:13, 23511.09it/s][A[A

434902it [00:13, 23713.09it/s][A[A

437426it [00:13, 24122.24it/s][A[A

439947it [00:13, 23740.77it/s][A[A

442903it [00:14, 25229.12it/s][A[A

446331it [00:14, 27398.34it/s][A[A

449613it [00:14, 28826.41it/s][A[A

452740it [00:14, 29516.68it/s][A[A

456282it [00:14, 31067.97it/s][A[A

459623it [00:14, 31732.76it/s][A[A

462851it [00:14, 31572.65it/s][A[A

466246it [00:14, 32248.49it/s][A[A

469564it [00:14, 32521.27it/s][A[A

472838it [00:14, 32573.84it/s][A[A

476385it [00:15, 33390.15it/s][A[A

479837it [00:15, 33719.01it/s][A[A

483240it [00:15, 33808.88it/s][A[A

486785it [00:15, 34283.48it/s][A[A

490221it [00:15, 33755.06it/s][A[A

493803it [00:15, 34346.64it/s][A[A

497258it [00:15, 34406.89it/s][A[A

500704it [00:15, 34230.05it/s][A[A

504131it [00:15, 33610.72it/s][A[A

507677it [00:16, 34144.42it/s][A[A
3999016it [02:27, 32660.21it/s][A

511098it [00:16, 34046.40it/s][A[A

514618it [00:16

1157757it [00:35, 32649.20it/s][A[A

1161119it [00:35, 32931.60it/s][A[A

1164628it [00:35, 33549.56it/s][A[A

1168097it [00:35, 33883.16it/s][A[A

1171493it [00:35, 28115.18it/s][A[A

1174962it [00:35, 29808.92it/s][A[A

1178089it [00:36, 28377.71it/s][A[A

1181391it [00:36, 29625.17it/s][A[A

1184495it [00:36, 30035.27it/s][A[A

1187940it [00:36, 31235.96it/s][A[A

1191364it [00:36, 32080.06it/s][A[A

1194717it [00:36, 32501.03it/s][A[A

1198251it [00:36, 33301.59it/s][A[A

1201611it [00:36, 33274.08it/s][A[A

1205133it [00:36, 33833.54it/s][A[A

1208682it [00:37, 34313.92it/s][A[A

1212127it [00:37, 33581.05it/s][A[A

1215498it [00:37, 33343.61it/s][A[A

1218878it [00:37, 33477.82it/s][A[A

1222404it [00:37, 33993.06it/s][A[A

1225810it [00:37, 33200.70it/s][A[A

1229139it [00:37, 32989.95it/s][A[A

1232445it [00:37, 32873.97it/s][A[A

1235949it [00:37, 33494.93it/s][A[A

1239362it [00:37, 33682.37it/s][A[A

1242735it [00:38, 33145.1

1758171it [00:59, 26960.93it/s][A[A

1761297it [00:59, 28120.40it/s][A[A

1764204it [00:59, 26658.89it/s][A[A

1766952it [00:59, 26250.10it/s][A[A

1769852it [00:59, 27018.47it/s][A[A

1772601it [00:59, 19980.34it/s][A[A

1774898it [00:59, 18290.16it/s][A[A

1776964it [01:00, 17324.19it/s][A[A

1778873it [01:00, 15676.65it/s][A[A

1780595it [01:00, 14590.94it/s][A[A

1782177it [01:00, 14238.31it/s][A[A

1783689it [01:00, 13902.41it/s][A[A

1785143it [01:00, 13828.24it/s][A[A

1787487it [01:00, 15767.22it/s][A[A

1790312it [01:00, 18176.73it/s][A[A

1792849it [01:00, 19865.37it/s][A[A

1795038it [01:01, 18642.57it/s][A[A

1798267it [01:01, 21349.05it/s][A[A

1800741it [01:01, 22263.47it/s][A[A

1803292it [01:01, 23146.92it/s][A[A

1806511it [01:01, 25276.82it/s][A[A

1809361it [01:01, 26163.89it/s][A[A

1812094it [01:01, 26134.76it/s][A[A

1815337it [01:01, 27750.33it/s][A[A

1818197it [01:01, 27936.96it/s][A[A

1821286it [01:02, 28761.8

2353237it [01:22, 31397.33it/s][A[A

2356388it [01:22, 31430.38it/s][A[A

2359651it [01:22, 31778.59it/s][A[A

2362871it [01:22, 31901.28it/s][A[A

2366164it [01:22, 32202.80it/s][A[A

2369393it [01:22, 32226.71it/s][A[A

2372619it [01:23, 31908.68it/s][A[A

2375813it [01:23, 31827.22it/s][A[A

2378998it [01:23, 31579.13it/s][A[A

2382352it [01:23, 32141.66it/s][A[A

2385570it [01:23, 31816.15it/s][A[A

2388904it [01:23, 32257.61it/s][A[A

2392134it [01:23, 31957.96it/s][A[A

2395365it [01:23, 32061.91it/s][A[A

2398606it [01:23, 32164.28it/s][A[A

2401825it [01:23, 32069.30it/s][A[A

2405149it [01:24, 32410.43it/s][A[A

2408392it [01:24, 32292.90it/s][A[A

2411623it [01:24, 31976.02it/s][A[A

2414823it [01:24, 30506.69it/s][A[A

2417890it [01:24, 30555.31it/s][A[A

2420957it [01:24, 30337.35it/s][A[A

2423999it [01:24, 30252.48it/s][A[A

2427030it [01:24, 29712.56it/s][A[A

2430121it [01:24, 30061.40it/s][A[A

2433133it [01:25, 28199.8

2966442it [01:44, 32540.14it/s][A[A

2969703it [01:45, 32559.71it/s][A[A

2972962it [01:45, 31499.29it/s][A[A

2976299it [01:45, 32037.74it/s][A[A

2979569it [01:45, 32233.21it/s][A[A

2982934it [01:45, 32644.12it/s][A[A

2986205it [01:45, 31925.69it/s][A[A

2989510it [01:45, 32253.35it/s][A[A

2992985it [01:45, 32963.74it/s][A[A

2996415it [01:45, 33351.92it/s][A[A

2999757it [01:45, 32434.30it/s][A[A

3003011it [01:46, 32443.21it/s][A[A

3006263it [01:46, 32175.78it/s][A[A

3009694it [01:46, 32786.23it/s][A[A

3012980it [01:46, 32512.47it/s][A[A

3016237it [01:46, 32512.95it/s][A[A

3019492it [01:46, 32153.93it/s][A[A

3022711it [01:46, 32027.34it/s][A[A

3026110it [01:46, 32591.73it/s][A[A

3029533it [01:46, 33066.31it/s][A[A

3032890it [01:46, 33212.96it/s][A[A

3036215it [01:47, 32483.62it/s][A[A

3039470it [01:47, 31762.75it/s][A[A

3042805it [01:47, 32222.15it/s][A[A

3046035it [01:47, 32153.61it/s][A[A

3049318it [01:47, 32353.1

3647415it [02:06, 32173.62it/s][A[A

3650678it [02:06, 32307.07it/s][A[A

3653914it [02:07, 31887.94it/s][A[A

3657281it [02:07, 32396.84it/s][A[A

3660563it [02:07, 32521.38it/s][A[A

3663856it [02:07, 32642.82it/s][A[A

3667139it [02:07, 31457.00it/s][A[A

3670296it [02:07, 30303.58it/s][A[A

3673394it [02:07, 30499.42it/s][A[A

3676478it [02:07, 30596.88it/s][A[A

3679783it [02:07, 31291.66it/s][A[A

3682923it [02:07, 31283.18it/s][A[A

3686059it [02:08, 31217.92it/s][A[A

3689394it [02:08, 31825.74it/s][A[A

3692606it [02:08, 31911.65it/s][A[A

3695802it [02:08, 31586.29it/s][A[A

3699004it [02:08, 31714.39it/s][A[A

3702179it [02:08, 31461.36it/s][A[A

3705588it [02:08, 32205.56it/s][A[A

3708815it [02:08, 31962.23it/s][A[A

3712197it [02:08, 32495.97it/s][A[A

3715453it [02:08, 31986.66it/s][A[A

3718658it [02:09, 31602.43it/s][A[A

3721959it [02:09, 32006.44it/s][A[A

3725165it [02:09, 31849.66it/s][A[A

3728527it [02:09, 32360.6

4333393it [02:29, 32522.06it/s][A[A

4336652it [02:29, 32130.07it/s][A[A

4339871it [02:29, 32105.11it/s][A[A

4343086it [02:29, 31156.21it/s][A[A

4346286it [02:29, 31404.45it/s][A[A

4349562it [02:29, 31798.18it/s][A[A

4353041it [02:29, 32639.35it/s][A[A

4356349it [02:29, 32768.56it/s][A[A

4359633it [02:30, 32669.18it/s][A[A

4363003it [02:30, 32970.46it/s][A[A

4366369it [02:30, 33172.97it/s][A[A

4369690it [02:30, 33003.42it/s][A[A

4372998it [02:30, 33025.97it/s][A[A

4376351it [02:30, 33174.88it/s][A[A

4379692it [02:30, 33244.87it/s][A[A

4383018it [02:30, 32806.16it/s][A[A

4386386it [02:30, 33062.55it/s][A[A

4389695it [02:30, 33067.60it/s][A[A

4393005it [02:31, 33075.24it/s][A[A

4396487it [02:31, 33579.62it/s][A[A

4399848it [02:31, 33504.56it/s][A[A

4403213it [02:31, 33547.25it/s][A[A

4406569it [02:31, 33255.15it/s][A[A

4409896it [02:31, 32733.50it/s][A[A

4413195it [02:31, 32809.67it/s][A[A

4416479it [02:31, 32326.8

End: 2020-02-09 16:07:12.275059, Elapsed time: 0:04:48.356702





# Parallelized hashing of PROCESSED_TRAIN/VAL_PATH, saved to HASHED_TRAIN/VAL_PATH

In [7]:
# Count passed to split(), parallel_convert(), delete() and cat() in the cells below.
nr_thread = 12

In [8]:
# Split each processed file into nr_thread CSV chunks.
for processed_path in (PROCESSED_TRAIN_PATH, PROCESSED_VAL_PATH):
    split(path=processed_path, nr_thread=nr_thread)

In [9]:
# Hash the split CSV chunks in parallel, producing nr_thread hashed CSV chunks.
hash_script = "utils/2.py"
hash_paths = [PROCESSED_TRAIN_PATH, PROCESSED_VAL_PATH, HASHED_TRAIN_PATH, HASHED_VAL_PATH]
parallel_convert(hash_script, hash_paths, nr_thread)

In [10]:
hashed_paths = (HASHED_TRAIN_PATH, HASHED_VAL_PATH)

# Remove the split chunks of the processed files.
for processed_path in (PROCESSED_TRAIN_PATH, PROCESSED_VAL_PATH):
    delete(processed_path, nr_thread)

# Merge the nr_thread hashed chunks into a single file per dataset.
for hashed_path in hashed_paths:
    cat(hashed_path, nr_thread)

# Remove the split chunks of the hashed files.
for hashed_path in hashed_paths:
    delete(hashed_path, nr_thread)

In [11]:
no_thread = 1  # number of threads for training (passed to the trainer as -s)

# Build argv as a list rather than formatting one string and splitting it on
# whitespace: a path containing a space would otherwise be cut into several
# arguments.
train_args = [
    '../base/mark1',
    '-r', str(LEARNING_RATE),
    '-s', str(no_thread),
    '-t', str(EPOCHS),
    HASHED_VAL_PATH,
    HASHED_TRAIN_PATH,
]
cmd = ' '.join(train_args)  # human-readable form of the command line

# Left as the last expression so the trainer's exit code is shown as the cell output.
subprocess.call(train_args)

0