In [1]:
import os

dataPath = '/home/endre/Endre_finance_ml_data/'
splittedPath = dataPath + 'generated/WIKI_PRICES_SPLIT/'

numberOfFiles = 0
namePathDict = {}
for subdir, dirs, files in os.walk(splittedPath):
    for file in files:
        filePath = os.path.join(subdir, file)
        namePathDict[file.split('.')[0]] = filePath
        numberOfFiles += 1

print("Total number of files:", numberOfFiles)

Total number of files: 3188


In [2]:
import pandas as pd
# import numpy as np

def readTickerAsPandas(ticker):
    return pd.read_csv(splittedPath+ticker+'.csv')

In [None]:
class BadNumberInArrayError(ValueError):
    '''There was a bad number in the array'''

def percentChange(old, new):
    return ((new-old)/old)*100.0

import math
def checkBadNumbers(rangeName, array):
    for num in array:
        if (math.isnan(num) or not math.isfinite(num)):
            raise BadNumberInArrayError("Got a bad number in this array: {} -> {}".format(rangeName, array))

def featurizeTicker(rangeNames, features, labels, ticker, df):
    selected = df[["date", "adj_close", "adj_volume"]]
    
    # 21 days per month, 12 months = roughly a year.
    numDaysPeriod = 21
    numPeriods = 18
    jumpDays = 3  # Number of days to jump between each feature (reduce number of feature sets)

    rangeDays = numDaysPeriod * numPeriods

    # RANGE: "the entire year" - which spans the feature set
    # PERIOD: "each month"
    
    featureSetDate = None
    
    idx = 0
    for featureSetIdx in range(rangeDays-1, selected.shape[0] - numDaysPeriod, jumpDays):
        # DEBUG::
        idx += 1
        if (idx > rangeDays*43525324532):
            break
        # DEBUG END!

        # The "name" of this feature set
        featureSetDate = selected.iloc[featureSetIdx, 0]

        # The start of this feature set's range
        rangeStartIdx = featureSetIdx-rangeDays+1
        # The end of this feature set's range
        rangeEndIdx = featureSetIdx
        # The start of this feature set's range
        rangeStartDate = selected.iloc[rangeStartIdx, 0]
        
        # The volume average over the range preceeding this feature date
        averageVolumeOverRange = selected.iloc[rangeStartIdx:rangeEndIdx+1, 2].mean()

        rangeStartPrice = selected.iloc[rangeStartIdx, 1]

        ## NAME THIS RANGE
        rangeName = []
        rangeName.append(ticker)
        rangeName.append(featureSetDate)
        
        ## CREATE THE FEATURES INTO 'features'
        rangeFeatures = []
        for preceedingPeriodIdx in range(-numPeriods+1, 1):
            periodStartIdx = featureSetIdx + (preceedingPeriodIdx - 1) * numDaysPeriod + 1
            periodEndIdx = featureSetIdx + preceedingPeriodIdx * numDaysPeriod
            periodStartDate = selected.iloc[periodStartIdx, 0]
            periodEndDate = selected.iloc[periodEndIdx, 0]

            periodStartPrice = selected.iloc[periodStartIdx, 1]
            periodEndPrice = selected.iloc[periodEndIdx, 1]

            f_periodPriceChange = percentChange(periodStartPrice, periodEndPrice)
            f_fromStartRangeToPeriodEndPriceChange = percentChange(rangeStartPrice, periodEndPrice)

            averageVolumeOverPeriod = selected.iloc[periodStartIdx:periodEndIdx+1, 2].mean()
            f_averageVolumeRatio = averageVolumeOverPeriod / averageVolumeOverRange

            # print("   #{}: {} - {}, f_periodPriceChange:{}, f_fromStartRangeToPeriodEndPriceChange:{}, f_averageVolumeRatio:{}".\
            #      format(preceedingPeriodIdx, periodStartDate, periodEndDate, f_periodPriceChange, f_fromStartRangeToPeriodEndPriceChange, f_averageVolumeRatio))

            rangeFeatures.append(f_periodPriceChange)
            rangeFeatures.append(f_fromStartRangeToPeriodEndPriceChange)
            rangeFeatures.append(f_averageVolumeRatio)
        
        checkBadNumbers(rangeName, rangeFeatures)
        
        ## CREATE THE LABELS INTO 'labels'
        rangeLabels = []
        featureSetEndPrice = selected.iloc[featureSetIdx, 1]
        rangeLabels.append(percentChange(featureSetEndPrice, selected.iloc[featureSetIdx+5, 1]))
        rangeLabels.append(percentChange(featureSetEndPrice, selected.iloc[featureSetIdx+10, 1]))
        rangeLabels.append(percentChange(featureSetEndPrice, selected.iloc[featureSetIdx+15, 1]))
        rangeLabels.append(percentChange(featureSetEndPrice, selected.iloc[featureSetIdx+20, 1]))
        
        checkBadNumbers(rangeName, rangeLabels)

        # ----- Now all features and labes should be checked, so we can safely add it to the lists
        rangeNames.append(rangeName)
        features.append(rangeFeatures)
        labels.append(rangeLabels)
        
    # Return last feature set date
    return featureSetDate

In [None]:
from timeit import default_timer as timer

rangeNames = []
features = []
labels = []

total_start = timer()
idx_ticker = 0
total_tickers = len(namePathDict)
for ticker, path in namePathDict.items():
    idx_ticker += 1

    # DEBUG::
    if (idx_ticker > 100000):
        print ("EARLY BREAK!")
        break
    # DEBUG END!
    
    start = timer()
    df = readTickerAsPandas(ticker)
    try:
        lastFeatureSetDate = featurizeTicker(rangeNames, features, labels, ticker, df)
        millis = (timer()-start) * 1000
        print ("#{}/{}: {}: {}, last:{} :: took {} ms - current length: {}".\
               format(idx_ticker, total_tickers, ticker, df.shape, lastFeatureSetDate, millis, len(features)))
    except (ValueError, ZeroDivisionError) as err:
        print ("## Got error for Ticker '{}' - skipping rest of it.. \n  {}\n".format(ticker, err))

print("Total time: {}".format((timer() - total_start)*1000))


#1/3188: QTM: (4499, 14), last:2017-05-15 :: took 23890.460891998373 ms - current length: 1367
#2/3188: HTGC: (3027, 14), last:2017-05-17 :: took 15287.54881300847 ms - current length: 2244
#3/3188: ED: (11974, 14), last:2017-05-16 :: took 67251.1877809884 ms - current length: 6103
#4/3188: FIVN: (807, 14), last:2017-05-17 :: took 2379.3421770096757 ms - current length: 6240
#5/3188: CVEO: (778, 14), last:2017-05-16 :: took 2200.251449015923 ms - current length: 6367
#6/3188: NCI: (5210, 14), last:2017-05-15 :: took 28090.391563018784 ms - current length: 7971
#7/3188: LDOS: (2685, 14), last:2017-05-17 :: took 13455.646590009565 ms - current length: 8734
#8/3188: IGTE: (4665, 14), last:2015-06-02 :: took 24844.920207979158 ms - current length: 10157
#9/3188: AAPL: (9207, 14), last:2017-05-17 :: took 51244.01388000115 ms - current length: 13094
#10/3188: HEOP: (4515, 14), last:2017-03-02 :: took 24079.898925992893 ms - current length: 14467
#11/3188: BRKR: (4243, 14), last:2017-05-16 ::

#86/3188: BIOL: (6194, 14), last:2017-05-15 :: took 33893.830717977835 ms - current length: 117480
#87/3188: MBII: (976, 14), last:2017-05-16 :: took 3366.839709022315 ms - current length: 117673
#88/3188: ESCA: (6377, 14), last:2017-05-15 :: took 34884.41370698274 ms - current length: 119666
#89/3188: CSRA: (390, 14), last:None :: took 4.9195569881703705 ms - current length: 119666
#90/3188: KRC: (5129, 14), last:2017-05-15 :: took 27652.316761988914 ms - current length: 121243
#91/3188: VECO: (5678, 14), last:2017-05-15 :: took 30676.44952001865 ms - current length: 123003
#92/3188: MATX: (11132, 14), last:2017-05-15 :: took 62839.03127600206 ms - current length: 126581
#93/3188: TRXC: (6194, 14), last:2017-05-15 :: took 34089.27351501188 ms - current length: 128513
#94/3188: GLOG: (1312, 14), last:2017-05-16 :: took 5329.112462000921 ms - current length: 128818
#95/3188: NMRX: (5864, 14), last:2017-05-15 :: took 31850.896338000894 ms - current length: 130640
#96/3188: FRGI: (1294, 1

#170/3188: Z: (463, 14), last:2017-05-16 :: took 393.22624399210326 ms - current length: 234407
#171/3188: MIL: (4976, 14), last:2016-01-11 :: took 26657.39298099652 ms - current length: 235933
#172/3188: CI: (8881, 14), last:2017-05-16 :: took 49426.07961400063 ms - current length: 238761
#173/3188: ANIP: (4306, 14), last:2017-05-16 :: took 23105.716496997047 ms - current length: 240064
#174/3188: NBCB: (414, 14), last:2014-12-01 :: took 113.01503697177395 ms - current length: 240070
#175/3188: MTSC: (6862, 14), last:2017-05-16 :: took 37782.99039599369 ms - current length: 242225
#176/3188: AVB: (5859, 14), last:2017-05-17 :: took 32037.983383983374 ms - current length: 244046
#177/3188: RTRX: (1159, 14), last:2017-05-16 :: took 4451.1760890018195 ms - current length: 244300
#178/3188: IGT: (6862, 14), last:2017-05-16 :: took 37786.75387401017 ms - current length: 246455
#179/3188: SCBT: (4380, 14), last:2014-05-28 :: took 23260.27264801087 ms - current length: 247783
#180/3188: CIX:

#254/3188: UAM: (6827, 14), last:2017-03-24 :: took 37625.33094495302 ms - current length: 346546
#255/3188: PTIE: (4258, 14), last:2017-05-16 :: took 22606.573417957406 ms - current length: 347833
#256/3188: CCXI: (1348, 14), last:2017-05-16 :: took 5604.942601989023 ms - current length: 348150
#257/3188: DELL: (6388, 14), last:2014-11-12 :: took 35071.2708819774 ms - current length: 350147
#258/3188: ANGI: (1403, 14), last:2017-05-15 :: took 5863.756015023682 ms - current length: 350482
#259/3188: RCL: (6080, 14), last:2017-05-15 :: took 33253.321937983856 ms - current length: 352376
#260/3188: MEI: (6863, 14), last:2017-05-15 :: took 37736.35808401741 ms - current length: 354531
#261/3188: MDRX: (4504, 14), last:2017-05-16 :: took 24006.465065001976 ms - current length: 355900
#262/3188: RYN: (5874, 14), last:2017-05-17 :: took 32099.908317963127 ms - current length: 357726
#263/3188: CSS: (6362, 14), last:2017-05-15 :: took 34852.407007012516 ms - current length: 359714
#264/3188: 

#337/3188: TYC: (7295, 14), last:2016-08-02 :: took 40462.15808700072 ms - current length: 458064
#338/3188: ASEI: (8070, 14), last:2016-08-10 :: took 45959.071863035206 ms - current length: 460622
#339/3188: CMCSK: (6482, 14), last:2015-11-09 :: took 35611.384253017604 ms - current length: 462650
#340/3188: LMNX: (4326, 14), last:2017-05-17 :: took 22918.561102997046 ms - current length: 463960
#341/3188: DFZ: (5661, 14), last:2014-08-05 :: took 30877.13919702219 ms - current length: 465715
#342/3188: SBH: (2662, 14), last:2017-05-16 :: took 13210.859593993519 ms - current length: 466470
#343/3188: OSIS: (4959, 14), last:2017-05-17 :: took 26659.531180979684 ms - current length: 467991
#344/3188: SRI: (4953, 14), last:2017-05-17 :: took 26759.455236024223 ms - current length: 469510
#345/3188: ARNA: (4248, 14), last:2017-05-17 :: took 22646.733761997893 ms - current length: 470794
#346/3188: TMHC: (1056, 14), last:2017-05-17 :: took 3894.4265859900042 ms - current length: 471014
#347/

#420/3188: FTK: (2994, 14), last:2017-05-17 :: took 15047.217023035046 ms - current length: 578273
#421/3188: SYY: (11129, 14), last:2017-05-15 :: took 62373.10339498799 ms - current length: 581850
#422/3188: MBI: (7552, 14), last:2017-05-16 :: took 41723.880456993356 ms - current length: 584235
#423/3188: HHC: (1664, 14), last:2017-05-15 :: took 7359.324651013594 ms - current length: 584657
#424/3188: BJRI: (5206, 14), last:2017-05-16 :: took 27928.481808979996 ms - current length: 586260
#425/3188: PRXL: (5429, 14), last:2017-05-15 :: took 29263.333349023014 ms - current length: 587937
#426/3188: GS: (4561, 14), last:2017-05-16 :: took 24212.160642025992 ms - current length: 589325
#427/3188: PPO: (2036, 14), last:2015-07-23 :: took 9488.412596983835 ms - current length: 589871
#428/3188: END: (3181, 14), last:2014-09-05 :: took 16165.705919032916 ms - current length: 590799
#429/3188: UCTT: (3331, 14), last:2017-05-16 :: took 17088.874007982668 ms - current length: 591777
#430/3188:

#503/3188: UFS: (2590, 14), last:2017-05-16 :: took 12869.653777976055 ms - current length: 693052
#504/3188: EVDY: (679, 14), last:2016-11-02 :: took 1641.4234869880602 ms - current length: 693146
#505/3188: CBSH: (8264, 14), last:2017-05-15 :: took 45782.49707299983 ms - current length: 695768
#506/3188: ROLL: (2984, 14), last:2017-05-15 :: took 15064.597361022606 ms - current length: 696630
#507/3188: VNO: (7424, 14), last:2017-05-15 :: took 40751.33633200312 ms - current length: 698972
#508/3188: ASH: (8624, 14), last:2017-05-15 :: took 47876.530533016194 ms - current length: 701714
#509/3188: CPLA: (2667, 14), last:2017-05-17 :: took 13163.035876990762 ms - current length: 702471
#510/3188: TBPH: (778, 14), last:2017-05-16 :: took 2214.376902033109 ms - current length: 702598
#511/3188: HRS: (8943, 14), last:2017-05-17 :: took 49614.50214398792 ms - current length: 705447
#512/3188: ACTG: (3651, 14), last:2017-05-17 :: took 18876.103635004256 ms - current length: 706532
#513/3188:

#586/3188: KAI: (6201, 14), last:2017-05-17 :: took 33697.48965703184 ms - current length: 810272
#587/3188: EOG: (6981, 14), last:2017-05-17 :: took 38149.21086799586 ms - current length: 812467
#588/3188: NBL: (8942, 14), last:2017-05-15 :: took 49555.07339100586 ms - current length: 815315
#589/3188: CRUS: (6862, 14), last:2017-05-16 :: took 37570.92676596949 ms - current length: 817470
#590/3188: CCNE: (5299, 14), last:2017-05-16 :: took 28435.543889994733 ms - current length: 819104
#591/3188: GD: (10205, 14), last:2017-05-15 :: took 57117.88905400317 ms - current length: 822373
#592/3188: ZQK: (6416, 14), last:2015-08-06 :: took 34913.58723497251 ms - current length: 824379
#593/3188: SNDK: (5162, 14), last:2016-04-08 :: took 27750.780217000283 ms - current length: 825967
#594/3188: SPAR: (6862, 14), last:2017-05-16 :: took 37482.43282799376 ms - current length: 828122
#595/3188: PKG: (4374, 14), last:2017-05-17 :: took 23138.853878015652 ms - current length: 829448
#596/3188: WB

#669/3188: AROW: (8264, 14), last:2017-05-15 :: took 45707.6104079606 ms - current length: 940783
#670/3188: WFM: (6399, 14), last:2017-05-17 :: took 34944.28775599226 ms - current length: 942784
#671/3188: PRA: (6497, 14), last:2017-05-15 :: took 35301.596453995444 ms - current length: 944817
#672/3188: GSIG: (4312, 14), last:2016-04-11 :: took 22704.487232025713 ms - current length: 946122
#673/3188: SIAL: (6466, 14), last:2015-10-19 :: took 35378.74679500237 ms - current length: 948145
#674/3188: GXP: (7464, 14), last:2017-05-17 :: took 40956.250722985715 ms - current length: 950501
#675/3188: HSTM: (4323, 14), last:2017-05-17 :: took 22756.650868977886 ms - current length: 951810
#676/3188: GSM: (1985, 14), last:2017-05-15 :: took 9199.166946054902 ms - current length: 952339
#677/3188: CUTR: (3327, 14), last:2017-05-17 :: took 16920.55724799866 ms - current length: 953316
#678/3188: NYRT: (800, 14), last:2017-05-15 :: took 2329.2270520469174 ms - current length: 953450
#679/3188: 

#751/3188: LDRH: (694, 14), last:2016-06-09 :: took 1713.3171989698894 ms - current length: 1059422
#752/3188: Q: (1035, 14), last:2017-05-17 :: took 3737.352803000249 ms - current length: 1059635
#753/3188: MDR: (8690, 14), last:2017-05-15 :: took 48054.02207100997 ms - current length: 1062399
#754/3188: USTR: (6345, 14), last:2015-04-29 :: took 34617.11529002059 ms - current length: 1064382
#755/3188: VOLC: (2183, 14), last:2015-01-13 :: took 10334.335535997525 ms - current length: 1064977
#756/3188: SWAY: (493, 14), last:2015-12-02 :: took 557.0665279519744 ms - current length: 1065009
#757/3188: HAWK: (1049, 14), last:2017-05-15 :: took 3777.478552015964 ms - current length: 1065226
#758/3188: FIX: (5026, 14), last:2017-05-16 :: took 26803.325777989812 ms - current length: 1066769
#759/3188: WRI: (7424, 14), last:2017-05-15 :: took 40639.09660704667 ms - current length: 1069111
#760/3188: CRM: (3270, 14), last:2017-05-17 :: took 16709.582129027694 ms - current length: 1070069
#761/

#834/3188: SMCI: (2574, 14), last:2017-05-17 :: took 12614.107622008305 ms - current length: 1173018
#835/3188: KBH: (7464, 14), last:2017-05-17 :: took 40982.8238520422 ms - current length: 1175374
#836/3188: VOD: (7182, 14), last:2017-05-17 :: took 39318.30197299132 ms - current length: 1177636
#837/3188: NRG: (3409, 14), last:2017-05-16 :: took 17468.3497310034 ms - current length: 1178640
#838/3188: LO: (1764, 14), last:2015-05-12 :: took 7962.897585006431 ms - current length: 1179096
#839/3188: CIFC: (2871, 14), last:2016-10-21 :: took 14320.034951029811 ms - current length: 1179921
#840/3188: STZ: (6362, 14), last:2017-05-15 :: took 34709.55289399717 ms - current length: 1181909
#841/3188: INSM: (4288, 14), last:2017-05-16 :: took 22658.674387959763 ms - current length: 1183206
#842/3188: CATY: (6677, 14), last:2017-05-15 :: took 36434.78521000361 ms - current length: 1185299
#843/3188: NASB: (3909, 14), last:2015-02-04 :: took 20414.959779998753 ms - current length: 1186470
#844

#917/3188: TEL: (2521, 14), last:2017-05-16 :: took 12297.849456954282 ms - current length: 1293207
#918/3188: WBA: (8058, 14), last:2017-05-17 :: took 44442.92713497998 ms - current length: 1295761
#919/3188: ALGT: (2648, 14), last:2017-05-15 :: took 13061.454463982955 ms - current length: 1296511
#920/3188: FCE_A: (8483, 14), last:2017-05-15 :: took 46813.74401104404 ms - current length: 1299206
#921/3188: TQNT: (5301, 14), last:2014-12-01 :: took 28532.243056979496 ms - current length: 1300841
#922/3188: CSE: (2685, 14), last:2014-03-07 :: took 13408.685871050693 ms - current length: 1301604
#923/3188: CENTA: (2610, 14), last:2017-05-17 :: took 12880.893919034861 ms - current length: 1302342
#924/3188: POWI: (4909, 14), last:2017-05-16 :: took 26163.544180977624 ms - current length: 1303846
#925/3188: RLI: (7427, 14), last:2017-05-15 :: took 40819.141154002864 ms - current length: 1306189
#926/3188: BKU: (1607, 14), last:2017-05-15 :: took 7046.610203047749 ms - current length: 1306

#999/3188: GRMN: (4155, 14), last:2017-05-17 :: took 21890.507475007325 ms - current length: 1415299
#1000/3188: TWOU: (812, 14), last:2017-05-15 :: took 2398.776616959367 ms - current length: 1415437
#1001/3188: CGNX: (6978, 14), last:2017-05-17 :: took 38112.61451902101 ms - current length: 1417631
#1002/3188: COWN: (2752, 14), last:2017-05-16 :: took 13685.22792600561 ms - current length: 1418416
#1003/3188: KO: (13961, 14), last:2017-05-15 :: took 78756.95531303063 ms - current length: 1422937
#1004/3188: PLUG: (4436, 14), last:2017-05-15 :: took 23458.229339972604 ms - current length: 1424283
#1005/3188: OCN: (5217, 14), last:2017-05-17 :: took 27996.74351804424 ms - current length: 1425890
#1006/3188: ATW: (8263, 14), last:2017-05-16 :: took 45460.72533802362 ms - current length: 1428512
#1007/3188: GPX: (6362, 14), last:2017-05-15 :: took 34420.4110269784 ms - current length: 1430500
#1008/3188: JAKK: (5318, 14), last:2017-05-15 :: took 28534.81149399886 ms - current length: 143

#1081/3188: PFBC: (4486, 14), last:2017-05-16 :: took 23822.882812994067 ms - current length: 1528579
#1082/3188: THOR: (4978, 14), last:2015-09-08 :: took 26566.379228024743 ms - current length: 1530106
#1083/3188: PLT: (5894, 14), last:2017-05-15 :: took 31975.781360000838 ms - current length: 1531938
#1084/3188: PNW: (8298, 14), last:2017-05-17 :: took 45812.77568399673 ms - current length: 1534572
#1085/3188: EIX: (9448, 14), last:2017-05-16 :: took 52246.1540660006 ms - current length: 1537589
#1086/3188: EPL: (1179, 14), last:2014-04-30 :: took 4509.662524971645 ms - current length: 1537850
#1087/3188: GTLS: (2743, 14), last:2017-05-16 :: took 13558.526779001113 ms - current length: 1538632
#1088/3188: RBCN: (2412, 14), last:2017-05-17 :: took 11634.809799026698 ms - current length: 1539304
#1089/3188: MSCC: (6862, 14), last:2017-05-16 :: took 37397.59209298063 ms - current length: 1541459
#1090/3188: CRRS: (3984, 14), last:2015-01-30 :: took 20900.738433003426 ms - current lengt

#1163/3188: CSCO: (6862, 14), last:2017-05-16 :: took 37487.615799997 ms - current length: 1644407
#1164/3188: NEOG: (5879, 14), last:2017-05-15 :: took 31796.824051998556 ms - current length: 1646234
#1165/3188: PBY: (7082, 14), last:2015-12-30 :: took 38797.96547401929 ms - current length: 1648462
#1166/3188: FCN: (5313, 14), last:2017-05-17 :: took 28595.895110978745 ms - current length: 1650101
#1167/3188: MCC: (1612, 14), last:2017-05-16 :: took 7051.387081970461 ms - current length: 1650506
#1168/3188: BRLI: (5476, 14), last:2015-07-21 :: took 29394.878691004124 ms - current length: 1652199
#1169/3188: WWAV: (1120, 14), last:2017-03-10 :: took 4182.060985011049 ms - current length: 1652440
#1170/3188: HWCC: (2771, 14), last:2017-05-15 :: took 13696.174456970766 ms - current length: 1653231
#1171/3188: AVNR: (6220, 14), last:2014-12-09 :: took 33682.27788503282 ms - current length: 1655172
#1172/3188: MDAS: (2044, 14), last:2015-12-23 :: took 9575.318095972762 ms - current length:

#1245/3188: VPG: (1759, 14), last:2017-05-16 :: took 7912.142369954381 ms - current length: 1761852
#1246/3188: STAR: (6950, 14), last:2017-05-15 :: took 38225.68390896777 ms - current length: 1764036
#1247/3188: THRX: (2836, 14), last:2015-12-07 :: took 14163.193102984224 ms - current length: 1764849
#1248/3188: JWN: (7801, 14), last:2017-05-16 :: took 42919.16033101734 ms - current length: 1767317
#1249/3188: SRDX: (4855, 14), last:2017-05-16 :: took 25798.79318503663 ms - current length: 1768803
#1250/3188: SAIC: (946, 14), last:2017-05-16 :: took 3196.9521319842897 ms - current length: 1768986
#1251/3188: FELE: (6862, 14), last:2017-05-16 :: took 37372.4936300423 ms - current length: 1771141
#1252/3188: HERO: (2675, 14), last:2016-05-11 :: took 13214.399032993242 ms - current length: 1771900
#1253/3188: FIS: (4023, 14), last:2017-05-17 :: took 20995.07762194844 ms - current length: 1773109
#1254/3188: LEN: (7464, 14), last:2017-05-17 :: took 41211.14093699725 ms - current length: 1

#1327/3188: HLSS: (795, 14), last:2015-03-27 :: took 2306.3542019808665 ms - current length: 1890268
#1328/3188: BBBY: (6306, 14), last:2017-05-17 :: took 34363.045364036225 ms - current length: 1892238
#1329/3188: CHS: (6104, 14), last:2017-05-15 :: took 33105.31587200239 ms - current length: 1894140
#1330/3188: GTAT: (1565, 14), last:2014-09-11 :: took 6781.813728041016 ms - current length: 1894529
#1331/3188: BMS: (8263, 14), last:2017-05-16 :: took 45765.39447600953 ms - current length: 1897151
#1332/3188: PLOW: (1793, 14), last:2017-05-15 :: took 8161.784472002182 ms - current length: 1897616
#1333/3188: ONVO: (1344, 14), last:2017-05-17 :: took 5536.841482971795 ms - current length: 1897932
#1334/3188: UEC: (2569, 14), last:2017-05-16 :: took 12584.784123988356 ms - current length: 1898656
#1335/3188: MCP: (1235, 14), last:2015-05-21 :: took 4828.27601704048 ms - current length: 1898935
#1336/3188: RVBD: (2162, 14), last:2015-03-23 :: took 10220.363413973246 ms - current length: 

#1409/3188: EPZM: (1020, 14), last:2017-05-17 :: took 3646.836733969394 ms - current length: 1998006
#1410/3188: SNHY: (5144, 14), last:2017-05-15 :: took 27501.552882953547 ms - current length: 1999588
#1411/3188: CNS: (3233, 14), last:2017-05-15 :: took 16403.07511301944 ms - current length: 2000533
#1412/3188: CELG: (6862, 14), last:2017-05-16 :: took 37384.51197097311 ms - current length: 2002688
#1413/3188: TWTC: (3894, 14), last:2014-10-02 :: took 20302.41261899937 ms - current length: 2003854
#1414/3188: IRC: (2996, 14), last:2016-02-25 :: took 15082.121829967946 ms - current length: 2004720
#1415/3188: BBT: (6862, 14), last:2017-05-16 :: took 37571.05764100561 ms - current length: 2006875
#1416/3188: PEBO: (6132, 14), last:2017-05-17 :: took 33323.468722985126 ms - current length: 2008787
#1417/3188: ORCL: (7883, 14), last:2017-05-15 :: took 43419.89352100063 ms - current length: 2011282
#1418/3188: EPAY: (4616, 14), last:2017-05-15 :: took 24485.279274987988 ms - current lengt

#1491/3188: GPT: (3243, 14), last:2017-05-17 :: took 16507.26133096032 ms - current length: 2114984
#1492/3188: SFE: (7427, 14), last:2017-05-15 :: took 40808.70007100748 ms - current length: 2117327
#1493/3188: MLI: (6630, 14), last:2017-05-17 :: took 36102.199140994344 ms - current length: 2119405
#1494/3188: MXIM: (6862, 14), last:2017-05-16 :: took 37396.83086599689 ms - current length: 2121560
#1495/3188: FRT: (11096, 14), last:2017-05-15 :: took 61920.869196997955 ms - current length: 2125126
#1496/3188: EA: (6862, 14), last:2017-05-16 :: took 37549.2053849739 ms - current length: 2127281
#1497/3188: FLR: (4160, 14), last:2017-05-15 :: took 21785.69021302974 ms - current length: 2128535
#1498/3188: PTRY: (3966, 14), last:2015-02-12 :: took 20755.346138961613 ms - current length: 2129725
#1499/3188: IXYS: (4713, 14), last:2017-05-17 :: took 25039.10299198469 ms - current length: 2131164
#1500/3188: UMH: (5926, 14), last:2017-05-16 :: took 32092.556348012295 ms - current length: 21

#1573/3188: WETF: (6118, 14), last:2017-05-16 :: took 33133.79945495399 ms - current length: 2233583
#1574/3188: CBB: (7581, 14), last:2017-05-17 :: took 41750.75961102266 ms - current length: 2235978
#1575/3188: TCO: (6169, 14), last:2017-05-16 :: took 33494.414946006145 ms - current length: 2237902
#1576/3188: DOC: (986, 14), last:2017-05-15 :: took 3431.1977970064618 ms - current length: 2238098
#1577/3188: CVS: (8193, 14), last:2017-05-17 :: took 45258.07053397875 ms - current length: 2240697
#1578/3188: CNMD: (6862, 14), last:2017-05-16 :: took 37555.389995046426 ms - current length: 2242852
#1579/3188: AMP: (2959, 14), last:2017-05-16 :: took 14998.81172599271 ms - current length: 2243706
#1580/3188: CUBE: (3185, 14), last:2017-05-15 :: took 16177.873610984534 ms - current length: 2244635
#1581/3188: ARQL: (5202, 14), last:2017-05-17 :: took 27860.138575953897 ms - current length: 2246237
#1582/3188: ROVI: (4905, 14), last:2016-08-08 :: took 26247.58148001274 ms - current length:

#1655/3188: PAYC: (800, 14), last:2017-05-15 :: took 2343.6129839974456 ms - current length: 2347098
#1656/3188: REN: (2441, 14), last:2017-05-15 :: took 11815.255468012765 ms - current length: 2347779
#1657/3188: QTS: (929, 14), last:2017-05-15 :: took 3081.072922970634 ms - current length: 2347956
#1658/3188: SKX: (4536, 14), last:2017-05-17 :: took 24030.133767984807 ms - current length: 2349336
#1659/3188: EMC: (6986, 14), last:2016-08-03 :: took 38243.53425903246 ms - current length: 2351532
#1660/3188: HMHC: (903, 14), last:2017-05-17 :: took 2961.3207390066236 ms - current length: 2351701
#1661/3188: AFOP: (3901, 14), last:2016-05-04 :: took 20350.19043099601 ms - current length: 2352869
#1662/3188: COCO: (4032, 14), last:2015-01-14 :: took 21121.060572972056 ms - current length: 2354081
#1663/3188: STC: (6860, 14), last:2017-05-15 :: took 37443.52939201053 ms - current length: 2356235
#1664/3188: SMA: (2515, 14), last:2014-11-03 :: took 12293.460376036819 ms - current length: 2

#1737/3188: GCI: (8021, 14), last:2017-03-22 :: took 44204.76237899857 ms - current length: 2466633
#1738/3188: SFNC: (6202, 14), last:2017-05-16 :: took 33580.51347901346 ms - current length: 2468568
#1739/3188: HCT: (198, 14), last:None :: took 2.586480986792594 ms - current length: 2468568
#1740/3188: MOG_A: (6379, 14), last:2017-05-16 :: took 34782.16733701993 ms - current length: 2470562
#1741/3188: CWST: (4940, 14), last:2017-05-15 :: took 26427.555554022547 ms - current length: 2472076
#1742/3188: LTM: (2763, 14), last:2017-05-17 :: took 13775.126337015536 ms - current length: 2472865
#1743/3188: PRAA: (3676, 14), last:2017-05-16 :: took 19106.8292070413 ms - current length: 2473958
#1744/3188: ASNA: (6862, 14), last:2017-05-16 :: took 37711.13356301794 ms - current length: 2476113
#1745/3188: VMI: (6862, 14), last:2017-05-16 :: took 37552.373607002664 ms - current length: 2478268
#1746/3188: HME: (5336, 14), last:2015-09-03 :: took 28815.326979965903 ms - current length: 247991

In [None]:
print("Total time: {}".format((timer() - total_start)*1000))
print("RangeNames:{}, Features:{}, Labels:{}".format(len(rangeNames), len(features), len(labels)))

In [None]:
import pickle

result = {'rangeNames': rangeNames, 'features': features, 'labels': labels}
pickle.dump(result, open("RangeNamesFeaturesAndLabels.pickle", "wb"))

In [None]:
idx_ticker