In [11]:
import pandas as pd 
from collections import defaultdict 
import numpy as np 
import matplotlib.pyplot as plt
from os import walk 
from pprint import pprint 
import operator 

%matplotlib inline 


In [3]:
# from supporting_objects.StockSummary import StockSummary
from collections import defaultdict

class StockSummary:
    def __init__(self):
        self.stock_symbol = None
        self.stock_name = None
        self.stock_mean_value_per_date = defaultdict(float)
        self.stock_mean_volume_per_date = defaultdict(float)
        self.stock_mean_value_per_year = defaultdict(float)
        self.stock_price_over_year = None 
        self.year_over_year_profit = None 
        self.summation_of_profits = None


In [4]:
all_stock_data = "/Users/pyuvraj/CCPP/data_for_profit_from_stock/all_stocks_historical_prices/symbols_valid_meta.csv"
stock_info_directory = "/Users/pyuvraj/CCPP/data_for_profit_from_stock/all_stocks_historical_prices/stocks"

In [5]:
class FindHighGrowthStocksNASDAQ:
    def __init__(self, stock_csv_folder, stock_description_file):
        self.stock_csv_folder = stock_csv_folder
        self.stock_description_file = stock_description_file
        self.stock_csv_absoulte_path = {}
        self.data = self.ReadAllStockData()
        self.delta_days = 60
        self.data_per_company = defaultdict(StockSummary)
        
    def ReadAllStockData(self):
        f = []
        stock_file_in_location = defaultdict(str)
        for (dirpath, dirnames, filenames) in walk(stock_info_directory):
            f.extend(filenames)
        for file_name in f:
            stock_file_in_location[file_name] = stock_info_directory + "/" + file_name
        return stock_file_in_location
    
    def ReadAndIteratePerStock(self):
        i = 0
        for stock_code, absolute_location in self.data.items():
            i += 1
            stock_csv_data = pd.read_csv(absolute_location)
            stock_csv_data_processed = self.AddDateMonthYearColumns(stock_csv_data)
            self.data_per_company[stock_code] = self.ProcessOverallDataPerStock(stock_code, stock_csv_data_processed)
#             print(stock_code, self.data_per_company['stock_code'].summation_of_profits)
#             if i > 200: break

    def AddDateMonthYearColumns(self, data):
        Year, Month, Day = [], [], []
        for index in data['Date']:
            year = int(index[0:4])
            month = int(index[5:7])
            day = int(index[8:10])
            Year.append(year)
            Month.append(month)
            Day.append(day)
        data['Year'] = Year
        data['Month'] = Month
        data['Day'] = Day
        return data
        
    def GetStatisticsFor30Days(self, precise_data):
        avg_price = np.mean(precise_data['Close'])
        avg_volume = np.mean(precise_data['Volume'])
        date = np.min(precise_data['Date'])
        return date, avg_price, avg_volume
    
    def StockPriceMeanPerYear(self, data):
        year_over_year_profit = defaultdict(float)
        stock_price_over_year = defaultdict(float)
        
        stock_price_over_year['2016'] = np.mean(data[data['Year']==2016]['Close'])
        stock_price_over_year['2017'] = np.mean(data[data['Year']==2017]['Close'])
        stock_price_over_year['2018'] = np.mean(data[data['Year']==2018]['Close'])
        stock_price_over_year['2019'] = np.mean(data[data['Year']==2019]['Close'])
        stock_price_over_year['2020'] = np.mean(data[data['Year']==2020]['Close'])
        
        year_over_year_profit['2017'] = 100.0*((stock_price_over_year['2017']-stock_price_over_year['2016'])/stock_price_over_year['2016'])
        year_over_year_profit['2018'] = 100.0*((stock_price_over_year['2018']-stock_price_over_year['2017'])/stock_price_over_year['2017'])
        year_over_year_profit['2019'] = 100.0*((stock_price_over_year['2019']-stock_price_over_year['2018'])/stock_price_over_year['2018'])                    
        year_over_year_profit['2020'] = 100.0*((stock_price_over_year['2020']-stock_price_over_year['2019'])/stock_price_over_year['2019'])
        return stock_price_over_year, year_over_year_profit, sum(year_over_year_profit.values())
    
    def ProcessOverallDataPerStock(self, stock_code, data):
        close = len(data)
        start = close - self.delta_days
        stock_summary = StockSummary()
        stock_summary.stock_symbol = stock_code
        stock_summary.stock_price_over_year, stock_summary.year_over_year_profit, stock_summary.summation_of_profits = self.StockPriceMeanPerYear(data)
        while start > 0:
            date, mean_value, mean_volume = self.GetStatisticsFor30Days(data[start:close])
            stock_summary.stock_mean_value_per_date[date] = mean_value
            stock_summary.stock_mean_volume_per_date[date] = mean_volume
            if data['Date'][start] < '2016-01-01':
                break
            close = start 
            start = start - self.delta_days
        return stock_summary
    
    def RetrieveData(self):
        return self.data_per_company
        
            
        

        

In [6]:
find_high_growth_stocks_nasdaq = FindHighGrowthStocksNASDAQ(stock_info_directory, all_stock_data)
find_high_growth_stocks_nasdaq.ReadAndIteratePerStock()
user_data = find_high_growth_stocks_nasdaq.RetrieveData()

In [7]:
user_data.items()

dict_items([('RIV.csv', <__main__.StockSummary object at 0x7fc670282c40>), ('ANTE.csv', <__main__.StockSummary object at 0x7fc670282970>), ('CSCO.csv', <__main__.StockSummary object at 0x7fc670282d60>), ('PRI.csv', <__main__.StockSummary object at 0x7fc670282400>), ('NZF.csv', <__main__.StockSummary object at 0x7fc6804b92b0>), ('HLNE.csv', <__main__.StockSummary object at 0x7fc6804b90d0>), ('UNT.csv', <__main__.StockSummary object at 0x7fc6804b9160>), ('HUBS.csv', <__main__.StockSummary object at 0x7fc680f4a190>), ('GPL.csv', <__main__.StockSummary object at 0x7fc6804b9250>), ('SBGI.csv', <__main__.StockSummary object at 0x7fc680f4a0d0>), ('UFCS.csv', <__main__.StockSummary object at 0x7fc6804b9040>), ('TEAF.csv', <__main__.StockSummary object at 0x7fc661314a60>), ('RJZ.csv', <__main__.StockSummary object at 0x7fc6613149d0>), ('GFED.csv', <__main__.StockSummary object at 0x7fc661314820>), ('AIRT.csv', <__main__.StockSummary object at 0x7fc661314070>), ('SITC.csv', <__main__.StockSummar

In [8]:
temp_dict = defaultdict(float)
for key, value in user_data.items():
#     print(len(value.stock_price_over_year))
    if(value.summation_of_profits is None):
        continue
    if np.isnan(value.summation_of_profits):
        continue
    if len(value.stock_price_over_year) != 5:
        continue
    if value.year_over_year_profit['2020'] > 10 and value.year_over_year_profit['2019'] > 10:
        continue
    temp_dict[key] = value.summation_of_profits
    print(value.year_over_year_profit)
# print(temp_dict)
# a = sorted(temp_dict.items(), key=lambda x:x[1], reverse=True)
# print(a)

defaultdict(<class 'float'>, {'2017': 6.550871191375593, '2018': -1.6515324278688372, '2019': -13.592615562410026, '2020': -7.493900452982453})
defaultdict(<class 'float'>, {'2017': -41.98910397272894, '2018': -71.97014981289843, '2019': -52.387813330746866, '2020': -38.28350051959325})
defaultdict(<class 'float'>, {'2017': 15.121614901742014, '2018': 33.4291785574914, '2019': 14.355910310269076, '2020': -13.420810189455437})
defaultdict(<class 'float'>, {'2017': 53.960591431309304, '2018': 28.99866893632346, '2019': 14.953847505668602, '2020': -6.929330851244485})
defaultdict(<class 'float'>, {'2017': -1.5486026565266349, '2018': -4.488266852557061, '2019': 9.525374983819907, '2020': 0.7720761645474258})
defaultdict(<class 'float'>, {'2017': 41.386731738122116, '2018': 9.681484994976934, '2019': -63.255736655492754, '2020': -95.53143290074802})
defaultdict(<class 'float'>, {'2017': 42.94734169190184, '2018': 75.67160602836975, '2019': 35.446553368387846, '2020': -0.6050657254846593})


defaultdict(<class 'float'>, {'2017': -4.413867146557098, '2018': -3.037135412328222, '2019': 9.12318203781291, '2020': -3.6268230433253965})
defaultdict(<class 'float'>, {'2017': 41.215304897820545, '2018': 11.950288196076707, '2019': -4.947046423060133, '2020': -1.4738666555761926})
defaultdict(<class 'float'>, {'2017': 12.96858981095689, '2018': -4.723294290725183, '2019': -4.007348503202568, '2020': -3.71611908780514})
defaultdict(<class 'float'>, {'2017': 170.1454161296786, '2018': -8.512969019352752, '2019': 11996.603227179416, '2020': 0.47768422291234486})
defaultdict(<class 'float'>, {'2017': 24.507658809682198, '2018': 4.210151183546773, '2019': 16.814512289106997, '2020': 4.909453272601327})
defaultdict(<class 'float'>, {'2017': -10.470101199988582, '2018': 1.1047281706301058, '2019': 2.5821241330852116, '2020': -29.30797985165595})
defaultdict(<class 'float'>, {'2017': 20.512334305582954, '2018': 7.319341349672974, '2019': 9.377759054903562, '2020': 12.235978849894078})
defa

defaultdict(<class 'float'>, {'2017': 32.87377804750472, '2018': 10.577701237721739, '2019': -7.039660198074439, '2020': -20.876743237157974})
defaultdict(<class 'float'>, {'2017': -27.61923573235574, '2018': -8.311729949102984, '2019': 6.185889843760804, '2020': -6.162770947488254})
defaultdict(<class 'float'>, {'2017': 33.313937279469194, '2018': -5.059812737413559, '2019': -2.691091933251059, '2020': -2.4522213939116995})
defaultdict(<class 'float'>, {'2017': 6.064592470128048, '2018': -13.150203588968237, '2019': 17.51783546898756, '2020': -5.730957765522329})
defaultdict(<class 'float'>, {'2017': 30.36526414112822, '2018': 5.6519132541335955, '2019': 2.7843867044048154, '2020': 134.84958291729208})
defaultdict(<class 'float'>, {'2017': -59.788196188131, '2018': 0.936921836762227, '2019': 9.689025870587544, '2020': -26.058644718430806})
defaultdict(<class 'float'>, {'2017': 39.670506941344414, '2018': 6.734967250072938, '2019': -16.293054805310277, '2020': -6.683138171853159})
defa

In [12]:
print(sorted(temp_dict.items(), key=operator.itemgetter(1)))

[('CEI.csv', -377.9876916255227), ('INPX.csv', -360.9979798320176), ('CHFS.csv', -358.31760685460915), ('TRNX.csv', -351.86185780523044), ('SLS.csv', -351.56909634935397), ('AVGR.csv', -345.58673107644097), ('TOPS.csv', -340.82663244279456), ('JAGX.csv', -329.09601189027865), ('SDRL.csv', -327.99849005427984), ('PACD.csv', -322.9139403699113), ('EARS.csv', -312.55054936697667), ('NVIV.csv', -312.1928359027936), ('TNXP.csv', -311.76916245540815), ('PSTV.csv', -311.1101344875144), ('AYTU.csv', -302.40930756724845), ('VISL.csv', -301.3633235685811), ('DFFN.csv', -300.57855650228), ('OPGN.csv', -299.4078307714714), ('ANY.csv', -294.9210065004411), ('PHIO.csv', -292.0365053693805), ('TROV.csv', -283.98409328395053), ('FTR.csv', -280.68304740510825), ('ACHV.csv', -280.6493414849918), ('SAEX.csv', -279.85532458178176), ('AKER.csv', -277.26215952205735), ('ONTX.csv', -276.9106083827341), ('SEEL.csv', -276.32674958234486), ('BIOC.csv', -275.94466360972365), ('RWLK.csv', -273.67870455004567), ('

In [13]:
sorted_stock_names = sorted(temp_dict.items(), key=operator.itemgetter(1))

In [15]:
type(sorted_stock_names[0])

tuple

In [21]:
# Write to CSV
import csv 
with open(stock_info_directory + "/ranked_stock_prices.csv", 'w') as out:
    csv_out = csv.writer(out)
    for row in sorted_stock_names:
        print(row)
        csv_out.writerow(row)

('CEI.csv', -377.9876916255227)
('INPX.csv', -360.9979798320176)
('CHFS.csv', -358.31760685460915)
('TRNX.csv', -351.86185780523044)
('SLS.csv', -351.56909634935397)
('AVGR.csv', -345.58673107644097)
('TOPS.csv', -340.82663244279456)
('JAGX.csv', -329.09601189027865)
('SDRL.csv', -327.99849005427984)
('PACD.csv', -322.9139403699113)
('EARS.csv', -312.55054936697667)
('NVIV.csv', -312.1928359027936)
('TNXP.csv', -311.76916245540815)
('PSTV.csv', -311.1101344875144)
('AYTU.csv', -302.40930756724845)
('VISL.csv', -301.3633235685811)
('DFFN.csv', -300.57855650228)
('OPGN.csv', -299.4078307714714)
('ANY.csv', -294.9210065004411)
('PHIO.csv', -292.0365053693805)
('TROV.csv', -283.98409328395053)
('FTR.csv', -280.68304740510825)
('ACHV.csv', -280.6493414849918)
('SAEX.csv', -279.85532458178176)
('AKER.csv', -277.26215952205735)
('ONTX.csv', -276.9106083827341)
('SEEL.csv', -276.32674958234486)
('BIOC.csv', -275.94466360972365)
('RWLK.csv', -273.67870455004567)
('TTNP.csv', -270.96426582684774

('UBA.csv', -5.541857895276746)
('GHY.csv', -5.512123970524432)
('OILX.csv', -5.416531817399521)
('TWO.csv', -5.408041223995967)
('LMRKO.csv', -5.401704396005529)
('JEQ.csv', -5.374264006511832)
('ETV.csv', -5.3725652224698015)
('VER.csv', -5.351137828447974)
('BSL.csv', -5.344224802635744)
('BGCP.csv', -5.325532831990346)
('NUE.csv', -5.3220521137136)
('BSE.csv', -5.3192482113835755)
('LCNB.csv', -5.296763473642326)
('TDJ.csv', -5.2810366721298445)
('GBDC.csv', -5.258883482594403)
('LYB.csv', -5.232014354980077)
('EFT.csv', -5.225145173026159)
('EXG.csv', -5.207348263862505)
('UZB.csv', -5.198001475316566)
('SOJA.csv', -5.188661564641487)
('RIF.csv', -5.186476881756147)
('IPLDP.csv', -5.179656936553772)
('EVG.csv', -5.154142441362856)
('KNOP.csv', -5.143367394023507)
('EFR.csv', -5.138942999288877)
('ETJ.csv', -5.131711376654726)
('HPF.csv', -5.006832057088237)
('XVZ.csv', -5.0068040385655195)
('VVR.csv', -4.993938202073521)
('CTV.csv', -4.897645425357744)
('NGHC.csv', -4.839990978465

('GRFS.csv', 39.959465522203075)
('CTSO.csv', 39.991891184577774)
('CINF.csv', 40.010903972194605)
('MGPI.csv', 40.03071702461292)
('FIHD.csv', 40.11525464243079)
('XNET.csv', 40.12338101929278)
('EME.csv', 40.13214453211789)
('DLB.csv', 40.1874982629777)
('INTL.csv', 40.196540076918126)
('EXPD.csv', 40.20305460967763)
('WMGI.csv', 40.26308663399638)
('AZO.csv', 40.30325839648717)
('SPPI.csv', 40.3595007132993)
('GRBK.csv', 40.55429551837411)
('BYD.csv', 40.5702847135999)
('AIR.csv', 40.57653475613606)
('UTL.csv', 40.58015029556171)
('CSPI.csv', 40.60288236216869)
('SFST.csv', 40.62739396129194)
('OTEX.csv', 40.63736987681296)
('CFG.csv', 40.75730138601007)
('TCFC.csv', 40.78855662328413)
('GBCI.csv', 40.79905860504341)
('WERN.csv', 40.85029782064914)
('MC.csv', 40.87489863427086)
('AMSWA.csv', 40.88462743201228)
('MCFT.csv', 40.96783325591881)
('EVBN.csv', 40.97155906107592)
('TKR.csv', 40.9735600681297)
('NATI.csv', 41.04694650310354)
('LOW.csv', 41.10967543351753)
('BSGM.csv', 41.15