In [1]:
import pandas as pd 
from collections import defaultdict 
import numpy as np 
import matplotlib.pyplot as plt
from os import walk 
from pprint import pprint 
import operator 

%matplotlib inline 


In [2]:
# from supporting_objects.StockSummary import StockSummary
from collections import defaultdict

class StockSummary:
    def __init__(self):
        self.stock_symbol = None
        self.stock_name = None
        self.stock_mean_value_per_date = defaultdict(float)
        self.stock_mean_volume_per_date = defaultdict(float)
        self.stock_mean_value_per_year = defaultdict(float)
        self.stock_price_over_year = None 
        self.year_over_year_profit = None 
        self.summation_of_profits = None
        self.overall_year_profit = None


In [3]:
all_stock_data = "/Users/pyuvraj/CCPP/data_for_profit_from_stock/all_stocks_historical_prices/symbols_valid_meta.csv"
stock_info_directory = "/Users/pyuvraj/CCPP/data_for_profit_from_stock/all_stocks_historical_prices/stocks"

In [4]:
class FindHighGrowthStocksNASDAQ:
    def __init__(self, stock_csv_folder, stock_description_file):
        self.stock_csv_folder = stock_csv_folder
        self.stock_description_file = stock_description_file
        self.stock_csv_absoulte_path = {}
        self.data = self.ReadAllStockData()
        self.delta_days = 60
        self.data_per_company = defaultdict(StockSummary)

    def ReadAllStockData(self):
        f = []
        stock_file_in_location = defaultdict(str)
        for (dirpath, dirnames, filenames) in walk(stock_info_directory):
            f.extend(filenames)
        for file_name in f:
            stock_file_in_location[file_name] = stock_info_directory + "/" + file_name
        return stock_file_in_location

    def ReadAndIteratePerStock(self):
        i = 0
        for stock_code, absolute_location in self.data.items():
            i += 1
#             if i > 2:
#                 break
            stock_csv_data = pd.read_csv(absolute_location)
            stock_csv_data_processed = self.AddDateMonthYearColumns(stock_csv_data)
#             print("stock code", stock_code[:-4], stock_code)
            self.data_per_company[stock_code] = self.ProcessOverallDataPerStock(
                stock_code[:-4], stock_csv_data_processed
            )

    #             print(stock_code, self.data_per_company['stock_code'].summation_of_profits)
    #             if i > 200: break

    def AddDateMonthYearColumns(self, data):
        Year, Month, Day = [], [], []
        for index in data["Date"]:
            year = int(index[0:4])
            month = int(index[5:7])
            day = int(index[8:10])
            Year.append(year)
            Month.append(month)
            Day.append(day)
        data["Year"] = Year
        data["Month"] = Month
        data["Day"] = Day
        return data

    def GetStatisticsFor30Days(self, precise_data):
        avg_price = np.mean(precise_data["Close"])
        avg_volume = np.mean(precise_data["Volume"])
        date = np.min(precise_data["Date"])
        return date, avg_price, avg_volume

    def StockPriceMeanPerYear(self, data):
        year_over_year_profit = defaultdict(float)
        stock_price_over_year = defaultdict(float)

        stock_price_over_year["2016"] = np.mean(data[data["Year"] == 2016]["Close"])
        stock_price_over_year["2017"] = np.mean(data[data["Year"] == 2017]["Close"])
        stock_price_over_year["2018"] = np.mean(data[data["Year"] == 2018]["Close"])
        stock_price_over_year["2019"] = np.mean(data[data["Year"] == 2019]["Close"])
        stock_price_over_year["2020"] = np.mean(data[data["Year"] == 2020]["Close"])

        year_over_year_profit["2017"] = 100.0 * (
            (stock_price_over_year["2017"] - stock_price_over_year["2016"])
            / stock_price_over_year["2016"]
        )
        year_over_year_profit["2018"] = 100.0 * (
            (stock_price_over_year["2018"] - stock_price_over_year["2017"])
            / stock_price_over_year["2017"]
        )
        year_over_year_profit["2019"] = 100.0 * (
            (stock_price_over_year["2019"] - stock_price_over_year["2018"])
            / stock_price_over_year["2018"]
        )
        year_over_year_profit["2020"] = 100.0 * (
            (stock_price_over_year["2020"] - stock_price_over_year["2019"])
            / stock_price_over_year["2019"]
        )

        overall_year_profit = 100.0 * (
            (stock_price_over_year["2020"] - stock_price_over_year["2016"])
            / stock_price_over_year["2016"]
        )
        return (
            stock_price_over_year,
            year_over_year_profit,
            sum(year_over_year_profit.values()),
            overall_year_profit,
        )

    def ProcessOverallDataPerStock(self, stock_code, data):
        close = len(data)
        start = close - self.delta_days
        stock_summary = StockSummary()
        stock_summary.stock_symbol = stock_code
#         print("stock code", stock_code)
        (
            stock_summary.stock_price_over_year,
            stock_summary.year_over_year_profit,
            stock_summary.summation_of_profits,
            stock_summary.overall_year_profit,
        ) = self.StockPriceMeanPerYear(data)
#         print(stock_summary.stock_price_over_year)
#         print(stock_summary.year_over_year_profit)
#         print(stock_summary.summation_of_profits)
        while start > 0:
            date, mean_value, mean_volume = self.GetStatisticsFor30Days(
                data[start:close]
            )
#             print(date, mean_value, mean_volume)
            stock_summary.stock_mean_value_per_date[date] = mean_value
            stock_summary.stock_mean_volume_per_date[date] = mean_volume
            if data["Date"][start] < "2016-01-01":
                break
            close = start
            start = start - self.delta_days
        return stock_summary

    def RetrieveData(self):
        return self.data_per_company


In [5]:
find_high_growth_stocks_nasdaq = FindHighGrowthStocksNASDAQ(stock_info_directory, all_stock_data)
find_high_growth_stocks_nasdaq.ReadAndIteratePerStock()
user_data = find_high_growth_stocks_nasdaq.RetrieveData()

In [6]:
print(dir(user_data['RIV.csv']))
print(user_data['RIV.csv'].year_over_year_profit)
print(user_data['RIV.csv'].stock_symbol)

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'overall_year_profit', 'stock_mean_value_per_date', 'stock_mean_value_per_year', 'stock_mean_volume_per_date', 'stock_name', 'stock_price_over_year', 'stock_symbol', 'summation_of_profits', 'year_over_year_profit']
defaultdict(<class 'float'>, {'2017': 6.550871191375593, '2018': -1.6515324278688372, '2019': -13.592615562410026, '2020': -7.493900452982453})
RIV


In [7]:
print(type(user_data))

<class 'collections.defaultdict'>


In [13]:
temp_dict = defaultdict(float)
for key, value in user_data.items():
    if (
        value.year_over_year_profit["2020"] > 10
        and value.year_over_year_profit["2019"] > 10
        and value.year_over_year_profit["2018"] > 10
        and value.year_over_year_profit["2017"] > 10
        and value.overall_year_profit > 10
    ):
        temp_dict[key] = value
        print(value.year_over_year_profit, value.overall_year_profit)


defaultdict(<class 'float'>, {'2017': 22.643251796557358, '2018': 51.96429487563285, '2019': 32.93491862803894, '2020': 18.16159198362488}) 192.75250769236413
defaultdict(<class 'float'>, {'2017': 43.90992328496258, '2018': 31.914779329326347, '2019': 10.356211516367072, '2020': 24.13479023560828}) 160.06056070848314
defaultdict(<class 'float'>, {'2017': 65.14505863916118, '2018': 14.864377173409643, '2019': 25.93467108991493, '2020': 26.654733151533193}) 202.56429888958638
defaultdict(<class 'float'>, {'2017': 48.241494597850256, '2018': 67.38883709757238, '2019': 43.81738282045705, '2020': 46.60699171925267}) 423.1935011853649
defaultdict(<class 'float'>, {'2017': 31.988789934193218, '2018': 49.38930997661682, '2019': 64.14772931166222, '2020': 10.106574840877137}) 256.37292445224807
defaultdict(<class 'float'>, {'2017': 25.43603784824232, '2018': 35.72556727900614, '2019': 25.772824993277993, '2020': 12.593704054863112}) 141.0931744593695
defaultdict(<class 'float'>, {'2017': 68.090

In [12]:
print(sorted(temp_dict.items(), key=operator.itemgetter(1)))

[('CEI.csv', -377.9876916255227), ('INPX.csv', -360.9979798320176), ('CHFS.csv', -358.31760685460915), ('TRNX.csv', -351.86185780523044), ('SLS.csv', -351.56909634935397), ('AVGR.csv', -345.58673107644097), ('TOPS.csv', -340.82663244279456), ('JAGX.csv', -329.09601189027865), ('SDRL.csv', -327.99849005427984), ('PACD.csv', -322.9139403699113), ('EARS.csv', -312.55054936697667), ('NVIV.csv', -312.1928359027936), ('TNXP.csv', -311.76916245540815), ('PSTV.csv', -311.1101344875144), ('AYTU.csv', -302.40930756724845), ('VISL.csv', -301.3633235685811), ('DFFN.csv', -300.57855650228), ('OPGN.csv', -299.4078307714714), ('ANY.csv', -294.9210065004411), ('PHIO.csv', -292.0365053693805), ('TROV.csv', -283.98409328395053), ('FTR.csv', -280.68304740510825), ('ACHV.csv', -280.6493414849918), ('SAEX.csv', -279.85532458178176), ('AKER.csv', -277.26215952205735), ('ONTX.csv', -276.9106083827341), ('SEEL.csv', -276.32674958234486), ('BIOC.csv', -275.94466360972365), ('RWLK.csv', -273.67870455004567), ('

In [13]:
sorted_stock_names = sorted(temp_dict.items(), key=operator.itemgetter(1))

In [14]:
import csv 
output = "/Users/pyuvraj/CCPP/data_for_profit_from_stock/all_stocks_historical_prices/output"
file = open(output + "/historical_profit_all_stocks_v3.csv", 'w', newline='')
with file:
    header = ['stock_symbol', 'overall_profit', '2017_value', '2018_value', '2019_value', '2020_value', '2017_profit', '2018_profit', '2019_profit', '2020_profit']
    writer = csv.DictWriter(file, fieldnames = header)
    writer.writeheader()
    for key, value in temp_dict.items():
        writer.writerow({'stock_symbol': value.stock_symbol, 
                        'overall_profit': value.overall_year_profit,
                        '2017_value': value.stock_price_over_year['2017'],
                        '2018_value': value.stock_price_over_year['2018'],
                        '2019_value': value.stock_price_over_year['2019'],
                        '2020_value': value.stock_price_over_year['2020'], 
                        '2017_profit': value.year_over_year_profit['2017'],
                        '2018_profit': value.year_over_year_profit['2018'], 
                        '2019_profit': value.year_over_year_profit['2019'], 
                        '2020_profit': value.year_over_year_profit['2020'], 
                        })

In [21]:
# Write to CSV
import csv 
with open(stock_info_directory + "/ranked_stock_prices.csv", 'w') as out:
    csv_out = csv.writer(out)
    for row in sorted_stock_names:
        print(row)
        csv_out.writerow(row)

('CEI.csv', -377.9876916255227)
('INPX.csv', -360.9979798320176)
('CHFS.csv', -358.31760685460915)
('TRNX.csv', -351.86185780523044)
('SLS.csv', -351.56909634935397)
('AVGR.csv', -345.58673107644097)
('TOPS.csv', -340.82663244279456)
('JAGX.csv', -329.09601189027865)
('SDRL.csv', -327.99849005427984)
('PACD.csv', -322.9139403699113)
('EARS.csv', -312.55054936697667)
('NVIV.csv', -312.1928359027936)
('TNXP.csv', -311.76916245540815)
('PSTV.csv', -311.1101344875144)
('AYTU.csv', -302.40930756724845)
('VISL.csv', -301.3633235685811)
('DFFN.csv', -300.57855650228)
('OPGN.csv', -299.4078307714714)
('ANY.csv', -294.9210065004411)
('PHIO.csv', -292.0365053693805)
('TROV.csv', -283.98409328395053)
('FTR.csv', -280.68304740510825)
('ACHV.csv', -280.6493414849918)
('SAEX.csv', -279.85532458178176)
('AKER.csv', -277.26215952205735)
('ONTX.csv', -276.9106083827341)
('SEEL.csv', -276.32674958234486)
('BIOC.csv', -275.94466360972365)
('RWLK.csv', -273.67870455004567)
('TTNP.csv', -270.96426582684774

('UBA.csv', -5.541857895276746)
('GHY.csv', -5.512123970524432)
('OILX.csv', -5.416531817399521)
('TWO.csv', -5.408041223995967)
('LMRKO.csv', -5.401704396005529)
('JEQ.csv', -5.374264006511832)
('ETV.csv', -5.3725652224698015)
('VER.csv', -5.351137828447974)
('BSL.csv', -5.344224802635744)
('BGCP.csv', -5.325532831990346)
('NUE.csv', -5.3220521137136)
('BSE.csv', -5.3192482113835755)
('LCNB.csv', -5.296763473642326)
('TDJ.csv', -5.2810366721298445)
('GBDC.csv', -5.258883482594403)
('LYB.csv', -5.232014354980077)
('EFT.csv', -5.225145173026159)
('EXG.csv', -5.207348263862505)
('UZB.csv', -5.198001475316566)
('SOJA.csv', -5.188661564641487)
('RIF.csv', -5.186476881756147)
('IPLDP.csv', -5.179656936553772)
('EVG.csv', -5.154142441362856)
('KNOP.csv', -5.143367394023507)
('EFR.csv', -5.138942999288877)
('ETJ.csv', -5.131711376654726)
('HPF.csv', -5.006832057088237)
('XVZ.csv', -5.0068040385655195)
('VVR.csv', -4.993938202073521)
('CTV.csv', -4.897645425357744)
('NGHC.csv', -4.839990978465

('GRFS.csv', 39.959465522203075)
('CTSO.csv', 39.991891184577774)
('CINF.csv', 40.010903972194605)
('MGPI.csv', 40.03071702461292)
('FIHD.csv', 40.11525464243079)
('XNET.csv', 40.12338101929278)
('EME.csv', 40.13214453211789)
('DLB.csv', 40.1874982629777)
('INTL.csv', 40.196540076918126)
('EXPD.csv', 40.20305460967763)
('WMGI.csv', 40.26308663399638)
('AZO.csv', 40.30325839648717)
('SPPI.csv', 40.3595007132993)
('GRBK.csv', 40.55429551837411)
('BYD.csv', 40.5702847135999)
('AIR.csv', 40.57653475613606)
('UTL.csv', 40.58015029556171)
('CSPI.csv', 40.60288236216869)
('SFST.csv', 40.62739396129194)
('OTEX.csv', 40.63736987681296)
('CFG.csv', 40.75730138601007)
('TCFC.csv', 40.78855662328413)
('GBCI.csv', 40.79905860504341)
('WERN.csv', 40.85029782064914)
('MC.csv', 40.87489863427086)
('AMSWA.csv', 40.88462743201228)
('MCFT.csv', 40.96783325591881)
('EVBN.csv', 40.97155906107592)
('TKR.csv', 40.9735600681297)
('NATI.csv', 41.04694650310354)
('LOW.csv', 41.10967543351753)
('BSGM.csv', 41.15