- https://github.com/dataquestio/solutions/blob/master/Mission177Solutions.ipynb


<b> 1 - Load data </b>

- Proposed data structure to hold data:
- Hash, List, List:
{'stock':[[day, Value, Value2, Value3],
          [day, Value, Value2, Value3]
          ]
}

This way it's possible to quickly identify the stock using hash table key, then the stock list will have up to 255 items. Each item will have Date and the values for the day. The data is already sorted, so we can use binary search to find a specific day


In [2]:
import os
import concurrent.futures
import csv


def read_file(file):
   
    # Create key using file name
    key = file[:-4]
    f = open('prices/' + file,'r')
    data = list(csv.reader(f))
  
    # Return key and data, so it can build a dictionary
    return key, data


#load_data('dgica.csv')

pool = concurrent.futures.ProcessPoolExecutor(max_workers=5)
data = pool.map(read_file, os.listdir('prices'))
data = list(data)

# Make it a hash table 
data = dict(data) 


print('done')    
    

done


<b> 2 - Computing aggregations <b>

In [3]:
from dateutil.parser import parse

# Transform data into
# Hash table, hash table, list
stocks_data = {}
for stock,v in data.items():
   
    headers = v[0] # ['date', 'close', 'open', 'high', 'low', 'volume']
    stock_data = v[1:]
    
    # Loop through each header to build a list with its values
    header_data = {}
    for i, header in enumerate(headers):
        
        # Use header index to pick right item from the list
        #['2007-01-03', '55.380001', '54.880001', '55.75', '54.880001', '66000']
        # Parse data in the process as well
        if i > 0:
            values = [float(p[i]) for p in stock_data]
        else:
            # Parse date using dateutil.parser
            values = [parse(p[i]) for p in stock_data]
        header_data[header] = values
        
    # Build final hash table
    stocks_data[stock] = header_data
    
    #{'stock': 'date': [1,2,3,4],
    #           'volume': [1,2,3,4]}
#print(stocks_data['bokf'])

print('done')


done


In [4]:
from statistics import mean


# ['date', 'close', 'open', 'high', 'low', 'volume']

# Average closing price Apple (aapl)
print('Apple average closing price =', mean(stocks_data['aapl']['close']))
print()

# Average volume Apple (aapl)
print('Apple average volume =', mean(stocks_data['aapl']['volume']))
print()

# Highest price variation for apple
max_var = 0
max_var_index = -1
for i,h in enumerate(stocks_data['aapl']['high']):
    low = stocks_data['aapl']['low'][i]
    
    if (h - low) > max_var:
        max_var = h - low
        max_var_index = i
print('Highest price variation for apple =', max_var) 
print('date:', stocks_data['aapl']['date'][max_var_index])
print('high =', stocks_data['aapl']['high'][max_var_index])
print('low =', stocks_data['aapl']['low'][max_var_index])
print('volume =', stocks_data['aapl']['volume'][max_var_index])

print()










Apple average closing price = 257.17654040231656

Apple average volume = 130112422.35521236

Highest price variation for apple = 59.00000200000002
date: 2010-05-06 00:00:00
high = 258.249996
low = 199.249994
volume = 321465200.0



In [4]:
# Average closing for all stocks

print('Average closing for all stocks:')
mean_prices = [(stock,mean(stocks_data[stock]['close'])) for stock in stocks_data]
print(sorted(mean_prices, key=lambda x:x[1]))


Average closing for all stocks:
[('blfs', 0.8122763011583011), ('apdn', 0.8241009938223938), ('bmra', 0.901011583011583), ('bcli', 0.9969415324324323), ('cyrx', 1.1615408884169884), ('clrb', 1.2045711436293436), ('cpst', 1.206953667953668), ('csbr', 1.2282443845854418), ('egt', 1.3293513513513513), ('aemd', 1.398042471042471), ('dfbg', 1.4005010393822395), ('alqa', 1.405298283011583), ('cpah', 1.4116189448441248), ('astc', 1.4152123552123552), ('chci', 1.4581224154440156), ('ctic', 1.494366311969112), ('eltk', 1.5323436293436294), ('dzsi', 1.5382316602316601), ('cool', 1.5475988922779924), ('cgnt', 1.5946138996138997), ('creg', 1.6028996138996139), ('casi', 1.617906349034749), ('admp', 1.7122164397683397), ('bnso', 1.7172548262548262), ('aezs', 1.7391445949806952), ('dynt', 1.822119691119691), ('apps', 1.8256061776061776), ('dysl', 1.8631660231660232), ('apri', 1.8681738996138995), ('crds', 1.8903166015444017), ('dlhc', 1.8903745173745172), ('cur', 1.907691699604743), ('ardm', 1.928069

<b> 3. Finding The Most Traded Stock Each Day <b>

In [5]:
import datetime

#{'stock': 'date': [1,2,3,4],
#          'volume': [1,2,3,4]}
stocks_date = {}
stocks_volume = {}
for stock in stocks_data.keys():
    for i,d in enumerate(stocks_data[stock]['date']):
        date = d.strftime('%Y-%m-%d')
        if  date not in stocks_date.keys():
            stocks_date[date] = (stocks_data[stock]['volume'][i], stock)
            stocks_volume[date] = stock
        else:
            if stocks_data[stock]['volume'][i] > stocks_date[date][0]:
                stocks_date[date] = (stocks_data[stock]['volume'][i], stock)
                stocks_volume[date] = stock
   
      
most_traded = []
for d in sorted(stocks_volume.keys()):
    most_traded.append([d,stocks_volume[d]])
    
print(most_traded)
        
    

[['2007-01-03', 'aapl'], ['2007-01-04', 'aapl'], ['2007-01-05', 'aapl'], ['2007-01-08', 'aapl'], ['2007-01-09', 'aapl'], ['2007-01-10', 'aapl'], ['2007-01-11', 'aapl'], ['2007-01-12', 'aapl'], ['2007-01-16', 'aapl'], ['2007-01-17', 'aapl'], ['2007-01-18', 'aapl'], ['2007-01-19', 'aapl'], ['2007-01-22', 'aapl'], ['2007-01-23', 'aapl'], ['2007-01-24', 'aapl'], ['2007-01-25', 'aapl'], ['2007-01-26', 'aapl'], ['2007-01-29', 'aapl'], ['2007-01-30', 'aapl'], ['2007-01-31', 'aapl'], ['2007-02-01', 'aapl'], ['2007-02-02', 'aapl'], ['2007-02-05', 'aapl'], ['2007-02-06', 'aapl'], ['2007-02-07', 'aapl'], ['2007-02-08', 'aapl'], ['2007-02-09', 'aapl'], ['2007-02-12', 'aapl'], ['2007-02-13', 'aapl'], ['2007-02-14', 'aapl'], ['2007-02-15', 'bidu'], ['2007-02-16', 'aapl'], ['2007-02-20', 'aapl'], ['2007-02-21', 'aapl'], ['2007-02-22', 'aapl'], ['2007-02-23', 'aapl'], ['2007-02-26', 'aapl'], ['2007-02-27', 'aapl'], ['2007-02-28', 'aapl'], ['2007-03-01', 'aapl'], ['2007-03-02', 'aapl'], ['2007-03-05', 

In [6]:
high_volume = {}

for stock, prices in stocks_data.items():    
    for i, d in enumerate(prices['date']):        
        if d not in high_volume.keys():
            high_volume[d] = 0
        high_volume[d] += prices['volume'][i]
               
# Sorte by value and keep top 10 only
top_10 = sorted(high_volume.items(), key=lambda x: x[1], reverse=True)[:10]
print("TOP 10 day trades by volume:")
print(top_10)

        


TOP 10 day trades by volume:
[(datetime.datetime(2008, 1, 23, 0, 0), 1964583900.0), (datetime.datetime(2008, 10, 10, 0, 0), 1770266900.0), (datetime.datetime(2007, 7, 26, 0, 0), 1611272800.0), (datetime.datetime(2008, 10, 8, 0, 0), 1599183500.0), (datetime.datetime(2008, 1, 22, 0, 0), 1578877700.0), (datetime.datetime(2008, 2, 7, 0, 0), 1559032100.0), (datetime.datetime(2008, 9, 29, 0, 0), 1555072400.0), (datetime.datetime(2007, 11, 8, 0, 0), 1553880500.0), (datetime.datetime(2008, 1, 16, 0, 0), 1536176400.0), (datetime.datetime(2008, 1, 24, 0, 0), 1533363200.0)]


In [7]:
for d,v in top_10:
    print('Date:',d)
    print('Total Volume:',v)
    
    
    for stock, price in stocks_data.items():
        i = price['date'].index(d)
        print('Stock =',stock,' Close Price =',price['close'][i] )
        #print(i)
        #print(price['close'][i])
        
    print('------------------------------------')

Date: 2008-01-23 00:00:00
Total Volume: 1964583900.0
Stock = abcb  Close Price = 15.320042
Stock = evlv  Close Price = 5.78
Stock = bybk  Close Price = 12.5
Stock = cy  Close Price = 22.200013
Stock = arlp  Close Price = 34.450001
Stock = fhco  Close Price = 2.5
Stock = cffi  Close Price = 30.799999
Stock = cme  Close Price = 609.990005
Stock = cnbka  Close Price = 20.200001
Stock = apwc  Close Price = 4.6
Stock = bvsn  Close Price = 1.55
Stock = cent  Close Price = 4.41
Stock = bwinb  Close Price = 27.15
Stock = buse  Close Price = 20.979978
Stock = colm  Close Price = 41.990002
Stock = amd  Close Price = 7.56
Stock = cenx  Close Price = 42.490002
Stock = cbsh  Close Price = 43.240306
Stock = clsn  Close Price = 4.710001
Stock = becn  Close Price = 8.63
Stock = cfnb  Close Price = 10.12
Stock = acls  Close Price = 4.0
Stock = amed  Close Price = 45.830002
Stock = ardm  Close Price = 1.29
Stock = asrvp  Close Price = 24.559999
Stock = abax  Close Price = 35.279999
Stock = csbk  Close P

ValueError: datetime.datetime(2008, 10, 10, 0, 0) is not in list

<b> Finding the most profitable stock <b>

In [43]:

stock_performance = []
for s,prices in stocks_data.items():
    price_difference = prices['close'][-1] - prices['close'][0]
    pct_difference = price_difference / prices['close'][0]
    stock_performance.append((s,pct_difference))


most_profitable = max(stock_performance,key=lambda item:item[1])
print('The most profitable stock is:', most_profitable[0])
print('Percentage Growth =',most_profitable[1])




The most profitable stock is: admp
Percentage Growth = 74.83838922594839


In [44]:
print('done')

done
