In [1]:
"""
File I/O
"""
import numpy as np

# Build a 2x2 identity matrix with eye(). Its single required argument is the
# number of rows (and, by default, columns), i.e. the number of ones on the diagonal.
i2 = np.eye(2)
print("i2: ", i2)

# Write the matrix to a plain-text file. The path is relative to the working
# directory; presumably the data/ directory already exists -- verify before running.
np.savetxt("data/eye.txt", i2)

i2:  [[1. 0.]
 [0. 1.]]


In [2]:
"""
CSV File

1. The loadtxt() function can conveniently read CSV files, split up the fields, and load the data into NumPy arrays. 

2. The unpack argument is set to True, which means that data will be unpacked and assigned to the c and v variables that
   will hold the close price and volume respectively.

3. The mean() function calculates the arithmetic mean of an array.

4. NumPy allows us to compute the spread of an array with a function called ptp(). The ptp() function returns the difference between
   the maximum and minimum values of an array. 
"""

# Read all four columns in a single pass instead of parsing the file twice.
# Columns 4-7 are unpacked, in order, into high, low, close and volume.
h, l, c, v = np.loadtxt('data/data.csv', delimiter=',', usecols=(4, 5, 6, 7), unpack=True)

# Volume-weighted average price: closing prices weighted by traded volume.
vwap = np.average(c, weights=v)
print("VWAP =", vwap)

# Time-weighted average price: the weights are the sample indices 0..n-1, so
# the first observation gets weight 0 and the most recent one counts most.
t = np.arange(len(c))
twap = np.average(c, weights=t)
print("TWAP =", twap)

# Plain arithmetic mean of the closing prices.
m = np.mean(c)
print("MEAN =", m)

# Extremes over the whole sample: highest high and lowest low.
highest = np.max(h)
lowest  = np.min(l)
print("MAX = ", highest)
print("MIN = ", lowest)

# ptp() ("peak to peak") returns max - min of the array.
h_spread = np.ptp(h)
l_spread = np.ptp(l)
print("Spread high price", h_spread)
print("Spread low price", l_spread)

VWAP = 350.5895493532009
TWAP = 352.4283218390804
MEAN = 351.0376666666667
MAX =  364.9
MIN =  333.53
Spread high price 24.859999999999957
Spread low price 26.970000000000027


In [4]:
"""
Simple statistics 
"""
# Load only the closing price (column 6).
c = np.loadtxt('data/data.csv', delimiter=',', usecols=(6,), unpack=True)

# Median via the library routine.
median = np.median(c)
print('median = ', median)

# Sort the closing prices. np.sort(..., axis=0) is the documented replacement
# for np.msort, which was deprecated and then removed in NumPy 2.0.
sorted_close = np.sort(c, axis=0)
print('sorted = ', sorted_close)

# Median by hand, using integer arithmetic so the indices are exact:
#   even length -> n1 and n2 are the two middle elements, which are averaged;
#   odd length  -> n1 == n2 is the single middle element.
# (The previous round((len(c) - 1) / 2) relied on banker's rounding and picked
# the wrong pair for some even lengths, e.g. 32, and for every odd length.)
n1 = (len(c) - 1) // 2
n2 = len(c) // 2
print("median = ", (sorted_close[n1] + sorted_close[n2])/2)

# Population variance, computed three equivalent ways: the NumPy function,
# the ndarray method, and the definition (mean squared deviation from the mean).
var = np.var(c)
print("varance = ", var)
print("varance = ", c.var())
var2 = np.mean((c- c.mean()) ** 2)
print("varance by def = ", var2)

median =  352.055
sorted =  [336.1  338.61 339.32 342.62 342.88 343.44 344.32 345.03 346.5  346.67
 348.16 349.31 350.56 351.88 351.99 352.12 352.47 353.21 354.54 355.2
 355.36 355.76 356.85 358.16 358.3  359.18 359.56 359.9  360.   363.13]
median =  352.055
varance =  50.126517888888884
varance =  50.126517888888884
varance by def =  50.126517888888884


In [None]:
"""
NumPy has the diff() function that returns an array that is built up of the difference between two consecutive array elements. To get
the returns, we also have to divide by the value of the previous day. We must be careful though. The array returned by diff() is one
element shorter than the close prices array, so we divide by c[:-1] (every close except the last one).
"""

# Simple returns: day-over-day change divided by the previous day's close.
prev_close = c[:-1]
returns = (c[1:] - prev_close) / prev_close
print(returns)
print("Standard deviation =", returns.std())

# Log returns: difference of consecutive log prices.
log_close = np.log(c)
logreturns = log_close[1:] - log_close[:-1]
print(logreturns)

# Indices of the days whose simple return is positive (tuple of index arrays).
posretindices = (returns > 0).nonzero()
print("Indices with positive returns", posretindices)

# Volatility figures derived from the log returns.
# NOTE(review): this divides the standard deviation by the mean log return
# before scaling by sqrt(1/252) -- confirm this is the intended definition.
annual_volatility = logreturns.std() / logreturns.mean() / np.sqrt(1./252.)
print("Annual volatility", annual_volatility)
print("Monthly volatility", annual_volatility * np.sqrt(1./12.))

In [11]:
"""
Dates

1. The loadtxt() function has a special parameter called converters and is a dictionary that links columns with the so-called converter
   functions. It is our responsibility to write the converter function.

2. We give the datestr2num() function dates as a string, such as 28-01-2011. The string is first turned into a datetime object, using
   a specified format %d-%m-%Y. Second, the datetime object is turned into a date. Finally, the weekday() method is called on the date to
   return a number between 0 and 6, where 0 is Monday and 6 is Sunday.

3. We already learned about the where() function that returns indices of the array for elements that conform to a specified condition.
   The take() function can use these indices and takes the values of the corresponding array items. 
"""
import datetime

def datestr2num(s):
    """Convert a ``DD-MM-YYYY`` date string to its weekday number.

    Parameters
    ----------
    s : bytes or str
        Date text such as ``28-01-2011``. ``np.loadtxt`` hands converters
        either bytes or str depending on the NumPy version and the
        ``encoding`` argument, so both are accepted.

    Returns
    -------
    int
        Day of the week, 0 (Monday) through 6 (Sunday).

    Raises
    ------
    ValueError
        If the text does not match the ``%d-%m-%Y`` format.
    """
    # Only bytes need decoding; str values are parsed as-is.
    if isinstance(s, bytes):
        s = s.decode()
    return datetime.datetime.strptime(s, "%d-%m-%Y").date().weekday()

# Read the date column (converted to a weekday number by datestr2num) together
# with the closing price.
dates, close = np.loadtxt(
    'data/data.csv',
    delimiter=',',
    usecols=(1, 6),
    converters={1: datestr2num},
    unpack=True,
)
print('dates: ', dates)

# One slot per trading day, Monday (0) through Friday (4).
average = np.zeros(5)

# For each weekday, pick the closes that fall on that day and average them.
for i in range(5):
    indices = (dates == i).nonzero()
    price = close.take(indices)
    avg = price.mean()
    print('Day', i, "Price", price, "Average", avg)
    average[i] = avg

# Best and worst weekday by average closing price.
top = average.max()
bottom = average.min()
print("Highest average", top)
print("Top day of the week", average.argmax())
print("Lowest average", bottom)
print("Bottom day of the week", average.argmin())

dates:  [4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 1. 2. 3. 4. 0. 1. 2. 3.
 4. 0. 1. 2. 3. 4.]
Day 0 Price [[339.32 351.88 359.18 353.21 355.36]] Average 351.7900000000001
Day 1 Price [[345.03 355.2  359.9  338.61 349.31 355.76]] Average 350.63500000000005
Day 2 Price [[344.32 358.16 363.13 342.62 352.12 352.47]] Average 352.1366666666666
Day 3 Price [[343.44 354.54 358.3  342.88 359.56 346.67]] Average 350.8983333333333
Day 4 Price [[336.1  346.5  356.85 350.56 348.16 360.   351.99]] Average 350.0228571428571
Highest average 352.1366666666666
Top day of the week 2
Lowest average 350.0228571428571
Bottom day of the week 4


In [16]:
"""
NumPy's datetime64 type

1. NumPy uses the ISO 8601 standard. This is an international standard to represent dates and times. ISO 8601 allows the YYYY-MM-DD,
   YYYY-MM, and YYYYMMDD formats.

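2. Times can be specified using the format T[hh:mm:ss]. Since NumPy 1.11, datetime64 values are timezone-naive: no local-time conversion is applied, and parsing strings that carry a UTC offset is deprecated.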

3. The subtraction creates a NumPy timedelta64 object; We can also add or subtract a number of days to a datetime64 object.
"""

# Day-resolution datetime64 built from an ISO 8601 date string (April 22, 2015).
print(np.datetime64('2015-04-22'))

# Month-resolution value: only the year and month are given (April 2015).
print(np.datetime64('2015-04'))

# Minute-resolution timestamp; the string carries no UTC offset.
local = np.datetime64('1677-01-01T20:19')
print(local)

# The same wall-clock text with a -0900 UTC offset appended.
# NOTE(review): NumPy documents datetime64 as timezone-naive and has deprecated
# parsing strings that carry an offset -- confirm this still runs cleanly on
# the NumPy version in use.
with_offset = np.datetime64('1677-01-01T20:19-0900')
print(with_offset)

# Subtracting two datetime64 values yields a timedelta64.
print(local - with_offset)

# Weekly steps (7 days) from 2015-04-22 up to, but not including, 2015-05-22.
# The dtype must be given explicitly; otherwise NumPy treats the endpoints as
# plain strings rather than dates.
array = np.arange('2015-04-22', '2015-05-22', 7, dtype='datetime64')
print(array)

2015-04-22
2015-04
1677-01-01T20:19
1677-01-02T05:19
-540 minutes
['2015-04-22' '2015-04-29' '2015-05-06' '2015-05-13' '2015-05-20']




In [26]:
"""
Weekly Summary
"""

# Read weekday number, open, high, low and close in one call.
# NOTE: the name `open` rebinds Python's built-in open() for the rest of the
# kernel session.
dates, open, high, low, close = np.loadtxt(
    './data/data.csv',
    delimiter=',',
    usecols=(1, 3, 4, 5, 6),
    converters={1: datestr2num},
    unpack=True,
)

# Keep only the first 16 rows of close and dates (the first three weeks).
close = close[:16]
dates = dates[:16]
print(dates)

# Position of the first Monday (weekday 0) in the truncated sample.
first_monday = np.flatnonzero(dates == 0)[0]
print("first monday: ", first_monday)

# Position of the last Friday (weekday 4) in the truncated sample.
last_friday = np.flatnonzero(dates == 4)[-1]
print("last friday: ", last_friday)

# Every row index from the first Monday through the last Friday, inclusive.
weeks_indices = np.arange(first_monday, last_friday + 1)
print("Weeks indices initial", weeks_indices)

# Cut the index range into 3 equal sections, one per week.
weeks_indices = np.split(weeks_indices, 3)
print("Weeks indices after split", weeks_indices)

# summarize() function
def summarize(a, o, h, l, c):
    """Summarise one week of price data.

    Parameters
    ----------
    a : sequence of int
        Row indices belonging to the week, in chronological order.
    o, h, l, c : array-like
        Open, high, low and close values, indexed by the entries of ``a``.

    Returns
    -------
    tuple
        ``("APPL", open at a[0], max of h over a, min of l over a, close at a[-1])``.
    """
    first_idx, last_idx = a[0], a[-1]
    highs = np.take(h, a)
    lows = np.take(l, a)
    return ("APPL", o[first_idx], highs.max(), lows.min(), c[last_idx])

# Apply summarize() along axis 1 of weeks_indices, i.e. once per week of row
# indices. The trailing arrays (open, high, low, close) are forwarded unchanged
# as summarize()'s extra positional arguments.
weeksummary = np.apply_along_axis(summarize, 1, weeks_indices, open, high, low, close)
print("Week summary", weeksummary)

# Write the summary as comma-separated text. fmt="%s" formats every field as a
# string, so the ticker label and the price values are written as-is.
np.savetxt("./data/weeksummary.csv", weeksummary, delimiter=",", fmt="%s")

[4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4.]
first monday:  1
last friday:  15
Weeks indices initial [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
Weeks indices after split [array([1, 2, 3, 4, 5]), array([ 6,  7,  8,  9, 10]), array([11, 12, 13, 14, 15])]
Week summary [['APPL' '335.8' '346.7' '334.3' '346.5']
 ['APPL' '347.8' '360.0' '347.6' '356.8']
 ['APPL' '356.7' '364.9' '349.5' '350.5']]
