In [None]:
import numpy as np
import pandas as pd
import pyarrow as pa
import matplotlib as plt

In [None]:
# Load the price table from a local Parquet file (path is relative to the working directory).
# NOTE(review): presumably adjusted close prices, one row per date and one column per asset - confirm.
df = pd.read_parquet('data_adjclose.pq')

In [None]:
# Display the loaded frame as a quick visual sanity check.
df

In [None]:
# Preview the frame shifted down by one row: each row now holds the previous row's values,
# and the first row becomes NaN.
df.shift(1)

In [None]:
# One-step relative change: (current row - previous row) / previous row.
# The first row has no predecessor, so it comes out as NaN.
df_prev = df.shift(1)
diff = (df - df_prev) / df_prev
diff

In [None]:
def compute_diff(df, shift):
    """Relative change between each row of ``df`` and the row ``shift`` steps away.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame of values; rows are compared with other rows of the same column.
    shift : int
        Row offset passed to ``DataFrame.shift``. A positive value looks back
        ``shift`` rows, a zero or negative value looks ahead ``-shift`` rows.

    Returns
    -------
    pandas.DataFrame
        ``(later - earlier) / earlier`` element-wise, where ``earlier`` is the
        row that comes first in the frame. Rows without a counterpart are NaN.
    """
    moved = df.shift(shift)
    # Pick which frame is the base so the earlier observation is always the denominator.
    if shift > 0:
        earlier, later = moved, df   # (current - shift_rows_ago) / shift_rows_ago
    else:
        earlier, later = df, moved   # (shift_rows_later - current) / current
    return (later - earlier) / earlier

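Simple Linear Regression: regress tomorrow's return $y$ on today's return $x$, with no intercept.
$$
\begin{aligned}
y &= ax\\
y &= \frac{\text{price}_{\text{tomorrow}} - \text{price}_{\text{today}}}{\text{price}_{\text{today}}}\\
x &= \frac{\text{price}_{\text{today}} - \text{price}_{\text{yesterday}}}{\text{price}_{\text{yesterday}}}\\
\hat{y} &= ax
\end{aligned}
$$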


In [None]:
# Regressor: relative change versus the previous row, flattened row by row into one column (N, 1).
# Target: relative change to the next row, flattened the same way into shape (N,).
# Missing values (first/last row, gaps in the data) are replaced by 0.
x = compute_diff(df, 1).fillna(0).values.flatten().reshape(-1, 1)
y = compute_diff(df, -1).fillna(0).values.flatten()
NDates, NAssets = df.shape

In [None]:
# Least-squares fit of y on x. `a` keeps lstsq's full result tuple; its first entry is the coefficient vector.
a = np.linalg.lstsq(x, y, rcond=None)
my_y = np.dot(x, a[0])  # forecast on the same rows the model was fitted on

In [None]:
# Count observations where forecast and outcome share a sign (profit) or disagree (loss);
# observations where either one is exactly zero are left out of both counts.
profit = (my_y * y > 0).sum()
loss = (my_y * y < 0).sum()
print("percentage right:", profit / (profit + loss))

In [None]:
# Position per observation is proportional to the forecast.
trade = 1_000_000 * my_y

# Reshape the flat arrays back to one row per frame row, sum across columns, then average over rows.
print("average daily pnl:", np.mean(np.sum((trade * y).reshape(-1, NAssets), axis=1)))
print("average daily trade:", np.mean(np.sum(np.abs(trade).reshape(-1, NAssets), axis=1)))

In [None]:
# Total P&L per row of the price frame: reshape the flat per-observation P&L back to
# (rows, NAssets) and sum across the asset columns.
daily_profit0 = pd.Series((trade * y).reshape(-1, NAssets).sum(axis=1))
# Both curves are drawn on the same axes, so label them and show a legend to tell them apart.
daily_profit0.plot(label="daily pnl", legend=True)
daily_profit0.cumsum().plot(label="cumulative pnl", legend=True)

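Adding an intercept:
$$
y = ax + b
$$
is the same as
$$
y = a_1x_1 + a_2x_2, \qquad x_2 = 1
$$
with $a_1 = a$ and $a_2 = b$, so the intercept can be fitted by appending a column of ones to the regressors.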

In [None]:
# Two-column design matrix: the relative change versus the previous row, plus a constant
# column of ones whose coefficient acts as the intercept.
x1 = compute_diff(df, 1).fillna(0).values.flatten()
x2 = np.ones(x1.shape)
x = np.column_stack((x1, x2))
y = compute_diff(df, -1).fillna(0).values.flatten()

In [None]:
# Refit on the current design matrix and forecast on the same rows.
a = np.linalg.lstsq(x, y, rcond=None)
my_y = np.dot(x, a[0])

# Observations where forecast and outcome share a sign vs. disagree (exact zeros are excluded).
profit = (my_y * y > 0).sum()
loss = (my_y * y < 0).sum()
print("percentage right:", profit / (profit + loss))

# Position per observation is proportional to the forecast.
trade = 1_000_000 * my_y

# Reshape to one row per frame row, sum across columns, then average over rows.
print("average daily pnl:", np.mean(np.sum((trade * y).reshape(-1, NAssets), axis=1)))
print("average daily trade:", np.mean(np.sum(np.abs(trade).reshape(-1, NAssets), axis=1)))


In [None]:
# Total P&L per row of the price frame for the most recently fitted model.
daily_profit1 = pd.Series((trade * y).reshape(-1, NAssets).sum(axis=1))
# Overlay the cumulative P&L of both runs on one axes; label the curves so they can be told apart.
daily_profit0.cumsum().plot(label="cumulative pnl: daily_profit0", legend=True)
daily_profit1.cumsum().plot(label="cumulative pnl: daily_profit1", legend=True)

In [None]:
# One regressor column per look-back window: the relative change versus the row `lag` steps back.
# Start at lag 1: compute_diff(df, 0) compares the frame with itself, so that column is
# identically zero and only makes the design matrix rank-deficient. Dropping it leaves the
# least-squares forecast unchanged (up to rounding). Use range(1, 11) if ten windows are wanted.
xlist = [compute_diff(df, lag).fillna(0).values.flatten() for lag in range(1, 10)]
# Constant column for the intercept, sized from the features built here rather than from a
# variable left over from an earlier cell.
xlist += [np.ones(xlist[0].shape)]
x = np.concatenate([xx[:, None] for xx in xlist], axis=1)
# Target: relative change to the next row, flattened the same way as the regressors.
y = compute_diff(df, -1).fillna(0).values.flatten()

In [None]:
# Refit on the current design matrix and forecast on the same rows.
a = np.linalg.lstsq(x, y, rcond=None)
my_y = np.dot(x, a[0])

# Observations where forecast and outcome share a sign vs. disagree (exact zeros are excluded).
profit = (my_y * y > 0).sum()
loss = (my_y * y < 0).sum()
print("percentage right:", profit / (profit + loss))

# Position per observation is proportional to the forecast.
trade = 1_000_000 * my_y

# Reshape to one row per frame row, sum across columns, then average over rows.
print("average daily pnl:", np.mean(np.sum((trade * y).reshape(-1, NAssets), axis=1)))
print("average daily trade:", np.mean(np.sum(np.abs(trade).reshape(-1, NAssets), axis=1)))
# Signed sum: long and short positions offset each other.
print("average daily net trade:", np.mean(np.sum(trade.reshape(-1, NAssets), axis=1)))

In [None]:
# One regressor column per look-back window: the relative change versus the row `lag` steps back.
# Start at lag 1: compute_diff(df, 0) compares the frame with itself, so that column is
# identically zero and only makes the design matrix rank-deficient. Dropping it leaves the
# least-squares forecast unchanged (up to rounding). Use range(1, 101) if 100 windows are wanted.
xlist = [compute_diff(df, lag).fillna(0).values.flatten() for lag in range(1, 100)]
# Constant column for the intercept, sized from the features built here rather than from a
# variable left over from an earlier cell.
xlist += [np.ones(xlist[0].shape)]
x = np.concatenate([xx[:, None] for xx in xlist], axis=1)
# Target: relative change to the next row, flattened the same way as the regressors.
y = compute_diff(df, -1).fillna(0).values.flatten()

In [None]:
# Refit on the current design matrix and forecast on the same rows.
a = np.linalg.lstsq(x, y, rcond=None)
my_y = x.dot(a[0])

# Observations where forecast and outcome share a sign vs. disagree (exact zeros are excluded).
profit, loss = np.sum((my_y * y) > 0), np.sum((my_y * y) < 0)
print("percentage right:", profit / (profit + loss))

# Same 1,000,000 scale as the other runs in this notebook. The previous 100,000,000 made this
# run's P&L 100x larger than the curves it is plotted against.
trade = my_y * 1_000_000

# Reshape to one row per frame row, sum across columns, then average over rows.
print("average daily pnl:", (trade * y).reshape(-1, NAssets).sum(axis=1).mean() )
print("average daily trade:", abs(trade).reshape(-1, NAssets).sum(axis=1).mean() )
# Signed sum: long and short positions offset each other.
print("average daily net trade:", trade.reshape(-1, NAssets).sum(axis=1).mean() )

In [None]:
# Total P&L per row of the price frame for the most recently fitted model:
# reshape the flat per-observation P&L to (rows, NAssets) and sum across columns.
daily_profit2 = pd.Series(np.sum((trade * y).reshape(-1, NAssets), axis=1))

In [None]:
# Overlay the cumulative P&L of the three runs on one axes; label each curve and show a
# legend so the lines can be told apart.
daily_profit0.cumsum().plot(label="cumulative pnl: daily_profit0", legend=True)
daily_profit1.cumsum().plot(label="cumulative pnl: daily_profit1", legend=True)
daily_profit2.cumsum().plot(label="cumulative pnl: daily_profit2", legend=True)