In [None]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Print the full path of every file found under the Kaggle input directory,
# descending into sub-directories in the order os.walk visits them.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
# Read the whole of original.csv (no row limit is passed to read_csv),
# tag the frame with its source file name, and report its dimensions.
df = pd.read_csv(
    '/kaggle/input/5years-dailystock-quotes/original.csv',
    sep=',',
)
df.dataframeName = 'original.csv'
nRow, nCol = df.shape
print(f'There are {nRow} rows and {nCol} columns')

In [None]:
# Parse the timestamp strings and expose them under the column name `date`.
#
# The rename runs first and the conversion then targets `date`, which keeps
# this cell safe to execute more than once: rename silently skips a column
# that is no longer present, and to_datetime leaves an already-parsed
# datetime column unchanged. Converting `df['datetime']` before renaming it
# away would raise KeyError on a second run.
new_format = "%Y-%m-%dT%H:%M:%SZ"
df.rename(columns={'datetime':'date'}, inplace=True)
df['date'] = pd.to_datetime(df['date'], format=new_format)
df

In [None]:
def timing_experiment(symbol, measured_days, step_size, trials = None):
    """Compute rolling holding-period returns for one price series.

    Trial ``i`` takes the ``measured_days`` consecutive rows of ``symbol``
    starting at row ``i * step_size`` and expresses every value in that
    window as a fractional change from the window's first value
    (``value / first_value - 1``). Rows are used in the order given; the
    function does not sort them.

    Parameters
    ----------
    symbol : pd.DataFrame
        Frame with exactly one column of values. Each index entry must offer
        a ``.date()`` method (for example a ``DatetimeIndex``), because the
        first index entry of every window becomes that trial's column label.
    measured_days : int
        Number of rows in each window. Must be positive.
    step_size : int
        Number of rows between the starts of consecutive windows. Must be
        positive.
    trials : int, optional
        Number of windows to evaluate. When omitted, or when larger than the
        number of complete windows that fit in ``symbol``, the maximum is
        used and a message stating that number is printed.

    Returns
    -------
    pd.DataFrame
        One column per trial, labelled ``str(first_index_entry.date())``,
        with rows ``0 .. measured_days - 1`` counting rows since the start of
        the window. Empty when no complete window fits.

    Raises
    ------
    AssertionError
        If an argument has the wrong type, ``symbol`` does not have exactly
        one column, or ``measured_days`` / ``step_size`` is not positive.
    """
    assert isinstance(symbol, pd.DataFrame), "DataFrame is required for input"
    assert len(symbol.columns) == 1, "Can only support one symbol at a time."
    assert isinstance(measured_days, int)
    assert isinstance(step_size, int)
    assert measured_days > 0, "measured_days must be positive."
    assert step_size > 0, "step_size must be positive."

    # The window starting at row i*step_size is complete while
    # i*step_size + measured_days <= len(symbol); i starts at 0, so the
    # number of complete windows is floor(spare_rows / step_size) + 1.
    spare_rows = len(symbol) - measured_days
    max_trials = spare_rows // step_size + 1 if spare_rows >= 0 else 0

    if trials is None:
        trials = max_trials
        print("Trials not specified. Using maximum allowed trials: " + str(trials))
    elif trials > max_trials:
        trials = max_trials
        print("Trials exceed maximum allowed. Using maximum allowed trials: " + str(trials))

    prices = symbol.iloc[:, 0]
    tests = {}
    for i in range(trials):
        start = i * step_size
        window = prices.iloc[start:start + measured_days]
        key_name = str(window.index[0].date())
        # Drop the date index so every trial shares the same 0..n-1 row
        # labels and the columns line up when assembled into one frame.
        tests[key_name] = (window / window.iloc[0] - 1).reset_index(drop=True)
    return pd.DataFrame(tests)

In [None]:
# Choose the ticker and keep only its rows. The boolean mask gets its own
# name so that `ticker` keeps holding the symbol string.
ticker = 'WWE'
is_ticker = df['symbol'] == ticker
stock = df[is_ticker]
assert not stock.empty, f"No rows found for ticker {ticker!r}"

# timing_experiment takes a one-column frame and slices it by row position,
# so index the closing prices by date and make sure the rows run from oldest
# to newest (mergesort is stable, so equal dates keep their file order).
col = ['date','close_price']
symbol = stock[col].set_index('date').sort_index(kind='mergesort')

# Window geometry, counted in rows of `symbol`.
MEASURED_DAYS = 253  # the summary cell treats 253 rows as one year
STEP_SIZE = 21       # presumably about one month of trading days - TODO confirm

#run
result = timing_experiment(
    symbol = symbol,
    measured_days = MEASURED_DAYS,
    step_size = STEP_SIZE
    )

# One line per entry date; the horizontal rule at zero marks break-even.
ax = result.plot(figsize=(16,6), legend = False)
ax.axhline(y=0, color='black', linestyle='-')
ax.set(
    title=f"{ticker}: return paths over {MEASURED_DAYS} rows, one line per entry date",
    xlabel="Rows since entry",
    ylabel="Fractional change from entry close",
)
plt.show()

In [None]:
# Last row of `result`: the return each trial shows at the end of its window.
end_values = result.iloc[-1]
# Trials that finished at or below break-even are counted as losses.
loss_count = int((end_values <= 0).sum())

# Distribution of end-of-window returns across trials.
end_values.hist(bins = 10)

avg_return = 100 * end_values.mean()

first_trial, last_trial = result.columns[0], result.columns[-1]
# 253 rows are treated as one year; %5d drops the fractional part.
years_held = len(result)/253
trial_count = len(end_values)

print("Test period: %s to %s" % (first_trial, last_trial))
print("Time in market: ~%5d years" % (years_held))
print("Average return: %8.2f percent" % (avg_return))
print("Misc. Summary")
# Positions 2..7 of describe() are std, min, 25%, 50%, 75% and max.
print(end_values.describe().iloc[2:8]*100)
print("Loss ratio: %5d out of %5d (%8.2f percent)" % (loss_count, trial_count, loss_count*100/trial_count))