# Introduction
Machine Learning Algorithm that takes an asset's high, low, close, open interest and volume information, trains and predicts if the future realized volatility will be higher or lower than a user defined level.  

Preprossing functions cleans and creates data frames that looks ahead to a forward period from points in time and determines if a user defined volatility threshold has been exceeded.  

This allows for a Machine Learning Algorithm to train on the data set and make predictions.   

Within this notebook, a Machine Learning clustering Decision Tree algorithm trains on a financial asset's daily market information.  Function allows user to set a range of volatilities to test after training.  It then makes predictions based on the most recent days in the data set.  

# Data
The data used here is from BarChart.com.  I download futures data that is 'daily nearby', It has the symbol which eventually needs to be removed within the function.  In order to process, the first line of the data needs to be the columns names and all the null values need to be filled in manually.  

In [1]:
import pandas as pd
import numpy as np

In [2]:
from IPython.display import display # Allows the use of display() for DataFrames

# Pretty display for notebooks
%matplotlib inline

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Import the classifier from sklearn
from sklearn.tree import DecisionTreeClassifier

In [5]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [6]:
from sklearn.metrics import precision_recall_fscore_support

In [7]:
og = pd.read_csv(r"C:\Users\Matt\Desktop\eur_data.csv")

In [8]:
len(og)

2017

In [9]:
def vol_convert(vol):
    rate = vol/1600
    return rate

In [10]:
start_vol = 4
end_vol = 10
step = .15
hist_period1 = 10
hist_period2 = 20
hist_period3 = 30
volume_period1 = 2
volume_period2 = 4
forward_vol_period = 20
vol_to_test = 6
rate = vol_convert(vol_to_test)

In [11]:
og.tail()

Unnamed: 0,Date Time,Symbol,Open,High,Low,Close,Change,Volume,Open Interest
2012,12/31/2019,E6H20,1.12565,1.1293,1.12515,1.1282,0.00255,120673,557585
2013,1/2/2020,E6H20,1.1267,1.12755,1.12135,1.1217,-0.0065,180883,555585
2014,1/3/2020,E6H20,1.1222,1.1229,1.11735,1.1217,0.0,171069,551151
2015,1/6/2020,E6H20,1.12085,1.12545,1.1206,1.12405,0.00235,141205,550100
2016,1/7/2020,E6H20,1.1246,1.12465,1.12025,1.12055,-0.0035,140000,550100


In [12]:
data = og.copy()

In [13]:
#process dataset ready for machine learning, add custome columns, forward vol uses mean close to close and abs change 

def process(dataframe, hist1, hist2, hist3, volume1, volume2, forward_vol_period, question_vol, cushion):
    
    
    dataframe['abs_change'] = dataframe['Change'].abs() / dataframe['Close']
    
    dataframe['high_move'] = (((dataframe.High - dataframe.Close.shift(1))/dataframe['Close']).abs())
    dataframe['low_move'] = (((dataframe.Low - data.Close.shift(1))/dataframe['Close']).abs())
    dataframe['max_move'] = dataframe[['high_move', 'low_move']].max(axis=1)
    
    dataframe['hist_max_1'] = dataframe.max_move.rolling(window=hist1).mean()
    dataframe['hist_max_2'] = dataframe.max_move.rolling(window=hist2).mean()
    dataframe['hist_max_3'] = dataframe.max_move.rolling(window=hist3).mean()
    
    dataframe['hist_change_1'] = dataframe.abs_change.rolling(window=hist1).mean()
    dataframe['hist_change_2'] = dataframe.abs_change.rolling(window=hist2).mean()
    dataframe['hist_change_3'] = dataframe.abs_change.rolling(window=hist3).mean()
    
    dataframe['avg_max_close'] = dataframe[['max_move', 'abs_change']].mean(axis=1)
    
    dataframe['hist_avgmax_1'] = dataframe.avg_max_close.rolling(window=hist1).mean()
    dataframe['hist_avgmax_2'] = dataframe.avg_max_close.rolling(window=hist2).mean()
    dataframe['hist_avgmax_3'] = dataframe.avg_max_close.rolling(window=hist3).mean()
    
    dataframe['hist_volume_1'] = dataframe.Volume.rolling(window=volume1).mean()
    dataframe['hist_volume_2'] = dataframe.Volume.rolling(window=volume2).mean()
    
    dataframe['backtothefuture'] = dataframe.avg_max_close.rolling(window=forward_vol_period).mean()
    
    dataframe['forward_avg_close_max'] = dataframe['backtothefuture'].shift(-forward_vol_period)
    
    
    
    
    
    
    dataframe['volatile'] = (dataframe['forward_avg_close_max'] > question_vol + cushion)
    dataframe = dataframe.applymap(lambda x: 1 if x == True else x)
    dataframe = dataframe.applymap(lambda x: 0 if x == False else x)
    #drop column 
    del dataframe['Symbol']
    del dataframe['Date Time']
    del dataframe['Open']
    del dataframe['High']
    del dataframe['Low']
    del dataframe['Close']
    del dataframe['backtothefuture']
    
    return dataframe 

In [14]:
#set and process dataset 
new_data = process(data,hist_period1 ,hist_period2 ,hist_period3 ,volume_period1,volume_period2,forward_vol_period, rate, 0)

In [15]:
new_data.tail()

Unnamed: 0,Change,Volume,Open Interest,abs_change,high_move,low_move,max_move,hist_max_1,hist_max_2,hist_max_3,...,hist_change_2,hist_change_3,avg_max_close,hist_avgmax_1,hist_avgmax_2,hist_avgmax_3,hist_volume_1,hist_volume_2,forward_avg_close_max,volatile
2012,0.00255,120673,557585,0.00226,0.003235,0.000443,0.003235,0.003253,0.00362,0.003349,...,0.002133,0.002013,0.002748,0.002713,0.002877,0.002681,137520.0,124611.75,,0
2013,-0.0065,180883,555585,0.005795,0.000579,0.006107,0.006107,0.003628,0.00386,0.003441,...,0.002419,0.002141,0.005951,0.003184,0.003139,0.002791,150778.0,157867.5,,0
2014,0.0,171069,551151,0.0,0.00107,0.003878,0.003878,0.003676,0.003898,0.003532,...,0.002387,0.002134,0.001939,0.00306,0.003143,0.002833,175976.0,156748.0,,0
2015,0.00235,141205,550100,0.002091,0.003336,0.000979,0.003336,0.003773,0.003923,0.003565,...,0.002386,0.002175,0.002713,0.003188,0.003154,0.00287,156137.0,153457.5,,0
2016,-0.0035,140000,550100,0.003123,0.000535,0.003391,0.003391,0.003591,0.003815,0.003599,...,0.002338,0.002242,0.003257,0.003036,0.003077,0.00292,140602.5,158289.25,,0


In [16]:
new_data['forward_avg_close_max'].describe()

count    1997.000000
mean        0.004923
std         0.001537
min         0.001984
25%         0.003904
50%         0.004727
75%         0.005725
max         0.010881
Name: forward_avg_close_max, dtype: float64

In [17]:
new_data['volatile'].value_counts()

1    1588
0     429
Name: volatile, dtype: int64

In [18]:
new_data = new_data.dropna(how = 'any')

In [19]:
new_data.head(20)

Unnamed: 0,Change,Volume,Open Interest,abs_change,high_move,low_move,max_move,hist_max_1,hist_max_2,hist_max_3,...,hist_change_2,hist_change_3,avg_max_close,hist_avgmax_1,hist_avgmax_2,hist_avgmax_3,hist_volume_1,hist_volume_2,forward_avg_close_max,volatile
30,-0.0002,238653,287495,0.000151,0.001586,0.002643,0.002643,0.006906,0.007666,0.008293,...,0.004079,0.004795,0.001397,0.005432,0.005872,0.006544,311980.5,304230.75,0.006502,1
31,0.0091,322144,280074,0.006824,0.010198,0.000975,0.010198,0.007609,0.007798,0.008468,...,0.004183,0.004934,0.008511,0.006105,0.00599,0.006701,280398.5,294831.0,0.006251,1
32,0.0124,265826,276830,0.009212,0.011293,0.001634,0.011293,0.008197,0.007985,0.008501,...,0.004556,0.004992,0.010253,0.00672,0.00627,0.006746,293985.0,302982.75,0.006104,1
33,-0.0062,261685,272859,0.004628,0.00097,0.006941,0.006941,0.007881,0.007851,0.008348,...,0.00442,0.004803,0.005784,0.006346,0.006135,0.006576,263755.5,272077.0,0.006158,1
34,0.006,253899,271282,0.004458,0.005573,0.000594,0.005573,0.007583,0.007638,0.007995,...,0.004334,0.004531,0.005016,0.006295,0.005986,0.006263,257792.0,275888.5,0.006001,1
35,-0.0119,351274,275227,0.008921,0.002174,0.01072,0.01072,0.0077,0.00783,0.007967,...,0.004628,0.004692,0.009821,0.006376,0.006229,0.006329,302586.5,283171.0,0.005645,1
36,-0.0023,252978,273182,0.001727,0.001352,0.004281,0.004281,0.007378,0.007531,0.007725,...,0.00444,0.004448,0.003004,0.006182,0.005986,0.006087,302126.0,279959.0,0.005696,1
37,-0.011,273141,279851,0.00833,0.001363,0.009693,0.009693,0.007602,0.007742,0.0077,...,0.0048,0.004476,0.009011,0.006414,0.006271,0.006088,263059.5,282823.0,0.005504,1
38,0.0021,225475,275921,0.001588,0.002797,0.003402,0.003402,0.007501,0.007623,0.00768,...,0.004837,0.004498,0.002495,0.006375,0.00623,0.006089,249308.0,275717.0,0.005486,1
39,-0.0121,293372,285604,0.009232,0.0,0.009385,0.009385,0.007413,0.007612,0.007657,...,0.005189,0.004563,0.009309,0.00646,0.006401,0.00611,259423.5,261241.5,0.00547,1


In [20]:
# Store the 'Survived' feature in a new variable and remove it from the dataset
outcomes = new_data['volatile']


del new_data['volatile']
del new_data['forward_avg_close_max']

# removed 'Symbol' in process function
#del new_data['Symbol']

features = new_data

# Show the new dataset with 'Survived' removed
features.head()

Unnamed: 0,Change,Volume,Open Interest,abs_change,high_move,low_move,max_move,hist_max_1,hist_max_2,hist_max_3,hist_change_1,hist_change_2,hist_change_3,avg_max_close,hist_avgmax_1,hist_avgmax_2,hist_avgmax_3,hist_volume_1,hist_volume_2
30,-0.0002,238653,287495,0.000151,0.001586,0.002643,0.002643,0.006906,0.007666,0.008293,0.003957,0.004079,0.004795,0.001397,0.005432,0.005872,0.006544,311980.5,304230.75
31,0.0091,322144,280074,0.006824,0.010198,0.000975,0.010198,0.007609,0.007798,0.008468,0.004601,0.004183,0.004934,0.008511,0.006105,0.00599,0.006701,280398.5,294831.0
32,0.0124,265826,276830,0.009212,0.011293,0.001634,0.011293,0.008197,0.007985,0.008501,0.005244,0.004556,0.004992,0.010253,0.00672,0.00627,0.006746,293985.0,302982.75
33,-0.0062,261685,272859,0.004628,0.00097,0.006941,0.006941,0.007881,0.007851,0.008348,0.004811,0.00442,0.004803,0.005784,0.006346,0.006135,0.006576,263755.5,272077.0
34,0.006,253899,271282,0.004458,0.005573,0.000594,0.005573,0.007583,0.007638,0.007995,0.005007,0.004334,0.004531,0.005016,0.006295,0.005986,0.006263,257792.0,275888.5


In [21]:
X_train, X_test, y_train, y_test = train_test_split(features, outcomes, test_size=0.30, random_state=42)

In [22]:
# TODO: Define the classifier, and fit it to the data
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [23]:
# Making predictions
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Calculate the accuracy
from sklearn.metrics import accuracy_score
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
print('The training accuracy is', train_accuracy)
print('The test accuracy is', test_accuracy)

The training accuracy is 1.0
The test accuracy is 0.868020304568528


In [24]:
# Training the model
model = DecisionTreeClassifier(max_depth=15, min_samples_leaf=20, min_samples_split=20)
model.fit(X_train, y_train)

# Making predictions
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Calculating accuracies
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print('The training accuracy is', train_accuracy)
print('The test accuracy is', test_accuracy)

The training accuracy is 0.9127906976744186
The test accuracy is 0.8934010152284264


In [25]:
def process_sample(dataframe, hist1, hist2, hist3, volume1, volume2):
    
    dataframe['abs_change'] = dataframe['Change'].abs() / dataframe['Close']
    
    dataframe['high_move'] = (((dataframe.High - dataframe.Close.shift(1))/dataframe['Close']).abs())
    dataframe['low_move'] = (((dataframe.Low - data.Close.shift(1))/dataframe['Close']).abs())
    dataframe['max_move'] = dataframe[['high_move', 'low_move']].max(axis=1)
    
    dataframe['hist_max_1'] = dataframe.max_move.rolling(window=hist1).mean()
    dataframe['hist_max_2'] = dataframe.max_move.rolling(window=hist2).mean()
    dataframe['hist_max_3'] = dataframe.max_move.rolling(window=hist3).mean()
    
    dataframe['hist_change_1'] = dataframe.abs_change.rolling(window=hist1).mean()
    dataframe['hist_change_2'] = dataframe.abs_change.rolling(window=hist2).mean()
    dataframe['hist_change_3'] = dataframe.abs_change.rolling(window=hist3).mean()
    
    dataframe['avg_max_close'] = dataframe[['max_move', 'abs_change']].mean(axis=1)
    
    dataframe['hist_avgmax_1'] = dataframe.avg_max_close.rolling(window=hist1).mean()
    dataframe['hist_avgmax_2'] = dataframe.avg_max_close.rolling(window=hist2).mean()
    dataframe['hist_avgmax_3'] = dataframe.avg_max_close.rolling(window=hist3).mean()
    
    dataframe['hist_volume_1'] = dataframe.Volume.rolling(window=volume1).mean()
    dataframe['hist_volume_2'] = dataframe.Volume.rolling(window=volume2).mean()
    

    #drop column 
    del dataframe['Open']
    del dataframe['High']
    del dataframe['Low']
    del dataframe['Close']
    
    return dataframe 

In [26]:
new_og = process_sample(og, hist_period1 ,hist_period2 ,hist_period3 , volume_period1, volume_period2)

In [27]:
new_og.tail()

Unnamed: 0,Date Time,Symbol,Change,Volume,Open Interest,abs_change,high_move,low_move,max_move,hist_max_1,...,hist_max_3,hist_change_1,hist_change_2,hist_change_3,avg_max_close,hist_avgmax_1,hist_avgmax_2,hist_avgmax_3,hist_volume_1,hist_volume_2
2012,12/31/2019,E6H20,0.00255,120673,557585,0.00226,0.003235,0.000443,0.003235,0.003253,...,0.003349,0.002173,0.002133,0.002013,0.002748,0.002713,0.002877,0.002681,137520.0,124611.75
2013,1/2/2020,E6H20,-0.0065,180883,555585,0.005795,0.000579,0.006107,0.006107,0.003628,...,0.003441,0.002739,0.002419,0.002141,0.005951,0.003184,0.003139,0.002791,150778.0,157867.5
2014,1/3/2020,E6H20,0.0,171069,551151,0.0,0.00107,0.003878,0.003878,0.003676,...,0.003532,0.002444,0.002387,0.002134,0.001939,0.00306,0.003143,0.002833,175976.0,156748.0
2015,1/6/2020,E6H20,0.00235,141205,550100,0.002091,0.003336,0.000979,0.003336,0.003773,...,0.003565,0.002604,0.002386,0.002175,0.002713,0.003188,0.003154,0.00287,156137.0,153457.5
2016,1/7/2020,E6H20,-0.0035,140000,550100,0.003123,0.000535,0.003391,0.003391,0.003591,...,0.003599,0.002481,0.002338,0.002242,0.003257,0.003036,0.003077,0.00292,140602.5,158289.25


In [28]:
# delete all non-floats

del new_og['Date Time']
del new_og['Symbol']


In [29]:
new_og.tail()

Unnamed: 0,Change,Volume,Open Interest,abs_change,high_move,low_move,max_move,hist_max_1,hist_max_2,hist_max_3,hist_change_1,hist_change_2,hist_change_3,avg_max_close,hist_avgmax_1,hist_avgmax_2,hist_avgmax_3,hist_volume_1,hist_volume_2
2012,0.00255,120673,557585,0.00226,0.003235,0.000443,0.003235,0.003253,0.00362,0.003349,0.002173,0.002133,0.002013,0.002748,0.002713,0.002877,0.002681,137520.0,124611.75
2013,-0.0065,180883,555585,0.005795,0.000579,0.006107,0.006107,0.003628,0.00386,0.003441,0.002739,0.002419,0.002141,0.005951,0.003184,0.003139,0.002791,150778.0,157867.5
2014,0.0,171069,551151,0.0,0.00107,0.003878,0.003878,0.003676,0.003898,0.003532,0.002444,0.002387,0.002134,0.001939,0.00306,0.003143,0.002833,175976.0,156748.0
2015,0.00235,141205,550100,0.002091,0.003336,0.000979,0.003336,0.003773,0.003923,0.003565,0.002604,0.002386,0.002175,0.002713,0.003188,0.003154,0.00287,156137.0,153457.5
2016,-0.0035,140000,550100,0.003123,0.000535,0.003391,0.003391,0.003591,0.003815,0.003599,0.002481,0.002338,0.002242,0.003257,0.003036,0.003077,0.00292,140602.5,158289.25


In [30]:
last_date = new_og.index[-1]
last_date

2016

In [31]:
one_sample = [new_og.iloc[last_date,:]] 

In [32]:
model.predict(one_sample)

array([0], dtype=int64)

In [33]:
def tail_vol(period, dataframe):
    rate = dataframe['avg_max_close'].tail(period).mean()
    vol = round((rate*100*16),2)
    
    return vol

In [34]:
tail_vol(20, new_og)

4.92

In [35]:
answer = model.predict(one_sample)
answer = answer[0]
answer

0

In [36]:
def find_vol(dataframe, start_vol, end_vol, step, hist_period1, hist_period2, hist_period3, 
             volume_period1, volume_period2, forward_vol_period):
    vol = start_vol
    while vol < end_vol: 
        answer = 1 
        rate = vol_convert(vol)
        beg_frame = dataframe.copy()
        
        frame = process(dataframe,hist_period1 ,hist_period2 ,hist_period3 ,volume_period1,volume_period2,
                        forward_vol_period, rate, 0)
   
    
        frame = frame.dropna(how = 'any')
    
        outcomes = frame['volatile']
        features = frame

        del frame['volatile']
        del frame['forward_avg_close_max']
        X_train, X_test, y_train, y_test = train_test_split(features, outcomes, test_size=0.30, random_state=42)
        # Training the model
        model = DecisionTreeClassifier(max_depth=15, min_samples_leaf=20, min_samples_split=20)
        model.fit(X_train, y_train)

        # Making predictions
        y_train_pred = model.predict(X_train)
        y_test_pred = model.predict(X_test)

        # Calculating accuracies
        train_accuracy = accuracy_score(y_train, y_train_pred)
        test_accuracy = accuracy_score(y_test, y_test_pred)
        precision = precision_score(y_test, y_test_pred)
        recall = recall_score(y_test, y_test_pred)
        the_f1 = f1_score(y_test, y_test_pred)
        
   
    
        original_data = process_sample(beg_frame, hist_period1 ,hist_period2 ,hist_period3 , volume_period1, volume_period2)
        #original_data = original_data.dropna(how = 'any')
        
        del original_data['Date Time']
        last_row = original_data.index[-1]
        one_sample = [new_og.iloc[last_row,:]] 
    
        answer = model.predict(one_sample)
        answer = answer[0]
        print(vol)
        print(answer)
        print('The training accuracy is', train_accuracy)
        print('The test accuracy is', test_accuracy)
        print('The precision is', precision)
        print('The recall is', recall)
        print('The F1 is', the_f1 )
        
        
        print('The number of days', len(outcomes))
        print('The percentage of volatile days', ((outcomes == 1).sum()) / len(outcomes))
        
        
        
        #print(frame.head(1))
    
        #if answer == 0:
            #break
        
        vol = vol + step


In [45]:
fv_dataframe = pd.read_csv(r"C:\Users\Matt\Desktop\eur_data.csv")
vol = 4.75
start_vol = 2
end_vol = 6
step = .25
hist_period1 = 10
hist_period2 = 15
hist_period3 = 20
volume_period1 = 4
volume_period2 = 8
forward_vol_period = 25
rate = vol_convert(vol)

In [46]:
find_vol(fv_dataframe, start_vol, end_vol, step, hist_period1, hist_period2, hist_period3, 
         volume_period1, volume_period2, forward_vol_period)

2
1
The training accuracy is 1.0
The test accuracy is 1.0
The precision is 1.0
The recall is 1.0
The F1 is 1.0
The number of days 1972
The percentage of volatile days 1.0
2.25
1
The training accuracy is 1.0
The test accuracy is 1.0
The precision is 1.0
The recall is 1.0
The F1 is 1.0
The number of days 1972
The percentage of volatile days 1.0
2.5
1
The training accuracy is 1.0
The test accuracy is 1.0
The precision is 1.0
The recall is 1.0
The F1 is 1.0
The number of days 1972
The percentage of volatile days 1.0
2.75
1
The training accuracy is 1.0
The test accuracy is 1.0
The precision is 1.0
The recall is 1.0
The F1 is 1.0
The number of days 1972
The percentage of volatile days 1.0
3.0
1
The training accuracy is 1.0
The test accuracy is 1.0
The precision is 1.0
The recall is 1.0
The F1 is 1.0
The number of days 1972
The percentage of volatile days 1.0
3.25
1
The training accuracy is 1.0
The test accuracy is 1.0
The precision is 1.0
The recall is 1.0
The F1 is 1.0
The number of days 19

In [47]:
ex = process(fv_dataframe,hist_period1 ,hist_period2 ,hist_period3 ,volume_period1,volume_period2,
                        forward_vol_period, rate, 0)

In [48]:
del ex['forward_avg_close_max']
del ex['volatile']

test_rate = vol_convert(vol)

ex['volatile'] = (ex['avg_max_close'] > test_rate)

ex = ex.applymap(lambda x: 0 if x == False else x)
ex = ex.applymap(lambda x: 1 if x == True else x)

In [49]:
ex = ex.dropna(how = 'any')
len(ex)

1997

In [50]:
print('the 5 day scalp average vol is', tail_vol(5, ex) )
print('the 10 day scalp average vol is', tail_vol(10, ex) )
print('the 20 day scalp average vol is', tail_vol(20, ex) )
print('the 40 day scalp average vol is', tail_vol(40, ex) )
print('the 60 day scalp average vol is', tail_vol(60, ex) )
print('the 90 day scalp average vol is', tail_vol(40, ex) )
print('the 120 day scalp average vol is', tail_vol(60, ex) )


the 5 day scalp average vol is 5.31
the 10 day scalp average vol is 4.86
the 20 day scalp average vol is 4.92
the 40 day scalp average vol is 4.26
the 60 day scalp average vol is 4.33
the 90 day scalp average vol is 4.26
the 120 day scalp average vol is 4.33


In [51]:
fourty = ex.tail(40)
thirty = ex.tail(30)
twenty = ex.tail(20)
ten = ex.tail(10)
five = ex.tail(5)

#new['volatile'].sum()/len(new)

In [52]:
print('vol is', vol)
print('the percentage of volatile days over the last 5 days:', five['volatile'].sum()/len(five) )
print('the percentage of volatile days over the last 10 days:', ten['volatile'].sum()/len(ten) )
print('the percentage of volatile days over the last 20 days:', twenty['volatile'].sum()/len(twenty) )
print('the percentage of volatile days over the last 30 days:', thirty['volatile'].sum()/len(thirty) )
print('the percentage of volatile days over the last 40 days:', fourty['volatile'].sum()/len(fourty) )


vol is 4.75
the percentage of volatile days over the last 5 days: 0.4
the percentage of volatile days over the last 10 days: 0.3
the percentage of volatile days over the last 20 days: 0.4
the percentage of volatile days over the last 30 days: 0.36666666666666664
the percentage of volatile days over the last 40 days: 0.275


In [None]:
five.tail(1)

In [None]:
len(ex)