# Rescaled Range Analysis

In [2]:
import pandas as pd
import numpy as np

In [3]:
# read spot data
df_spot = pd.read_pickle('overall.pkl')
df_perp = pd.read_pickle('perp_overall.pkl')
df_spot.ticker.unique()

array(['BINANCE_SPOT_BTC_USDT', 'BINANCE_SPOT_ETH_USDT',
       'BINANCE_SPOT_BNB_USDT', 'BINANCE_SPOT_ADA_USDT',
       'BINANCE_SPOT_XRP_USDT', 'BINANCE_SPOT_MATIC_USDT',
       'BINANCE_SPOT_SOL_USDT', 'BINANCE_SPOT_UNI_USDT',
       'BINANCE_SPOT_AVAX_USDT'], dtype=object)

#### Returns for 1 series and Hurst exponent

In [4]:
# using btc_usdt as an example
btc_usdt = df_spot.loc[df_spot['ticker'] == 'BINANCE_SPOT_BTC_USDT']
btc_usdt = btc_usdt[['time_period_start', 'price_close', 'ticker']] 
btc_usdt.reset_index(drop=True, inplace=True)
btc_usdt['time_period_start'] = pd.to_datetime(btc_usdt['time_period_start'], utc=True)

# calculate returns
btc_usdt['ret'] = btc_usdt['price_close'].pct_change() 

# create an extra df from 1st Nov 2020 to start splitting into 63 dfs
res_btc_usdt = btc_usdt[btc_usdt['time_period_start'] >= '2020-11-01']

## One month worth of 1hr data

#### testing on array splitting to get the mean for each series

In [6]:
hurst_one_range = res_btc_usdt.head(672)
ranges = {
    'one range': hurst_one_range,
    'two ranges': np.array_split(hurst_one_range, 2),
    'four ranges': np.array_split(hurst_one_range, 4),
    'eight range': np.array_split(hurst_one_range, 8),
    '16 range': np.array_split(hurst_one_range, 16),
    '32 range': np.array_split(hurst_one_range, 32),
}

In [69]:
# every value in ranges dict from 'two ranges' onwards, we find the mean 
list(ranges.values())[1][0]['ret'].mean()

0.0004745478555720082

#### Mean calculation for 63 ranges (on btc_usdt)

In [44]:
# start the list with the one_range df
list_of_means = [list(ranges.values())[0]['ret'].mean()]
start_date = [list(ranges.values())[0].iloc[:, 0:1].values[0][0]]
end_date = [list(ranges.values())[0].iloc[:, 0:1].values[-1][0]]

# number of dfs from 2-32 ranges
range_values = [2, 4, 8, 16, 32]

# loop through each df in dict values to calculate its mean 
for i in range(1, len(ranges)):
    
    # loop through the range_values for each inner df
    for j in range(range_values[i-1]):
        inner_df = list(ranges.values())[i][j]
        mean_value = inner_df['ret'].mean()
        start_dt = inner_df.iloc[:, 0:1].values[0][0]
        end_dt = inner_df.iloc[:, 0:1].values[-1][0]
        
        # store the mean, start_date and end_date
        list_of_means.append(mean_value)
        start_date.append(start_dt)
        end_date.append(end_dt)
        


In [45]:
# create dataframe to store the means, start date and end date range
six_three_means = pd.DataFrame(data=list_of_means, columns=['means for 63 series'])
six_three_means['start_date'] = start_date
six_three_means['end_date'] = end_date

In [48]:
six_three_means.tail()

Unnamed: 0,means for 63 series,start_date,end_date
58,-0.000238,2020-11-24 15:00:00+00:00,2020-11-25 11:00:00+00:00
59,-0.006248,2020-11-25 12:00:00+00:00,2020-11-26 08:00:00+00:00
60,0.001148,2020-11-26 09:00:00+00:00,2020-11-27 05:00:00+00:00
61,-0.000709,2020-11-27 06:00:00+00:00,2020-11-28 02:00:00+00:00
62,0.00204,2020-11-28 03:00:00+00:00,2020-11-28 23:00:00+00:00


#### For these 63 dataframes find the deviation from its mean

In [51]:
# pull all dfs into 1 list
list_of_dfs = [hurst_one_range]
ranges = [2,4,8,16,32]
for r in ranges:
    range_dfs = np.array_split(hurst_one_range, r)
    for df in range_dfs:
        list_of_dfs.append(df)

In [55]:
# find the mean deviations
means = six_three_means['means for 63 series'].tolist()

for df, mean in zip(list_of_dfs, means):
    df['mean_deviation'] = df['ret'] - mean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [58]:
# show the mean_deviation for one_range_hurst
list_of_dfs[0]

Unnamed: 0,time_period_start,price_close,ticker,ret,mean_deviation
116,2020-11-01 00:00:00+00:00,13725.95,BINANCE_SPOT_BTC_USDT,-0.004717,-0.005116
117,2020-11-01 01:00:00+00:00,13759.90,BINANCE_SPOT_BTC_USDT,0.002473,0.002074
118,2020-11-01 02:00:00+00:00,13715.88,BINANCE_SPOT_BTC_USDT,-0.003199,-0.003599
119,2020-11-01 03:00:00+00:00,13690.01,BINANCE_SPOT_BTC_USDT,-0.001886,-0.002286
120,2020-11-01 04:00:00+00:00,13677.25,BINANCE_SPOT_BTC_USDT,-0.000932,-0.001332
...,...,...,...,...,...
783,2020-11-28 19:00:00+00:00,17814.50,BINANCE_SPOT_BTC_USDT,0.004767,0.004367
784,2020-11-28 20:00:00+00:00,17673.21,BINANCE_SPOT_BTC_USDT,-0.007931,-0.008331
785,2020-11-28 21:00:00+00:00,17741.05,BINANCE_SPOT_BTC_USDT,0.003839,0.003439
786,2020-11-28 22:00:00+00:00,17760.03,BINANCE_SPOT_BTC_USDT,0.001070,0.000670


#### sum of mean deviations for each dataframe

In [67]:
# create the running total of mean_deviation in each df (63)
mean_deviation_sums = [sum(df['mean_deviation']) for df in list_of_dfs]
max_difference_each_range = [df['mean_deviation'].max() - df['mean_deviation'].min() for df in list_of_dfs]
stdev = [df['ret'].std() for df in list_of_dfs]

# set new df columns
six_three_means['mean_deviation_sums'] = mean_deviation_sums
six_three_means['max_difference'] = max_difference_each_range
six_three_means['stdev'] = stdev
six_three_means['rescaled_range'] = six_three_means['mean_deviation_sums']/six_three_means['stdev']


In [75]:
six_three_means['range'] = np.nan
six_three_means.iloc[31:, -1:] = int(32)
six_three_means.iloc[15:31, -1:] = int(16)
six_three_means.iloc[7:15, -1:] = int(8)
six_three_means.iloc[3:7, -1:] = int(4)
six_three_means.iloc[1:3, -1:] = int(2)
six_three_means.iloc[0:1, -1:] = int(1)


In [76]:
six_three_means

Unnamed: 0,means for 63 series,start_date,end_date,mean_deviation_sums,max_difference,stdev,rescaled_range,range
0,0.000399,2020-11-01 00:00:00+00:00,2020-11-28 23:00:00+00:00,7.164408e-16,0.079565,0.007264,9.863559e-14,1.0
1,0.000475,2020-11-01 00:00:00+00:00,2020-11-14 23:00:00+00:00,1.032160e-16,0.043182,0.006202,1.664170e-14,2.0
2,0.000324,2020-11-15 00:00:00+00:00,2020-11-28 23:00:00+00:00,-7.424616e-16,0.079565,0.008197,-9.057411e-14,2.0
3,0.000450,2020-11-01 00:00:00+00:00,2020-11-07 23:00:00+00:00,-2.673643e-16,0.041628,0.006718,-3.979927e-14,4.0
4,0.000499,2020-11-08 00:00:00+00:00,2020-11-14 23:00:00+00:00,-8.673617e-19,0.040180,0.005660,-1.532432e-16,4.0
...,...,...,...,...,...,...,...,...
58,-0.000238,2020-11-24 15:00:00+00:00,2020-11-25 11:00:00+00:00,3.767603e-18,0.019206,0.005608,6.717858e-16,32.0
59,-0.006248,2020-11-25 12:00:00+00:00,2020-11-26 08:00:00+00:00,-2.081668e-17,0.050998,0.012020,-1.731878e-15,32.0
60,0.001148,2020-11-26 09:00:00+00:00,2020-11-27 05:00:00+00:00,-1.734723e-18,0.056399,0.013615,-1.274169e-16,32.0
61,-0.000709,2020-11-27 06:00:00+00:00,2020-11-28 02:00:00+00:00,0.000000e+00,0.034939,0.010740,0.000000e+00,32.0
