In [16]:
import pandas as pd
import numpy as np 
import os
import sys
import matplotlib.pyplot as plt
import datetime
from tqdm.notebook import tqdm
import plotly.express as px
import plotly.graph_objects as go
import statistics 
from statistics import mode 
import math

Goal : Given an asset A and time intervals - t1 and t2, we study the following effect. 

Suppose the returns of asset A resampled over t1 is above a certain threshold which we call the pump threshold. We then ask what effect a spike in returns over the pump threshold might have over the interval - t2 starting from the end of interval t1. 

For example, the price of BTC goes up by over 10% in 4 minutes. We might want to know if this carries over for the next minute. A lot of people look at candlestick charts on the exchanges which are which usually run over intervals of size 1m,3m,5m,15m,30m,1H,2H,4H. Hence, could one might expect behaviour over intervals outside this set to remain in sync ?  

### Loading df

In [17]:
def load_df(path):
    """
    input : 
           path - string which indicates where csv file is located. 
           
    output :
           df - convert csv to df and counts the total NaN in the dataframe. 
    """
    df = pd.read_csv(path)
    df.columns = ['timestamp','open','high','low','close','volume']
    df['date'] = pd.to_datetime(df['timestamp'], unit = 'ms')
    df = df.set_index(pd.DatetimeIndex(df['date']))
    df = df.drop(['date'],axis = 1)        
    df['returns'] = df['open'].pct_change()
    print('Total NaN : ' + "\n")
    print(df.isna().sum())
    print('')
    return(df)

### Checking for missing values

In [18]:
def check_for_missing_timestamps(df):
    """
    input : df - dataframe with the OHLCV data. 
           
    output : count - total number of holes in the data. By hole we mean an interval for which there is no data. 
             
             missing_indices - list of indices which are the starting points for missing data. 
    """
    
    m = len(df)
    X = df['timestamp'].values
    count = 0
    missing_indices = []
    
    
    l_temp = [X[i]-X[i-1] for i in range(1,len(X))]
    interval_length = mode(l_temp)    # We assume that we more or less have all the data. 
    
    
    for i in range(1,m):
        if X[i]-X[i-1] != interval_length:
            count += 1
            error = np.timedelta64(X[i]-X[i-1],'ms')
            missing_indices.append((i,error))
    
    t = (np.array(missing_indices).sum())
    t = t.astype('int')
    t = t/60000 
    
    print('Number of instances for which we have intervals of missing data is {}.'.format(count))
    print('')
    print('Total amount of missing time in dataframe is {} minutes'.format(t))
    
    return(count,missing_indices)     

In [19]:
def resample(df,interval):
    """
    Input : df -- dataframe OHLCV with pandas datetime as index.
            interval -- string eg. 1T,1H,1D,1m 
    
    Output : df resampled. 
    
    In the resample code, T -- minute eg 5T will resample for 5 min intervals,
    H -- hours, D -- days, m -- months.
    """
    
    ohlc_dict = {
        'open':'first',
        'high':'max',
        'low':'min',
        'close':'last',
        'volume':'sum'
        }
    
    df = df.resample(interval).agg(ohlc_dict)
    
    df['returns'] = df['open'].pct_change()
    
    df = df.dropna(axis = 0)
    
    return(df)

In [20]:
def plot_daily_price_volume(df):
    df_1d = resample(df,'1D')

    area_chart = px.area(df_1d.close, title = 'Daily price')
    
    area_chart.update_xaxes(title_text = 'Date')
    area_chart.update_yaxes(title_text = 'close price', tickprefix = '$')
    area_chart.update_layout(showlegend = False)
    area_chart.show()
    
    area_chart = px.area(df_1d.volume, title = 'Daily volume')

    area_chart.update_xaxes(title_text = 'Date')
    area_chart.update_yaxes(title_text = 'volume')
    area_chart.update_layout(showlegend = False)
    area_chart.show()

In [21]:
def check_for_pump_action(df,pump_thresh,a = np.inf):
    """
    input - 
        df : dataframe with a column for returns data.
        pump_thresh : pump threshold
        
    output -
        singular_indices : indices where the returns data is above 
                       the threshold
    """
    if a == np.inf:
        a = np.percentile(df['returns'],pump_thresh)
    
    m = len(df)
    singular_indices = []
    for i in range(m):
        if df['returns'][i] >= a:
            singular_indices.append(i)
    singular_date_indices = df.index[singular_indices]        
    return(singular_indices,singular_date_indices)

In [109]:
def index_mapping(df1,df2):
    """
    input - 
        df1,df2 : Dataframes with indices in pd.Datetime format.
                  Assume len(df1) is less than len(df2) i.e. df1 is finer than df2.
    output -
        index_mapping_before : dict which maps an index i in df2 to the first index j in df1 such that j is before i.  
        index_mapping_after : dict which maps an index i in df2 to the first index j in df1 such that j is after i. 
    """
    
    m1 = len(df1)
    m2 = len(df2)
    
    i = 0
    j = 0
    current = df2.index[j]
    
    index_mapping_before = {}
    index_mapping_after = {}
    
    while i < m1:
        if df1.index[i] <= current:
            i += 1
        
        else:
            index_mapping_before[current] = df1.index[i-1]
            index_mapping_after[current] = df1.index[i]
            i += 1
            j += 1
            if j >= m2:
                break
            else: 
                current = df2.index[j]
                
    return(index_mapping_before,index_mapping_after)

In [23]:
def get_dict_datetime_index(df):
    """
    input - 
        df : Dataframes with index in pd.Datetime format.
        
    output -
        d : dict which maps every datetime index to its index position in the list. 
    """
    
    l = list(df.index)
    d = {}
    for i,a in enumerate(l):
        d[a] = i
    return(d)    

The following function will be used in the following way : 

I) Input data :

a)Start with df1 which will be the baseline dataframe where the data is probably sampled minutewise. 

b) The dataframe df2 has data resampled at a coarser rate eg. 5 minute intervals. 

c) We calculate the singular indices for df2. These indicate instances in the time series 
when there was a big jump in the returns of df2. 

d) The datetime values at the singular indices are called singular_date_indices. 

e) The follow up measure is the time interval following the singular event that you are interested. 
   For instance - Assume follow_up_measure is 2 minutes. On the 5 minute chart, there is a big positive change in        returns. We would like to know if this price momentum will carry over for the next 2 minutes i.e. if we will have 
   positive returns over the 2 minute interval. 
   
II) Output data :

a) tranches - array of arrays of length follow_up_measure (base unit will be the units of df1) - sections of data of time interval follow_up_measure following the singular event.

b) singular_indices_with_holes_after - those instances where we might not have data following the pump i.e. following    the 5 minute pump, there is no data. 

In [24]:
def get_follow_up(df1,df2,singular_indices,singular_date_indices,follow_up_measure,no_deets_pls = False):
    """
    See above for notes on the function. 
    """
    
    index_mapping_before, index_mapping_after = index_mapping(df1,df2)
    r = min(100,len(df2))-1
    mode2 = mode([df2.index[i]- df2.index[i-1] for i in range(r)])
    
    singular_indices_with_holes_after = []
    
    tranches = []
    
    d1 = get_dict_datetime_index(df1)
    d2 = get_dict_datetime_index(df2)
    
    for a in (singular_date_indices):
        a_ind = d2[a]
        if a_ind + 1 <= len(df2)-1:
            if df2.index[a_ind + 1] - a != mode2:
                singular_indices_with_holes_after.append(a)
            else:
                index_df = index_mapping_after[a]
                
                tranch = []            
                tmp = d1[index_df]
                i = tmp
                
                while df1.index[i] - index_df < follow_up_measure:
                    tranch.append(df1.index[i])
                    i += 1
                #print(len(tranch))    
                tranches.append(tranch)    
                
        else:
            singular_indices_with_holes_after.append(a)
            
    if not no_deets_pls:
        print('The percentage of singular indices for which the follow up data has missing information is {}'.format(len(singular_indices_with_holes_after)/len(singular_indices))) 
           
    return(tranches,singular_indices_with_holes_after)


In [25]:
def tranch_quality_check(tranches):
    """Check the quality of the output of the tranches we get from the function above."""
    
    mode1 = mode([len(x) for x in tranches])
    count = 0
    for x in tranches:
        if len(x) != mode1:
            count += 1
    print('The percentage of singular indices for which the follow up time interval has missing data is {}'.format(count/len(tranches)))        
    return(count)

In [26]:
def aggregate_df(df,d):
    """
    Used to take the tranch data and then build a dataframe from all the tranches i.e. aggregate the
    information on each tranch and then put it together in a dataframe so we can analyze it later. 
    """
    d['date'].append(df.index[0])
    d['open'].append(df['open'][0])
    d['high'].append(max(df['high']))
    d['low'].append(min(df['low']))
    d['close'].append(df['close'][-1])
    d['volume'].append(sum(df['volume']))
    d['intra_interval_returns'].append(100*(df['close'][-1] - df['open'][0])/df['open'][0])
    return(d)

In [27]:
def create_follow_up_df(df,tranches):
    """
    Aggregates tranches to get dataframe. 
    """
    
    d = {}
    for x in ['date','open','high','low','close','volume','intra_interval_returns']:
        d[x] = []
        
    for t in tranches:
        e = df.loc[t]
        d = aggregate_df(e,d) 
    
    follow_up_df = pd.DataFrame(d)
    follow_up_df = follow_up_df.set_index('date')
    
    return(follow_up_df)

In [28]:
def get_probability_of_success(df,pump_threshold_value,resample_measure,follow_up_measure,no_deets_pls = False):
    """
Input - df - dataframe with returns column and index of pd.Timedeltas.
        pump_threshold_value - the pump_threshold'th percentile of returns.
        resample_measure - pd.Timedelta or string object - this is the time measure over 
                           for which we want to study big price spikes. 
        follow_up_measure - pd.Timedelta object - this is the time measure over 
                           for which we want to study the IMPACT of the big price spike.
                           
Output - p - probability that the returns in the follow_up_measure is positive if
             we had a big hike over the resample measure period. 
    """
    df_resampled = resample(df,resample_measure) 
    singular_indices, singular_date_indices = check_for_pump_action(df_resampled,_,pump_threshold_value)
    p,q = index_mapping(df,df_resampled)
         
    tranches,singular_indices_with_holes_after = get_follow_up(df,df_resampled,singular_indices,singular_date_indices,follow_up_measure,no_deets_pls)    
    
    follow_up_df = create_follow_up_df(df,tranches)
    
    if len(follow_up_df) > 0:
        p = len(follow_up_df[follow_up_df['intra_interval_returns'] > 0])/len(follow_up_df)
    else:
        p = 'NaN'
    
    return(p) 



In [29]:
""" Example way to run the above section of the notebook."""

path = '../Binance/crypto_pumps/data/raw/Binance/BTC_USDT.csv'

df_baseline = load_df(path)

df_baseline = df_baseline.dropna(axis = 0)

check_for_missing_timestamps(df_baseline)

df_resampled = resample(df_baseline,'5T')

singular_indices, singular_date_indices = check_for_pump_action(df_resampled,95)

p,q = index_mapping(df_baseline,df_resampled)

follow_up_measure = pd.Timedelta(3,unit = "m")

tranches,singular_indices_with_holes_after = get_follow_up(df_baseline,df_resampled,singular_indices,singular_date_indices,follow_up_measure)

Total NaN : 

timestamp    0
open         0
high         0
low          0
close        0
volume       0
returns      1
dtype: int64

Number of instances for which we have intervals of missing data is 22.

Total amount of missing time in dataframe is 5160.963466666667 minutes
The percentage of singular indices for which the follow up data has missing information is 0.0


In [31]:
tranch_quality_check(tranches)

follow_up_df = create_follow_up_df(df_baseline,tranches)

The percentage of singular indices for which the follow up time interval has missing data is 0.0


In [32]:
# probability that strategy would be successful (this honestly makes no sense since we are 
# after the fact information. But we run it to check if the code works. See strategy section 
# below. )

len(follow_up_df[follow_up_df['intra_interval_returns'] > 0])/len(follow_up_df)

0.45760447808663574

## Strategy 

We run the following simple strategy.

Choose time intervals t_1,t_2 and a threshold percentile called the pump threshold.

Resample the returns to measure t_1 and look at the data from the previous month. Find the resampled returns value corresponding to the pump threshold.

Find the probability that you have positive returns for a period of time t_2 following an interval of size t_1 where the returns are in the top pump threshold percentile. This should measure how strong the momentum in the interval t_1 carries over to t_2. 

In [41]:
def calculate_pump_threshold_values(df,pump_threshold):
    """
    input : df - baseline dataframe.  
            pump_threshold - int 0 ---> 100 - the percentile we wish to isolate. 
    
    output : list of pump_threshold_values for all months over which df runs. Pump threshold 
             values are the values corresponding to pump thresholds which are percentiles. 
    """
    g = df.groupby(pd.Grouper(freq = 'M'))
    df_groups = [group for _,group in g] 
    l = []
     
    for df_temp in df_groups:
        a = np.percentile(df_temp['returns'],pump_threshold)
        l.append(a)
    
    return(l)

In [75]:
def get_historical_probabilities_of_success(df,pump_thresh,resample_measure,follow_up_measure):
    """
    Input : df - baseline dataframe
            ptv_list - list of pump threshold values
    Output : list of probabilities of success         
    """
    g = df.groupby(pd.Grouper(freq = 'M'))
    df_groups = [group for _,group in g] 
    
    df_resampled = resample(df,resample_measure)
    ptv_list = calculate_pump_threshold_values(df_resampled,pump_thresh)
    
    m = len(df_groups)
    
    p = {}
    for i in tqdm(range(m)):
        if i > 0:
            df_temp = df_groups[i]
            x = df_temp.index[0]
            s = str(x.month)+'/'+str(x.year)
            ptv_temp = ptv_list[i-1]
            q = get_probability_of_success(df_temp,ptv_temp,resample_measure,follow_up_measure,True)
            p[s] = q
        
    return(p)

In [80]:
resample_measure = pd.Timedelta(221,unit = 'm')
follow_up_measure = pd.Timedelta(3,unit = "m")

In [81]:
path = '../Binance/crypto_pumps/data/raw/Binance/AVAX_USDT.csv'

df_baseline = load_df(path)

df_baseline = df_baseline.dropna(axis = 0)

check_for_missing_timestamps(df_baseline)


get_historical_probabilities_of_success(df_baseline,98,resample_measure,follow_up_measure)

Total NaN : 

timestamp    0
open         0
high         0
low          0
close        0
volume       0
returns      1
dtype: int64

Number of instances for which we have intervals of missing data is 3.

Total amount of missing time in dataframe is 360.0687 minutes


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




{'10/2020': 0.0,
 '11/2020': 0.0,
 '12/2020': 0.6666666666666666,
 '1/2021': 0.4857142857142857,
 '2/2021': 1.0}

In [133]:
#g = df_baseline.groupby(pd.Grouper(freq = 'M'))
#df_groups = [group for _,group in g] 

In [80]:
#e = df_groups[0]

In [83]:
#get_probability_of_success(e,ptv_list[0],'5T',follow_up_measure,False)

In [107]:
def get_price_impact_study(coin_pairs,pump_threshold,time_measure_pairs):
    base_path = '../Binance/crypto_pumps/data/raw/Binance/'
    l = os.listdir('../Binance/crypto_pumps/data/raw/Binance')
    l.sort()
    price_impact = {}
    for x in coin_pairs:
        print('')
        print('Commencing study for pair {}'.format(x))
        path = base_path + x
        df_baseline = load_df(path)
        df_baseline = df_baseline.dropna(axis = 0)
        check_for_missing_timestamps(df_baseline)
        for t in time_measure_pairs:
            resample_measure = pd.Timedelta(t[0],unit = 'm')
            follow_up_measure = pd.Timedelta(t[1],unit = "m")
            print('')
            print('Studying price impact for pair {}'.format(t))
            price_impact[(x,t)] = get_historical_probabilities_of_success(df_baseline,
                                                                                   pump_threshold,resample_measure,follow_up_measure)
            print('')
            print('Price impact for time measure {} is {}'.format(t,price_impact[(x,t)]))
        print('')
        print('Completed study for pair {}'.format(x))    
    return(price_impact)    

In [105]:
time_measure_pairs = [(17,2),(24,5),(32,7),(42,7),(51,6),(61,8),(73,12),(73,23),
                      (73,33),(73,53),(698,58),(698,211),(1286,122),(1286,13)]

In [None]:
coin_pairs = os.listdir('../Binance/crypto_pumps/data/raw/Binance')
coin_pairs.sort()

In [110]:
coin_pairs = l[:3]
pump_threshold = 97
tmp = time_measure_pairs[:3]

get_price_impact_study(coin_pairs,pump_threshold,time_measure_pairs)


Commencing study for pair 1INCH_USDT.csv
Total NaN : 

timestamp    0
open         0
high         0
low          0
close        0
volume       0
returns      1
dtype: int64

Number of instances for which we have intervals of missing data is 0.

Total amount of missing time in dataframe is 0.0 minutes

Studying price impact for pair (17, 2)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (17, 2) is {'1/2021': 0.5, '2/2021': 0.5789473684210527}

Studying price impact for pair (24, 5)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (24, 5) is {'1/2021': 0.47368421052631576, '2/2021': 0.75}

Studying price impact for pair (32, 7)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (32, 7) is {'1/2021': 0.5714285714285714, '2/2021': 0.5555555555555556}

Studying price impact for pair (42, 7)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (42, 7) is {'1/2021': 0.09090909090909091, '2/2021': 0.5714285714285714}

Studying price impact for pair (51, 6)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (51, 6) is {'1/2021': 0.5, '2/2021': 0.3333333333333333}

Studying price impact for pair (61, 8)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (61, 8) is {'1/2021': 0.6666666666666666, '2/2021': 0.5714285714285714}

Studying price impact for pair (73, 12)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (73, 12) is {'1/2021': 0.5, '2/2021': 0.0}

Studying price impact for pair (73, 23)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (73, 23) is {'1/2021': 0.5, '2/2021': 0.3333333333333333}

Studying price impact for pair (73, 33)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (73, 33) is {'1/2021': 1.0, '2/2021': 0.3333333333333333}

Studying price impact for pair (73, 53)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (73, 53) is {'1/2021': 1.0, '2/2021': 0.3333333333333333}

Studying price impact for pair (698, 58)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (698, 58) is {'1/2021': 'NaN', '2/2021': 0.0}

Studying price impact for pair (698, 211)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (698, 211) is {'1/2021': 'NaN', '2/2021': 0.0}

Studying price impact for pair (1286, 122)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (1286, 122) is {'1/2021': 'NaN', '2/2021': 'NaN'}

Studying price impact for pair (1286, 13)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



Price impact for time measure (1286, 13) is {'1/2021': 'NaN', '2/2021': 'NaN'}

Completed study for pair 1INCH_USDT.csv

Commencing study for pair AAVE_USDT.csv
Total NaN : 

timestamp    0
open         0
high         0
low          0
close        0
volume       0
returns      1
dtype: int64

Number of instances for which we have intervals of missing data is 3.

Total amount of missing time in dataframe is 358.4232 minutes

Studying price impact for pair (17, 2)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (17, 2) is {'11/2020': 0.569620253164557, '12/2020': 0.47058823529411764, '1/2021': 0.5260869565217391, '2/2021': 0.48148148148148145}

Studying price impact for pair (24, 5)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (24, 5) is {'11/2020': 0.5087719298245614, '12/2020': 0.5454545454545454, '1/2021': 0.5774647887323944, '2/2021': 0.65}

Studying price impact for pair (32, 7)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (32, 7) is {'11/2020': 0.5263157894736842, '12/2020': 0.6, '1/2021': 0.5107913669064749, '2/2021': 0.6}

Studying price impact for pair (42, 7)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (42, 7) is {'11/2020': 0.5972222222222222, '12/2020': 0.6666666666666666, '1/2021': 0.5257731958762887, '2/2021': 0.2857142857142857}

Studying price impact for pair (51, 6)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (51, 6) is {'11/2020': 0.5164835164835165, '12/2020': 0.0, '1/2021': 0.527027027027027, '2/2021': 0.5555555555555556}

Studying price impact for pair (61, 8)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (61, 8) is {'11/2020': 0.4691358024691358, '12/2020': 0.25, '1/2021': 0.40625, '2/2021': 0.5555555555555556}

Studying price impact for pair (73, 12)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (73, 12) is {'11/2020': 0.5106382978723404, '12/2020': 0.0, '1/2021': 0.4642857142857143, '2/2021': 0.25}

Studying price impact for pair (73, 23)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (73, 23) is {'11/2020': 0.5531914893617021, '12/2020': 0.5, '1/2021': 0.5892857142857143, '2/2021': 0.375}

Studying price impact for pair (73, 33)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (73, 33) is {'11/2020': 0.5106382978723404, '12/2020': 0.5, '1/2021': 0.6071428571428571, '2/2021': 0.25}

Studying price impact for pair (73, 53)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (73, 53) is {'11/2020': 0.44680851063829785, '12/2020': 0.0, '1/2021': 0.6428571428571429, '2/2021': 0.5}

Studying price impact for pair (698, 58)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (698, 58) is {'11/2020': 0.4666666666666667, '12/2020': 'NaN', '1/2021': 0.5, '2/2021': 0.5}

Studying price impact for pair (698, 211)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (698, 211) is {'11/2020': 0.4666666666666667, '12/2020': 'NaN', '1/2021': 0.875, '2/2021': 0.5}

Studying price impact for pair (1286, 122)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (1286, 122) is {'11/2020': 0.7777777777777778, '12/2020': 'NaN', '1/2021': 0.5555555555555556, '2/2021': 1.0}

Studying price impact for pair (1286, 13)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



Price impact for time measure (1286, 13) is {'11/2020': 0.2222222222222222, '12/2020': 'NaN', '1/2021': 0.4444444444444444, '2/2021': 1.0}

Completed study for pair AAVE_USDT.csv

Commencing study for pair ADA_USDT.csv
Total NaN : 

timestamp    0
open         0
high         0
low          0
close        0
volume       0
returns      1
dtype: int64

Number of instances for which we have intervals of missing data is 20.

Total amount of missing time in dataframe is 5079.123516666667 minutes

Studying price impact for pair (17, 2)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (17, 2) is {'5/2018': 0.44, '6/2018': 0.4716981132075472, '7/2018': 0.5, '8/2018': 0.5405405405405406, '9/2018': 0.44285714285714284, '10/2018': 0.6470588235294118, '11/2018': 0.46875, '12/2018': 0.3854166666666667, '1/2019': 0.5263157894736842, '2/2019': 0.4318181818181818, '3/2019': 0.4657534246575342, '4/2019': 0.4774774774774775, '5/2019': 0.5503875968992248, '6/2019': 0.7058823529411765, '7/2019': 0.4725274725274725, '8/2019': 0.3829787234042553, '9/2019': 0.5340909090909091, '10/2019': 0.5, '11/2019': 0.40229885057471265, '12/2019': 0.43902439024390244, '1/2020': 0.4861111111111111, '2/2020': 0.5, '3/2020': 0.5371900826446281, '4/2020': 0.3125, '5/2020': 0.4426229508196721, '6/2020': 0.5192307692307693, '7/2020': 0.4601226993865031, '8/2020': 0.47058823529411764, '9/2020': 0.5, '10/2020': 0.5, '11/2020': 0.5097276264591439, '12/2020': 0.47058823529411764, '1/2021': 0.5020746887966805, '2/2021': 0.3333333333333333}

Studying price impact for pair (2

HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (24, 5) is {'5/2018': 0.5, '6/2018': 0.5, '7/2018': 0.47297297297297297, '8/2018': 0.3877551020408163, '9/2018': 0.35714285714285715, '10/2018': 0.5, '11/2018': 0.45263157894736844, '12/2018': 0.5507246376811594, '1/2019': 0.375, '2/2019': 0.6071428571428571, '3/2019': 0.45098039215686275, '4/2019': 0.5542168674698795, '5/2019': 0.4945054945054945, '6/2019': 0.34782608695652173, '7/2019': 0.5076923076923077, '8/2019': 0.4117647058823529, '9/2019': 0.46774193548387094, '10/2019': 0.5333333333333333, '11/2019': 0.5087719298245614, '12/2019': 0.4074074074074074, '1/2020': 0.477124183006536, '2/2020': 0.4142857142857143, '3/2020': 0.4583333333333333, '4/2020': 0.46153846153846156, '5/2020': 0.40229885057471265, '6/2020': 0.3684210526315789, '7/2020': 0.44036697247706424, '8/2020': 0.38095238095238093, '9/2020': 0.3979591836734694, '10/2020': 0.6, '11/2020': 0.515625, '12/2020': 0.36, '1/2021': 0.45348837209302323, '2/2021': 0.6363636363636364}

Studying pric

HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (32, 7) is {'5/2018': 0.5172413793103449, '6/2018': 0.4, '7/2018': 0.4032258064516129, '8/2018': 0.5227272727272727, '9/2018': 0.425, '10/2018': 0.5454545454545454, '11/2018': 0.3624161073825503, '12/2018': 0.5106382978723404, '1/2019': 0.2857142857142857, '2/2019': 0.36, '3/2019': 0.34328358208955223, '4/2019': 0.6721311475409836, '5/2019': 0.5277777777777778, '6/2019': 0.5714285714285714, '7/2019': 0.37735849056603776, '8/2019': 0.34375, '9/2019': 0.3953488372093023, '10/2019': 0.42857142857142855, '11/2019': 0.4807692307692308, '12/2019': 0.6666666666666666, '1/2020': 0.43548387096774194, '2/2020': 0.5833333333333334, '3/2020': 0.4603174603174603, '4/2020': 0.4, '5/2020': 0.5072463768115942, '6/2020': 0.4090909090909091, '7/2020': 0.30864197530864196, '8/2020': 0.6875, '9/2020': 0.4222222222222222, '10/2020': 0.6363636363636364, '11/2020': 0.46564885496183206, '12/2020': 0.6, '1/2021': 0.44274809160305345, '2/2021': 0.45454545454545453}

Studying pric

HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (42, 7) is {'5/2018': 0.5294117647058824, '6/2018': 0.47619047619047616, '7/2018': 0.5306122448979592, '8/2018': 0.2857142857142857, '9/2018': 0.3783783783783784, '10/2018': 0.6666666666666666, '11/2018': 0.4722222222222222, '12/2018': 0.4, '1/2019': 0.42857142857142855, '2/2019': 0.5714285714285714, '3/2019': 0.43333333333333335, '4/2019': 0.5333333333333333, '5/2019': 0.4153846153846154, '6/2019': 0.375, '7/2019': 0.5348837209302325, '8/2019': 0.25, '9/2019': 0.42424242424242425, '10/2019': 0.6, '11/2019': 0.5161290322580645, '12/2019': 0.4444444444444444, '1/2020': 0.45454545454545453, '2/2020': 0.5833333333333334, '3/2020': 0.421875, '4/2020': 0.6, '5/2020': 0.35714285714285715, '6/2020': 0.6, '7/2020': 0.546875, '8/2020': 0.38461538461538464, '9/2020': 0.34545454545454546, '10/2020': 0.5, '11/2020': 0.5446428571428571, '12/2020': 0.47058823529411764, '1/2021': 0.41228070175438597, '2/2021': 0.6428571428571429}

Studying price impact for pair (51, 6)

HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (51, 6) is {'5/2018': 0.23809523809523808, '6/2018': 0.6428571428571429, '7/2018': 0.3953488372093023, '8/2018': 0.4074074074074074, '9/2018': 0.5806451612903226, '10/2018': 0.375, '11/2018': 0.4050632911392405, '12/2018': 0.6944444444444444, '1/2019': 0.2, '2/2019': 0.5, '3/2019': 0.4791666666666667, '4/2019': 0.3055555555555556, '5/2019': 0.38235294117647056, '6/2019': 0.375, '7/2019': 0.5128205128205128, '8/2019': 0.42857142857142855, '9/2019': 0.42857142857142855, '10/2019': 0.6470588235294118, '11/2019': 0.5, '12/2019': 0.5, '1/2020': 0.5277777777777778, '2/2020': 0.5, '3/2020': 0.5263157894736842, '4/2020': 0.3, '5/2020': 0.5, '6/2020': 0.4375, '7/2020': 0.46153846153846156, '8/2020': 0.5, '9/2020': 0.45454545454545453, '10/2020': 0.2, '11/2020': 0.4666666666666667, '12/2020': 0.7777777777777778, '1/2021': 0.44565217391304346, '2/2021': 0.5714285714285714}

Studying price impact for pair (61, 8)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (61, 8) is {'5/2018': 0.3333333333333333, '6/2018': 0.5714285714285714, '7/2018': 0.4594594594594595, '8/2018': 0.5333333333333333, '9/2018': 0.42857142857142855, '10/2018': 0.6666666666666666, '11/2018': 0.4939759036144578, '12/2018': 0.52, '1/2019': 0.3333333333333333, '2/2019': 0.4117647058823529, '3/2019': 0.39473684210526316, '4/2019': 0.5428571428571428, '5/2019': 0.43243243243243246, '6/2019': 0.6153846153846154, '7/2019': 0.48, '8/2019': 0.4444444444444444, '9/2019': 0.6666666666666666, '10/2019': 0.5, '11/2019': 0.3793103448275862, '12/2019': 0.5, '1/2020': 0.5633802816901409, '2/2020': 0.36363636363636365, '3/2020': 0.6052631578947368, '4/2020': 0.6666666666666666, '5/2020': 0.48717948717948717, '6/2020': 0.2, '7/2020': 0.5263157894736842, '8/2020': 0.36363636363636365, '9/2020': 0.21621621621621623, '10/2020': 0.5555555555555556, '11/2020': 0.5068493150684932, '12/2020': 0.5454545454545454, '1/2021': 0.4788732394366197, '2/2021': 0.42857142857

HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (73, 12) is {'5/2018': 0.4, '6/2018': 0.4666666666666667, '7/2018': 0.42857142857142855, '8/2018': 0.26666666666666666, '9/2018': 0.5, '10/2018': 0.5, '11/2018': 0.5263157894736842, '12/2018': 0.5151515151515151, '1/2019': 1.0, '2/2019': 0.3, '3/2019': 0.3125, '4/2019': 0.6206896551724138, '5/2019': 0.3939393939393939, '6/2019': 0.5, '7/2019': 0.2692307692307692, '8/2019': 0.2, '9/2019': 0.35714285714285715, '10/2019': 0.5833333333333334, '11/2019': 0.5909090909090909, '12/2019': 0.25, '1/2020': 0.49056603773584906, '2/2020': 0.4482758620689655, '3/2020': 0.28125, '4/2020': 0.25, '5/2020': 0.4482758620689655, '6/2020': 0.08333333333333333, '7/2020': 0.5757575757575758, '8/2020': 0.14285714285714285, '9/2020': 0.4782608695652174, '10/2020': 0.5714285714285714, '11/2020': 0.5217391304347826, '12/2020': 0.3076923076923077, '1/2021': 0.509090909090909, '2/2021': 0.5555555555555556}

Studying price impact for pair (73, 23)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (73, 23) is {'5/2018': 0.4, '6/2018': 0.6666666666666666, '7/2018': 0.42857142857142855, '8/2018': 0.26666666666666666, '9/2018': 0.4090909090909091, '10/2018': 0.5, '11/2018': 0.543859649122807, '12/2018': 0.30303030303030304, '1/2019': 0.5, '2/2019': 0.2, '3/2019': 0.28125, '4/2019': 0.4482758620689655, '5/2019': 0.42424242424242425, '6/2019': 0.25, '7/2019': 0.19230769230769232, '8/2019': 0.3, '9/2019': 0.42857142857142855, '10/2019': 0.3333333333333333, '11/2019': 0.5, '12/2019': 0.0, '1/2020': 0.49056603773584906, '2/2020': 0.41379310344827586, '3/2020': 0.28125, '4/2020': 0.25, '5/2020': 0.4827586206896552, '6/2020': 0.16666666666666666, '7/2020': 0.48484848484848486, '8/2020': 0.2857142857142857, '9/2020': 0.4782608695652174, '10/2020': 0.2857142857142857, '11/2020': 0.5507246376811594, '12/2020': 0.38461538461538464, '1/2021': 0.4909090909090909, '2/2021': 0.3333333333333333}

Studying price impact for pair (73, 33)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (73, 33) is {'5/2018': 0.4, '6/2018': 0.5333333333333333, '7/2018': 0.42857142857142855, '8/2018': 0.26666666666666666, '9/2018': 0.4090909090909091, '10/2018': 0.5, '11/2018': 0.43859649122807015, '12/2018': 0.5454545454545454, '1/2019': 1.0, '2/2019': 0.3, '3/2019': 0.28125, '4/2019': 0.5517241379310345, '5/2019': 0.42424242424242425, '6/2019': 0.0, '7/2019': 0.38461538461538464, '8/2019': 0.4, '9/2019': 0.5, '10/2019': 0.5, '11/2019': 0.5, '12/2019': 0.25, '1/2020': 0.5094339622641509, '2/2020': 0.3793103448275862, '3/2020': 0.625, '4/2020': 0.0, '5/2020': 0.5517241379310345, '6/2020': 0.08333333333333333, '7/2020': 0.5151515151515151, '8/2020': 0.42857142857142855, '9/2020': 0.30434782608695654, '10/2020': 0.5714285714285714, '11/2020': 0.5797101449275363, '12/2020': 0.5384615384615384, '1/2021': 0.5272727272727272, '2/2021': 0.2222222222222222}

Studying price impact for pair (73, 53)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (73, 53) is {'5/2018': 0.4, '6/2018': 0.6, '7/2018': 0.3333333333333333, '8/2018': 0.13333333333333333, '9/2018': 0.4090909090909091, '10/2018': 0.5, '11/2018': 0.49122807017543857, '12/2018': 0.5151515151515151, '1/2019': 1.0, '2/2019': 0.4, '3/2019': 0.3125, '4/2019': 0.5172413793103449, '5/2019': 0.45454545454545453, '6/2019': 0.0, '7/2019': 0.4230769230769231, '8/2019': 0.3, '9/2019': 0.5, '10/2019': 0.5, '11/2019': 0.5454545454545454, '12/2019': 0.25, '1/2020': 0.6037735849056604, '2/2020': 0.41379310344827586, '3/2020': 0.5625, '4/2020': 0.25, '5/2020': 0.5517241379310345, '6/2020': 0.3333333333333333, '7/2020': 0.3333333333333333, '8/2020': 0.42857142857142855, '9/2020': 0.4782608695652174, '10/2020': 0.2857142857142857, '11/2020': 0.5362318840579711, '12/2020': 0.46153846153846156, '1/2021': 0.509090909090909, '2/2021': 0.2222222222222222}

Studying price impact for pair (698, 58)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (698, 58) is {'5/2018': 1.0, '6/2018': 0.6666666666666666, '7/2018': 0.5, '8/2018': 0.0, '9/2018': 0.3333333333333333, '10/2018': 'NaN', '11/2018': 0.5555555555555556, '12/2018': 0.5454545454545454, '1/2019': 'NaN', '2/2019': 0.3333333333333333, '3/2019': 'NaN', '4/2019': 0.25, '5/2019': 0.3333333333333333, '6/2019': 'NaN', '7/2019': 0.75, '8/2019': 'NaN', '9/2019': 0.3333333333333333, '10/2019': 0.5, '11/2019': 1.0, '12/2019': 1.0, '1/2020': 0.7272727272727273, '2/2020': 0.0, '3/2020': 0.2857142857142857, '4/2020': 'NaN', '5/2020': 0.0, '6/2020': 0.0, '7/2020': 0.5, '8/2020': 'NaN', '9/2020': 0.3333333333333333, '10/2020': 'NaN', '11/2020': 0.18181818181818182, '12/2020': 0.3333333333333333, '1/2021': 0.75, '2/2021': 0.6666666666666666}

Studying price impact for pair (698, 211)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (698, 211) is {'5/2018': 1.0, '6/2018': 0.3333333333333333, '7/2018': 0.5, '8/2018': 0.0, '9/2018': 0.6666666666666666, '10/2018': 'NaN', '11/2018': 0.5555555555555556, '12/2018': 0.6363636363636364, '1/2019': 'NaN', '2/2019': 0.3333333333333333, '3/2019': 'NaN', '4/2019': 0.75, '5/2019': 0.3333333333333333, '6/2019': 'NaN', '7/2019': 0.5, '8/2019': 'NaN', '9/2019': 0.3333333333333333, '10/2019': 0.5, '11/2019': 0.5, '12/2019': 1.0, '1/2020': 0.45454545454545453, '2/2020': 1.0, '3/2020': 0.2857142857142857, '4/2020': 'NaN', '5/2020': 0.6666666666666666, '6/2020': 1.0, '7/2020': 0.9, '8/2020': 'NaN', '9/2020': 0.3333333333333333, '10/2020': 'NaN', '11/2020': 0.5454545454545454, '12/2020': 0.3333333333333333, '1/2021': 0.75, '2/2021': 0.6666666666666666}

Studying price impact for pair (1286, 122)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (1286, 122) is {'5/2018': 0.0, '6/2018': 'NaN', '7/2018': 1.0, '8/2018': 0.5, '9/2018': 1.0, '10/2018': 0.0, '11/2018': 0.5, '12/2018': 0.3333333333333333, '1/2019': 'NaN', '2/2019': 0.3333333333333333, '3/2019': 0.0, '4/2019': 1.0, '5/2019': 1.0, '6/2019': 'NaN', '7/2019': 0.0, '8/2019': 1.0, '9/2019': 1.0, '10/2019': 0.3333333333333333, '11/2019': 'NaN', '12/2019': 0.0, '1/2020': 0.5, '2/2020': 'NaN', '3/2020': 0.0, '4/2020': 1.0, '5/2020': 0.0, '6/2020': 'NaN', '7/2020': 0.3333333333333333, '8/2020': 'NaN', '9/2020': 0.3333333333333333, '10/2020': 'NaN', '11/2020': 0.375, '12/2020': 0.0, '1/2021': 0.42857142857142855, '2/2021': 'NaN'}

Studying price impact for pair (1286, 13)


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))



Price impact for time measure (1286, 13) is {'5/2018': 0.0, '6/2018': 'NaN', '7/2018': 1.0, '8/2018': 0.5, '9/2018': 0.3333333333333333, '10/2018': 0.0, '11/2018': 0.5, '12/2018': 1.0, '1/2019': 'NaN', '2/2019': 0.6666666666666666, '3/2019': 0.0, '4/2019': 1.0, '5/2019': 0.0, '6/2019': 'NaN', '7/2019': 0.0, '8/2019': 0.0, '9/2019': 1.0, '10/2019': 0.6666666666666666, '11/2019': 'NaN', '12/2019': 0.0, '1/2020': 0.4, '2/2020': 'NaN', '3/2020': 0.0, '4/2020': 0.0, '5/2020': 1.0, '6/2020': 'NaN', '7/2020': 0.3333333333333333, '8/2020': 'NaN', '9/2020': 0.6666666666666666, '10/2020': 'NaN', '11/2020': 0.5, '12/2020': 0.0, '1/2021': 0.2857142857142857, '2/2021': 'NaN'}

Completed study for pair ADA_USDT.csv


{('1INCH_USDT.csv', (17, 2)): {'1/2021': 0.5, '2/2021': 0.5789473684210527},
 ('1INCH_USDT.csv', (24, 5)): {'1/2021': 0.47368421052631576, '2/2021': 0.75},
 ('1INCH_USDT.csv', (32, 7)): {'1/2021': 0.5714285714285714,
  '2/2021': 0.5555555555555556},
 ('1INCH_USDT.csv', (42, 7)): {'1/2021': 0.09090909090909091,
  '2/2021': 0.5714285714285714},
 ('1INCH_USDT.csv', (51, 6)): {'1/2021': 0.5, '2/2021': 0.3333333333333333},
 ('1INCH_USDT.csv', (61, 8)): {'1/2021': 0.6666666666666666,
  '2/2021': 0.5714285714285714},
 ('1INCH_USDT.csv', (73, 12)): {'1/2021': 0.5, '2/2021': 0.0},
 ('1INCH_USDT.csv', (73, 23)): {'1/2021': 0.5, '2/2021': 0.3333333333333333},
 ('1INCH_USDT.csv', (73, 33)): {'1/2021': 1.0, '2/2021': 0.3333333333333333},
 ('1INCH_USDT.csv', (73, 53)): {'1/2021': 1.0, '2/2021': 0.3333333333333333},
 ('1INCH_USDT.csv', (698, 58)): {'1/2021': 'NaN', '2/2021': 0.0},
 ('1INCH_USDT.csv', (698, 211)): {'1/2021': 'NaN', '2/2021': 0.0},
 ('1INCH_USDT.csv', (1286, 122)): {'1/2021': 'NaN', '2