In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install kats

In [None]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from kats.detectors.bocpd import BOCPDetector, BOCPDModelType, TrendChangeParameters, NormalKnownParameters
from kats.detectors.robust_stat_detection import RobustStatDetector
from kats.consts import TimeSeriesData
import plotly.express as px
from datetime import datetime
import warnings

In [None]:
# Asset metadata: one row per coin, re-indexed by Asset_ID and sorted so that
# `asset.loc[<id>]` looks a coin up directly.
asset_id = pd.read_csv('../input/g-research-crypto-forecasting/asset_details.csv')
asset = asset_id.set_index('Asset_ID')
asset = asset.sort_index()

# Full training set for all assets.
data = pd.read_csv('../input/g-research-crypto-forecasting/train.csv')

**The function below performs changepoint (cp) detection with the Kats library**
## *Parameters:*
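* *data* - DataFrame with the training data (the function uses the `Asset_ID` and `timestamp` columns plus the chosen feature column)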
* *asset_id* - coin id 
* *prior* - as I use the Bayesian online detection method with an assumed normal distribution of changepoints, *prior* stands for the share of changepoints among the data points
* *feature* - 'Close' or 'Volume'
* *confidence* - keep only the changepoints whose confidence is greater than this value
* *scaling* - if True, apply standard scaling to the values; I sometimes prefer to use it for Volume

In [None]:
def _changepoint_start(changepoint):
    """Return the start of one detected changepoint as a naive ``datetime`` at midnight."""
    # NOTE(review): the original code indexed ``changepoint[0]``, i.e. it expected
    # (changepoint, metadata) tuples from the detector. A bare changepoint object is
    # accepted too, in case the installed kats release returns those - TODO confirm.
    cp = changepoint[0] if isinstance(changepoint, (tuple, list)) else changepoint
    # pd.Timestamp accepts numpy.datetime64, datetime and Timestamp values alike, so the
    # conversion no longer depends on the string form containing a 'T' separator.
    return pd.Timestamp(cp.start_time).normalize().to_pydatetime()


def changepoint_auto(data, asset_id, prior, feature, confidence=0.75, scaling=True):
    """Detect changepoints in one asset's daily series and plot them.

    The rows of ``data`` belonging to ``asset_id`` are resampled to daily
    frequency, passed to the Kats ``BOCPDetector`` with the normal-known model,
    and shown as a plotly line chart. The span between each pair of consecutive
    changepoints is shaded, alternating red and green.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Asset_ID``, ``timestamp`` (interpreted as Unix seconds)
        and the ``feature`` column.
    asset_id : int
        Coin id; its name is looked up in the module-level ``asset`` table.
    prior : float
        Forwarded to the detector as ``changepoint_prior``.
    feature : str
        Column to analyse. ``'Close'`` is aggregated with the daily mean; any
        other column is aggregated with the daily sum.
    confidence : float, default 0.75
        Forwarded to the detector as ``threshold``.
    scaling : bool, default True
        If True, the daily values are standardised with ``StandardScaler``
        before detection and plotting.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    ValueError
        If ``data`` holds no rows for ``asset_id``.
    """
    asset_name = asset.loc[asset_id, 'Asset_Name']
    asset_data = data[data['Asset_ID'] == asset_id].set_index("timestamp")
    if asset_data.empty:
        raise ValueError('No rows found for Asset_ID {0}'.format(asset_id))

    tdf = pd.DataFrame({
        # Same instants as multiplying the second-resolution stamps by 1e9 (ns).
        'time': pd.to_datetime(asset_data.index, unit='s'),
        'value': asset_data[feature].values,
    })

    daily = tdf.resample('D', on='time')['value']
    th = (daily.mean() if feature == 'Close' else daily.sum()).to_frame()
    th['time'] = th.index

    if scaling:
        # fit_transform returns an (n, 1) array; flatten it for the column assignment.
        th['value'] = StandardScaler().fit_transform(th[['value']].values).ravel()

    ts = TimeSeriesData(th)
    bocpd_detector = BOCPDetector(ts)
    changepoints = bocpd_detector.detector(
        model=BOCPDModelType.NORMAL_KNOWN_MODEL,
        model_parameters=NormalKnownParameters(empirical=True),
        threshold=confidence,
        changepoint_prior=prior,
        choose_priors=True,
        agg_cp=True,
    )

    fig = px.line(
        th['value'],
        title='Daily {0} graph for {1} with changepoints, value standard scaling: {2}'.format(
            feature, asset_name, scaling
        ),
    )

    # Shade every span between two consecutive changepoints. The previous
    # ``if i == 1 / elif i > 2`` chain never handled the second changepoint, so the
    # first band wrongly stretched from changepoint 1 to changepoint 3.
    starts = [_changepoint_start(cp) for cp in changepoints]
    for position, (band_start, band_end) in enumerate(zip(starts, starts[1:]), start=2):
        # ``position`` is the 1-based index of the changepoint closing the band;
        # even positions are red and odd ones green, as before.
        color = "red" if position % 2 == 0 else "green"
        fig.add_vrect(
            x0=band_start,
            x1=band_end,
            fillcolor=color, opacity=0.05, line_width=0)

    fig.show()

In [None]:

# Asset 0, Volume.
changepoint_auto(data, asset_id=0, prior=5e-3, feature='Volume', confidence=0.7, scaling=False)

In [None]:
# Asset 0, Close.
changepoint_auto(data, asset_id=0, prior=5e-3, feature='Close', confidence=0.75, scaling=False)

In [None]:
# Asset 1, Volume.
changepoint_auto(data, asset_id=1, prior=5e-3, feature='Volume', confidence=0.6, scaling=False)

In [None]:
# Asset 1, Close.
changepoint_auto(data, asset_id=1, prior=5e-5, feature='Close', confidence=0.8, scaling=False)

In [None]:
# Asset 2, Volume.
changepoint_auto(data, asset_id=2, prior=5e-4, feature='Volume', confidence=0.65, scaling=False)

In [None]:
# Asset 2, Close.
changepoint_auto(data, asset_id=2, prior=5e-5, feature='Close', confidence=0.77, scaling=False)

In [None]:
# Asset 3, Volume.
changepoint_auto(data, asset_id=3, prior=5e-3, feature='Volume', confidence=0.7, scaling=False)

In [None]:
# Asset 3, Close.
changepoint_auto(data, asset_id=3, prior=5e-5, feature='Close', confidence=0.7, scaling=False)

In [None]:
# Asset 4, Volume.
changepoint_auto(data, asset_id=4, prior=5e-3, feature='Volume', confidence=0.65, scaling=False)

In [None]:
# Asset 4, Close.
changepoint_auto(data, asset_id=4, prior=5e-3, feature='Close', confidence=0.7, scaling=False)

In [None]:
# Asset 5, Volume.
changepoint_auto(data, asset_id=5, prior=5e-4, feature='Volume', confidence=0.7, scaling=False)

In [None]:
# Asset 5, Close.
changepoint_auto(data, asset_id=5, prior=5e-3, feature='Close', confidence=0.75, scaling=False)

In [None]:
# Asset 6, Volume.
changepoint_auto(data, asset_id=6, prior=5e-3, feature='Volume', confidence=0.75, scaling=False)

In [None]:
# Asset 6, Close.
changepoint_auto(data, asset_id=6, prior=5e-3, feature='Close', confidence=0.72, scaling=False)

In [None]:
# Asset 7, Volume.
changepoint_auto(data, asset_id=7, prior=5e-3, feature='Volume', confidence=0.75, scaling=False)

In [None]:
# Asset 7, Close.
changepoint_auto(data, asset_id=7, prior=5e-3, feature='Close', confidence=0.75, scaling=False)

In [None]:
# Asset 8, Volume.
changepoint_auto(data, asset_id=8, prior=5e-3, feature='Volume', confidence=0.6, scaling=False)

In [None]:
# Asset 8, Close.
changepoint_auto(data, asset_id=8, prior=5e-3, feature='Close', confidence=0.75, scaling=False)

In [None]:
# Asset 9, Volume.
changepoint_auto(data, asset_id=9, prior=5e-3, feature='Volume', confidence=0.65, scaling=False)

In [None]:
# Asset 9, Close.
changepoint_auto(data, asset_id=9, prior=5e-3, feature='Close', confidence=0.75, scaling=False)

In [None]:
# Asset 10, Volume.
changepoint_auto(data, asset_id=10, prior=5e-3, feature='Volume', confidence=0.75, scaling=False)

In [None]:
# Asset 11, Volume.
changepoint_auto(data, asset_id=11, prior=5e-3, feature='Volume', confidence=0.6, scaling=False)

In [None]:
# Asset 11, Close.
changepoint_auto(data, asset_id=11, prior=5e-3, feature='Close', confidence=0.75, scaling=False)

In [None]:
# Asset 12, Volume.
changepoint_auto(data, asset_id=12, prior=5e-3, feature='Volume', confidence=0.75, scaling=False)

In [None]:
# Asset 12, Close.
changepoint_auto(data, asset_id=12, prior=5e-4, feature='Close', confidence=0.7, scaling=False)

**To do:**
* cut maker dataset
* deal with the warnings

**Possible usage in competition:**
* OOF in line with periods between changepoints
* categorical features
* basis for numerical features reflecting trend momentum