In [None]:
import gresearch_crypto
import pandas as pd
import numpy as np
import seaborn as sns
import time
import datetime
import matplotlib.pyplot as plt

## Importing Data

In [None]:
# Load the training data with explicit column dtypes fixed at read time.
# Each column is listed exactly once: the original mapping repeated the
# 'Count' key, which Python silently collapses to the last occurrence.
df_train = pd.read_csv(
    '../input/g-research-crypto-forecasting/train.csv',
    low_memory=False,
    dtype={
        'Asset_ID': 'int8',
        'Count': 'int32',
        'row_id': 'int32',
        'Open': 'float64',
        'High': 'float64',
        'Low': 'float64',
        'Close': 'float64',
        'Volume': 'float64',
        'VWAP': 'float64',
    },
)

In [None]:
# Add a `datetime` column by interpreting `timestamp` as seconds since the Unix epoch.
df_train['datetime'] = pd.to_datetime(df_train['timestamp'], unit='s')

In [None]:
df_train["timestamp"][1]

In [None]:
df_train["timestamp"][-1:].values[0]

Dropping -inf, inf, and negative values for VWAP.

In [None]:
df_train["VWAP"].max()

In [None]:
df_train["VWAP"].min()

In [None]:
df_train[df_train["VWAP"]<0]

In [None]:
df_train[df_train["VWAP"]==np.inf]

In [None]:
# Remove rows whose VWAP is +inf or negative (the `< 0` test also matches -inf).
# NaN VWAP values fail both comparisons and are therefore kept.
vwap = df_train["VWAP"]
bad_vwap = (vwap == np.inf) | (vwap < 0)
df_train = df_train.drop(df_train[bad_vwap].index, axis=0)

In [None]:
# One timestamp every 24 hours across the span of the data.
SECONDS_PER_DAY = 60 * 60 * 24
# NOTE(review): `[1]` is a label lookup (the row whose index label is 1), not
# the first row; it raises KeyError if that row was dropped above -- confirm
# whether the first remaining row was intended.
first_ts = df_train["timestamp"][1]
last_ts = df_train["timestamp"].values[-1]  # timestamp of the final row
arr_1d = np.arange(first_ts, last_ts, SECONDS_PER_DAY)  # stop value is exclusive

In [None]:
arr_1d.shape

In [None]:
daily_df=pd.DataFrame()

In [None]:
# Keep only the rows whose timestamp lies on the daily grid `arr_1d`.
# A single vectorised filter replaces the row-group-by-row-group
# DataFrame.append loop, which re-copied the frame on every iteration and no
# longer exists in pandas >= 2.0. The stable sort reproduces the loop's row
# order: ascending timestamp, original order within each timestamp.
daily_df = (
    df_train[df_train["timestamp"].isin(arr_1d)]
    .sort_values("timestamp", kind="stable")
)
    

In [None]:
daily_df

In [None]:
df_train=daily_df

In [None]:
df_train

In [None]:
# Copy `datetime` into `datetime2`; the copy becomes the index in the next cell,
# so `datetime` stays available as an ordinary column.
df_train["datetime2"]=df_train["datetime"]

In [None]:
# Use `datetime2` as the index and convert it to a daily ("D") PeriodIndex.
df_train=df_train.set_index("datetime2").to_period("D") 

In [None]:
# Load the asset metadata table, ordered by Asset_ID.
asset_details=pd.read_csv("../input/g-research-crypto-forecasting/asset_details.csv").sort_values("Asset_ID")

In [None]:
asset_details

In [None]:
#splitting to assets
def _asset_rows(asset_id):
    """Return the rows of df_train for one Asset_ID, ordered by timestamp."""
    return df_train.loc[df_train["Asset_ID"] == asset_id].sort_values("timestamp")


# Variable names are listed in Asset_ID order (0 .. 13).
(binance, bitcoin, btccash, cardona, doge, eos, eth,
 ethcls, iota, ltc, maker, monero, stellar, tron) = (
    _asset_rows(asset_id) for asset_id in range(14)
)

In [None]:
bitcoin

In [None]:
# Names of the per-asset DataFrame variables defined above. Later cells look
# the frames up by these names, so each entry must match a variable name exactly.
coinlist=["binance","bitcoin","btccash","cardona","doge",
          "eos","eth","ethcls","iota","ltc","maker","monero","stellar","tron"]

In [None]:
# Map each coin name to its Asset_ID. `coinlist` is written in Asset_ID order,
# so the position of a name in the list is its id (binance -> 0 ... tron -> 13).
coindict = {coin: asset_id for asset_id, coin in enumerate(coinlist)}

In [None]:
coindict["binance"]

## Indicator features

In [None]:
def add_indicator_features(frame):
    """Return a copy of ``frame`` with technical-indicator columns appended.

    ``frame`` must contain ``Close``, ``High`` and ``Low`` columns. Every
    window below counts rows, so a window is "N days" only when the frame has
    one row per day. Rolling windows leave NaN in the leading rows where the
    window is not yet full.

    Columns added, in order: returns, MA12, MA26, MA60, MA99, MA200, smaMACD,
    smaMACDs, smaMACDh, BearBullPower2, BollingerBasis, BollingerUpper,
    BollingerLower, DCup, DClow, DCmid.
    """
    out = frame.copy()
    close = out["Close"]

    # Log return relative to the previous row
    out["returns"] = np.log(close / close.shift(1))

    # Simple moving averages of the close
    for window in (12, 26, 60, 99, 200):
        out[f"MA{window}"] = close.rolling(window=window).mean()

    # MACD built from simple (not exponential) moving averages
    out["smaMACD"] = out["MA12"] - out["MA26"]                 # fast SMA - slow SMA
    out["smaMACDs"] = out["smaMACD"].rolling(window=9).mean()  # 9-row signal line
    out["smaMACDh"] = out["smaMACD"] - out["smaMACDs"]         # histogram

    # Bull/bear power: (High - MA12) + (Low - MA12)
    out["BearBullPower2"] = out["High"] - out["MA12"] + out["Low"] - out["MA12"]

    # Bollinger bands: 20-row mean +/- 2 rolling standard deviations
    rolling_close_20 = close.rolling(window=20)
    band_width = 2 * rolling_close_20.std()
    out["BollingerBasis"] = rolling_close_20.mean()
    out["BollingerUpper"] = out["BollingerBasis"] + band_width
    out["BollingerLower"] = out["BollingerBasis"] - band_width

    # Donchian channel: highest high / lowest low over the last 20 rows
    out["DCup"] = out["High"].rolling(window=20).max()
    out["DClow"] = out["Low"].rolling(window=20).min()
    out["DCmid"] = (out["DCup"] + out["DClow"]) / 2

    return out


# Replace each per-asset frame (looked up by variable name) with its enriched
# copy. globals() is used instead of rebinding the builtin `str` and writing
# through locals().
for name in coinlist:
    globals()[name] = add_indicator_features(globals()[name])
    print(name+"'s shape: ",globals()[name].shape[0])
    print(name+" is Done.")

print("All done.")

In [None]:
# Drop every row that has a NaN in any column (for example the leading rows
# where a rolling window is not yet full). The frames are looked up by variable
# name through globals() rather than by rebinding the builtin `str`.
for name in coinlist:
    cleaned = globals()[name].dropna()
    globals()[name] = cleaned
    print(name+"'s shape: ",cleaned.shape[0])
    print(name+" is Done.")

print("All done.")

In [None]:
bitcoin.columns

In [None]:
# Columns selected from each per-asset frame in the modelling loop to build the
# frame used for the seasonal plots ("Target" is the plotted variable).
test_columns=['Count','Open','High', 'Low', 'Close','Volume','VWAP',"Target" ,'returns', 'MA26', 'MA12', 'smaMACD',
            'smaMACDs', 'smaMACDh', 'MA99', 'MA200', 'MA60',"BollingerBasis","BollingerUpper",
              "BollingerLower",'DCup', 'DClow','DCmid']

In [None]:
bitcoin

In [None]:
bitcoin.index.values

## Data Analysis

In [None]:
from warnings import simplefilter
simplefilter("ignore")

In [None]:
# (column, legend label, subplot row) for every line in the overview figure:
# row 0 = price with Bollinger bands and MA26, row 1 = bull/bear power,
# row 2 = returns.
OVERVIEW_SERIES = [
    ("Close", "close", 0),
    ("BollingerUpper", "BBup", 0),
    ("BollingerBasis", "BBmid", 0),
    ("BollingerLower", "BBlow", 0),
    ("MA26", "MA26", 0),
    ("BearBullPower2", "BullBearPower", 1),
    ("returns", "daily returns", 2),
]

for name in coinlist:
    coin = globals()[name]  # per-asset frame, looked up by variable name
    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(16, 8),
                           gridspec_kw={'height_ratios': [2, 1, 1]})
    ax[0].set_title(name.title())
    for column, label, row in OVERVIEW_SERIES:
        # x/y are passed by keyword: seaborn >= 0.12 rejects positional data.
        sns.lineplot(x=coin["datetime"], y=coin[column].values, label=label, ax=ax[row])
    plt.show()
    plt.close(fig)  # release the figure before drawing the next asset
    print(" "*57+name.title()+"'s"+' plot'.title())
    print(" ")
    print(" ")


In [None]:
# (column, legend label, subplot row) for the MACD / Donchian figure:
# row 0 = close with the Donchian channel, row 1 = SMA-based MACD.
# The Donchian lines were previously labelled "BBup"/"BBmid"/"BBlow" (copied
# from the Bollinger plot); they now carry their own names.
MACD_DONCHIAN_SERIES = [
    ("Close", "close", 0),
    ("smaMACD", "smaMACD", 1),
    ("smaMACDs", "smaMACD signal", 1),
    ("smaMACDh", "smaMACD histogram", 1),
    ("DCup", "DCup", 0),
    ("DCmid", "DCmid", 0),
    ("DClow", "DClow", 0),
]

for name in coinlist:
    coin = globals()[name]  # per-asset frame, looked up by variable name
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(16, 8),
                           gridspec_kw={'height_ratios': [2, 1]})
    ax[0].set_title(name.title())
    for column, label, row in MACD_DONCHIAN_SERIES:
        # x/y are passed by keyword: seaborn >= 0.12 rejects positional data.
        sns.lineplot(x=coin["datetime"], y=coin[column].values, label=label, ax=ax[row])
    plt.show()
    plt.close(fig)  # release the figure before drawing the next asset
    print(" "*57+name.title()+"'s"+' plot'.title())
    print(" ")
    print(" ")
    

In [None]:
bitcoin

## Model, fit, graphs

In [None]:
def fourier_features(index, freq, order):
    """Build sine/cosine seasonal features for the positions of ``index``.

    Parameters
    ----------
    index : sized, index-like
        Only its length is used for the computation; it is also attached as
        the index of the returned frame.
    freq : number
        Number of steps in one full cycle.
    order : int
        Number of harmonics; two columns (sin, cos) are produced per harmonic.

    Returns
    -------
    pandas.DataFrame
        Columns ``sin_{freq}_{i}``, ``cos_{freq}_{i}`` for i = 1 .. order,
        in that order, computed in float32.
    """
    steps = np.arange(len(index), dtype=np.float32)
    phase = 2 * np.pi * (1 / freq) * steps  # angle of the first harmonic at each step
    columns = {}
    for harmonic in range(1, order + 1):
        columns[f"sin_{freq}_{harmonic}"] = np.sin(harmonic * phase)
        columns[f"cos_{freq}_{harmonic}"] = np.cos(harmonic * phase)
    return pd.DataFrame(columns, index=index)


# Compute Fourier features to the 4th order (8 new features) for a
# series y with daily observations and annual seasonality:
#
# fourier_features(y.index, freq=365.25, order=4)

In [None]:
from warnings import simplefilter

simplefilter("ignore")

# Set Matplotlib defaults
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so pick whichever spelling this
# installation provides instead of hard-coding one. If neither exists the
# current style is left unchanged rather than raising.
_whitegrid_style = next(
    (
        style_name
        for style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
        if style_name in plt.style.available
    ),
    None,
)
if _whitegrid_style is not None:
    plt.style.use(_whitegrid_style)
plt.rc("figure", autolayout=True, figsize=(11, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)
# Keyword arguments intended for pandas/Matplotlib line plots of a raw series.
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
%config InlineBackend.figure_format = 'retina'


# annotations: https://stackoverflow.com/a/49238256/5769929
def seasonal_plot(X, y, period, freq, ax=None):
    """Draw one line per value of ``X[period]``, plotting ``y`` against ``freq``.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame containing the ``y``, ``period`` and ``freq`` columns.
    y, period, freq : str
        Column names: the plotted value, the grouping column (one coloured
        line per distinct value) and the x-axis column.
    ax : matplotlib Axes, optional
        Axes to draw on; a new figure is created when omitted.

    Returns
    -------
    The Axes that was drawn on. Each line is annotated at the right-hand edge
    with its period value, in the line's colour.
    """
    if ax is None:
        ax = plt.subplots()[1]

    n_periods = X[period].nunique()
    colours = sns.color_palette("husl", n_colors=n_periods)
    ax = sns.lineplot(
        data=X,
        x=freq,
        y=y,
        hue=period,
        palette=colours,
        ci=False,
        legend=False,
        ax=ax,
    )
    ax.set_title(f"Seasonal Plot ({period}/{freq})")

    # Pair the drawn lines with the period values in order of first appearance.
    period_labels = X[period].unique()
    for drawn_line, label in zip(ax.lines, period_labels):
        last_y = drawn_line.get_ydata()[-1]
        ax.annotate(
            label,
            xy=(1, last_y),  # x = 1 is the right edge in the y-axis transform
            xytext=(6, 0),
            color=drawn_line.get_color(),
            xycoords=ax.get_yaxis_transform(),
            textcoords="offset points",
            size=14,
            va="center",
        )
    return ax


def plot_periodogram(ts, detrend='linear', ax=None):
    """Plot the periodogram of ``ts`` with frequencies in cycles per year.

    Parameters
    ----------
    ts : array-like
        The series to analyse; the sampling-frequency constant below treats
        it as one observation per day.
    detrend : str or callable, default 'linear'
        Passed straight to ``scipy.signal.periodogram``.
    ax : matplotlib Axes, optional
        Axes to draw on; a new figure is created when omitted.

    Returns
    -------
    The Axes that was drawn on.
    """
    from scipy.signal import periodogram

    # Samples per year for daily data. This is the value that
    # pd.Timedelta("1Y") / pd.Timedelta("1D") used to produce; it is written
    # as a literal because pandas >= 2.0 rejects the ambiguous "Y" unit.
    fs = 365.2425
    frequencies, spectrum = periodogram(
        ts,
        fs=fs,
        detrend=detrend,
        window="boxcar",
        scaling='spectrum',
    )
    if ax is None:
        _, ax = plt.subplots()
    ax.step(frequencies, spectrum, color="purple")
    ax.set_xscale("log")
    # Tick positions are cycles per year; labels name the matching period.
    ax.set_xticks([1, 2, 4, 6, 12, 26, 52, 104])
    ax.set_xticklabels(
        [
            "Annual (1)",
            "Semiannual (2)",
            "Quarterly (4)",
            "Bimonthly (6)",
            "Monthly (12)",
            "Biweekly (26)",
            "Weekly (52)",
            "Semiweekly (104)",
        ],
        rotation=30,
    )
    ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
    ax.set_ylabel("Variance")
    ax.set_title("Periodogram")
    return ax

In [None]:
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import ExtraTreeRegressor
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess

In [None]:
regression_model=ExtraTreeRegressor()

In [None]:
fourier = CalendarFourier(freq="A", order=100)  #100 sin/cos pairs for "A"nnual seasonality

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
from sklearn.base import clone  # gives every asset its own unfitted estimator

# For each asset: draw two seasonal plots of Target, fit a model on
# deterministic trend/seasonality features, plot the in-sample fit with a
# 240-step forecast, and publish the results as module-level variables named
#   <coin>_X, <coin>_y, <coin>_y_pred, <coin>_X_fore, <coin>_y_fore,
#   <coin>_XGB_model, <coin>_df_pred, <coin>_df_fore
# (later cells read the *_df_pred / *_df_fore names directly).
for name in coinlist:
    coin = globals()[name]  # per-asset frame, looked up by variable name
    print("1."+name+" Starting")

    dp = DeterministicProcess(
        index=coin.index,
        constant=False,              # no constant (intercept) column
        order=1,                     # linear time trend
        seasonal=True,               # seasonal indicator columns
        additional_terms=[fourier],  # annual Fourier terms from an earlier cell
        drop=False,                  # keep every generated column
    )

    # Calendar helper columns used only by the two seasonal plots below.
    # .copy() makes this an independent frame, so adding columns does not
    # write into a slice of the per-asset frame.
    seasonal_view = coin[test_columns].copy()

    # days within a week
    seasonal_view["day"] = seasonal_view.index.dayofweek  # the x-axis (freq)
    seasonal_view["week"] = seasonal_view.index.week      # the seasonal period (period)

    # days within a year
    seasonal_view["dayofyear"] = seasonal_view.index.dayofyear
    seasonal_view["year"] = seasonal_view.index.year
    print("2. first plot")

    fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(24, 8))
    seasonal_plot(seasonal_view, y="Target", period="week", freq="day", ax=ax0)
    seasonal_plot(seasonal_view, y="Target", period="year", freq="dayofyear", ax=ax1)
    plt.show()

    print("3. creting dp")
    features = dp.in_sample()
    print("4. dp finished")
    print(name,"'s shape: ",features.shape)

    target = coin["Target"]
    print("5. ",name,"'s shape: ",target.shape)

    # Clone the template so each asset keeps its own fitted model; sharing the
    # single `regression_model` instance would leave every <coin>_XGB_model
    # pointing at whichever asset was fitted last.
    fitted_model = clone(regression_model)
    print("6. ",name+"'s fitting...")
    fitted_model.fit(features, target)

    print("7. predict")
    in_sample_pred = pd.Series(fitted_model.predict(features), index=target.index)
    future_features = dp.out_of_sample(steps=240)
    forecast = pd.Series(fitted_model.predict(future_features), index=future_features.index)

    plt.figure(figsize=(8,3))
    sns.regplot(x=in_sample_pred, y=target, line_kws={"color":"black"})
    plt.show()
    plt.figure(figsize=(8,3))
    sns.histplot((in_sample_pred - target), kde=True, bins=180)
    plt.show()

    # In-sample errors: the model is scored on the same rows it was fitted on.
    print(name+'_MAE:', mean_absolute_error(target, in_sample_pred))
    print(name+'_MSE:', mean_squared_error(target, in_sample_pred))

    print("8. second plot")
    plt.figure(figsize=(24,6))
    ax = target.plot(color='0.25', style='.', title=name+" Target - Seasonal Forecast")
    ax = in_sample_pred.plot(ax=ax, label=name+" Seasonal")
    ax = forecast.plot(ax=ax, label=name+" Seasonal Forecast", color='C3')
    ax.legend()
    plt.show()

    # to_frame names the column explicitly and stays correct even if the
    # Series ever carries a name of its own.
    pred_frame = in_sample_pred.to_frame(name="Target")
    pred_frame["Asset_ID"] = coindict[name]
    fore_frame = forecast.to_frame(name="Target")
    fore_frame["Asset_ID"] = coindict[name]

    # The "_XGB_model" suffix is kept for compatibility; the estimator is
    # whatever `regression_model` was set to in an earlier cell.
    globals().update({
        name+"_X": features,
        name+"_y": target,
        name+"_y_pred": in_sample_pred,
        name+"_X_fore": future_features,
        name+"_y_fore": forecast,
        name+"_XGB_model": fitted_model,
        name+"_df_pred": pred_frame,
        name+"_df_fore": fore_frame,
    })
    print("9."+name+" finished")
print("All Done.")

## Predict and Forecast Dataframes

In [None]:
# Stack the per-asset in-sample prediction frames (<coin>_df_pred) in this
# fixed order, then make sure Target is float64.
_PREDICT_ORDER = ["cardona", "btccash", "binance", "bitcoin", "doge", "eos", "ethcls",
                  "eth", "iota", "ltc", "maker", "tron", "stellar", "monero"]
df_predict = pd.concat([globals()[coin + "_df_pred"] for coin in _PREDICT_ORDER])
df_predict["Target"] = df_predict["Target"].astype(np.float64)
df_predict

In [None]:
df_predict.info()

In [None]:
# Stack the per-asset forecast frames (<coin>_df_fore) in this fixed order.
_FORECAST_ORDER = ["cardona", "btccash", "binance", "bitcoin", "doge", "eos", "ethcls",
                   "eth", "iota", "ltc", "maker", "tron", "stellar", "monero"]
df_forecast = pd.concat([globals()[coin + "_df_fore"] for coin in _FORECAST_ORDER])


In [None]:
df_forecast

In [None]:
df_forecast.info()

## References

##### [Seasonality lesson from Kaggle's Time Series Course](http://www.kaggle.com/ryanholbrook/seasonality)

