<h1>Polynomial Regression Model</h1>
<p>Models generated using Sklearn. Datasets gathered from <a href="https://www.coindesk.com/">CoinDesk</a>.</p>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from pandas_profiling import ProfileReport
import math as ma

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

import operator

get_ipython().run_line_magic('matplotlib', 'inline')

# ['#f39506', '#115268', '#FDB6BA', '#BC8E8E', '#4682B4', '#8486BF']

In [None]:
def days_prior(df: pd.DataFrame, days: int, time_frame: int = 0) -> pd.DataFrame:
    '''
        Select a window of rows counted back from the end of df.

        The window starts at the index value len(df) - floor(days). When
        time_frame is 0 the window runs to the end of df; otherwise it covers
        floor(time_frame) index values starting from that point.

        Rows are matched by comparing df.index against these numbers, so this
        is a "last N rows" slice only when the index counts 0..len(df)-1
        (NOTE(review): presumably the default index from read_csv - confirm).
    '''
    window_start = len(df.index) - ma.floor(days)
    selected = df.index >= window_start

    if time_frame != 0.0:
        # Bounded window: also drop everything at or past the exclusive upper bound.
        window_stop = window_start + ma.floor(time_frame)
        selected = selected & (df.index < window_stop)

    return df[selected]

    
def train_test_split_by_date(df: pd.DataFrame, x: str, y: str, reverse_epoch: int, time_interval: int = None):
    
    '''
        returns a list of x_train, y_train, x_test, y_test, x_full, y_full in order

        df            : frame to split; rows are selected through days_prior
        x             : feature column name, or 'index' to use the frame's index values
        y             : target column name
        reverse_epoch : how far back from the end of df the window starts
        time_interval : length of the training window; None or 0 means
                        reverse_epoch - 1 (the whole window except its last row)

        The test set is the single row right after the training window, and the
        full set is the training window plus that row. Every entry of the
        returned list is a numpy array.
    '''

    # Identity check for None (PEP 8); 0 is treated the same as "not given".
    if time_interval is None or time_interval == 0.0:
        time_interval = reverse_epoch - 1

    df_full  = days_prior(df, reverse_epoch, time_interval + 1)
    df_train = days_prior(df, reverse_epoch, time_interval)
    df_test  = days_prior(df, reverse_epoch - time_interval, 1)

    def feature_values(frame: pd.DataFrame):
        # 'index' is a sentinel for the frame's index, not a column name.
        source = frame.index if x == 'index' else frame[x]
        return source.to_numpy()

    x_train, x_test, x_full = (feature_values(frame) for frame in (df_train, df_test, df_full))
    y_train, y_test, y_full = (frame[y].to_numpy() for frame in (df_train, df_test, df_full))

    return [x_train, y_train, x_test, y_test, x_full, y_full]


def polynomial_regression_model_predict(df: pd.DataFrame, degree: int, epoch: int, period: float = None, name: str = 'df', color: list = None) -> dict:
    '''
        Fit a polynomial regression of 'Closing Price (USD)' against the row
        index, plot the result, and report the prediction error.

        df     : price frame; needs 'Date' and 'Closing Price (USD)' columns
        degree : degree handed to PolynomialFeatures
        epoch  : forwarded to train_test_split_by_date as reverse_epoch
        period : forwarded to train_test_split_by_date as time_interval
        name   : plot title
        color  : colours used, in order, for: tested value, predicted value,
                 full data, train data, prediction curve (at least five
                 entries). None selects the built-in palette below.

        returns a dict with
            'accuracy' : absolute error |prediction - actual| per test point
            'rmse'     : root mean squared error of the fit over the full window
            'r2'       : r2_score of the fit over the full window
    '''

    # Build the default palette per call instead of sharing one mutable default list.
    if color is None:
        color = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

    x_train, y_train, x_test, y_test, x_full, y_full = train_test_split_by_date(df, 'index', 'Closing Price (USD)', epoch, period)

    polynomial_features = PolynomialFeatures(degree = degree)
    x_poly_train = polynomial_features.fit_transform(x_train[:, None])

    model = LinearRegression()
    model.fit(x_poly_train, y_train)

    # The transformer is fitted on the training data only; held-out data is just transformed.
    y_pred_test = model.predict(polynomial_features.transform(x_test[:, None]))
    y_pred_full = model.predict(polynomial_features.transform(x_full[:, None]))

    # Score while predictions are still row-aligned with y_full, i.e. before
    # anything is reordered for plotting.
    metrics = {
        'accuracy': abs(y_pred_test - y_test),
        'rmse': np.sqrt(mean_squared_error(y_full, y_pred_full)),
        'r2': r2_score(y_full, y_pred_full),
    }

    plt.figure(figsize = (30,10))
    plt.title(name)
    plt.grid(False)

    dates = df['Date']
    test_dates = dates[ x_test[0] : x_test[-1] + 1]

    plt.scatter(test_dates, y_test, label = 'Tested value', color = color[0])
    plt.scatter(test_dates, y_pred_test, label = 'Predicted value', color = color[1])

    plt.plot(dates[ x_full[0] : x_full[-1] + 1], y_full, label = 'Full data', color = color[2])
    plt.plot(dates[ x_train[0] : x_train[-1] + 1], y_train, label = 'Train data', color = color[3])

    # Draw the fitted curve left-to-right; a stable sort keeps ties in input order.
    order = np.argsort(x_full, kind = 'stable')
    x_sorted = x_full[order]

    plt.plot(dates[ x_sorted[0] : x_sorted[-1] + 1], y_pred_full[order], label = 'Prediction curve', color = color[4])

    plt.legend(loc = 'best')
    plt.show()

    return metrics


def deviation(df: pd.DataFrame, columns: list, duration: int) -> pd.DataFrame:
    '''
    Row-wise difference between two columns over the window that
    days_prior(df, duration) selects.

    duration in days
    columns: two column names; Deviation = df[columns[0]] - df[columns[1]]

    returns a frame with the window's 'Date' column and a 'Deviation' column
    '''

    new_df = days_prior(df, duration)

    # One vectorised subtraction instead of a Python-level lookup per row.
    # to_numpy() makes the values attach by position, as the former list did.
    deviation_data = (new_df[columns[0]] - new_df[columns[1]]).to_numpy()

    return pd.DataFrame({'Date': new_df['Date'], 'Deviation': deviation_data})



In [None]:
# Load one price-history CSV per coin: 'Date' is parsed as datetimes and the
# 'Currency' column is dropped.
df_cardano = (
    pd.read_csv('../data/Cardano.csv', parse_dates = ['Date'])
    .drop(columns = ['Currency'])
)
df_ethereum = (
    pd.read_csv('../data/Ethereum.csv', parse_dates = ['Date'])
    .drop(columns = ['Currency'])
)
df_solana = (
    pd.read_csv('../data/Solana.csv', parse_dates = ['Date'])
    .drop(columns = ['Currency'])
)
df_dogecoin = (
    pd.read_csv('../data/Dogecoin.csv', parse_dates = ['Date'])
    .drop(columns = ['Currency'])
)

<h2> Polynomial Regression Method Accuracy Test </h2>

<p>
    Accuracy will be tested using four datasets: ETH, ADA, DOGE, and SOL. <br/>
    Accuracy will be measured by calculating the average prediction error of every model created using cryptocurrency datasets.
</p>
<p>Testing Categories:</p>
<ul>
    <li>Long-term accuracy — accuracy of prediction <i>3 weeks</i> into the future.</li>
    <li>Medium-term accuracy — accuracy of prediction <i>1 week</i> into the future.</li>
    <li>Short-term accuracy — accuracy of prediction <i>2 days</i> into the future.</li>
</ul>


In [None]:
# Testing



In [None]:
# Cardano: closing price minus 24h open over a 90-day window, drawn as a line.
df_deviation = deviation(
    df_cardano,
    columns = ['Closing Price (USD)', '24h Open (USD)'],
    duration = 90,
)

plt.figure(figsize = (30, 10))
plt.grid(False)
plt.title('deviation')
plt.plot(df_deviation['Date'], df_deviation['Deviation'])

In [None]:
# Ether: run the degree-6 model over progressively shorter look-back values
# and print the metrics dict returned by each run.
for lookback in (120, 90, 60, 30, 14, 7):
    print(polynomial_regression_model_predict(df_ethereum, 6, lookback, name = 'Ether'))

In [None]:
# Cardano: run the degree-6 model over progressively shorter look-back values
# and print the metrics dict returned by each run.
for lookback in (120, 90, 60, 30, 14, 7):
    print(polynomial_regression_model_predict(df_cardano, 6, lookback, name = 'Cardano'))

In [None]:
# Solana: (degree, look-back) pairs; the shortest window uses degree 4
# instead of 6. Each run prints its metrics dict.
for poly_degree, lookback in ((6, 60), (6, 30), (6, 14), (4, 7)):
    print(polynomial_regression_model_predict(df_solana, poly_degree, lookback, name = 'Solana'))