# Classification Models with Crypto

In [1]:
# libraries
import pandas as pd
from ta import add_all_ta_features
from datetime import datetime, timedelta
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px
from tqdm import tqdm
from eod_historical_data import get_eod_data

# 1. Getting Crypto Price Data

In [2]:
# Importing and assigning the API key.
# Surrounding whitespace is stripped because the text file may end with a
# newline, which would otherwise become part of the key.
with open("../eodHistoricalData-API.txt", "r") as f:
    api_key = f.read().strip()
    

def getCryptoPrice(api_key, ticker="BTC-USD", n_days=1000):
    """
    Fetch the price history of a crypto ticker for the last `n_days` days.

    The window ends at the moment of the call and starts `n_days` days
    earlier.  The data is requested through `get_eod_data` (with "CC" as its
    second positional argument) and returned without the "Adjusted_close"
    column.
    """
    # The requested window ends right now ...
    window_end = datetime.now()

    # ... and begins n_days before that
    window_start = window_end - timedelta(days=n_days)

    # Retrieving the price history for the requested window
    price_history = get_eod_data(
        ticker, "CC", api_key=api_key, start=window_start, end=window_end
    )

    # Removing the unused column in place
    price_history.drop(columns=["Adjusted_close"], inplace=True)

    return price_history

## Adding Technical Indicators

In [None]:
# Appending the technical-indicator columns to the price data; fillna=True is
# passed so the library fills missing indicator values itself.
# NOTE(review): `df` is not assigned at top level in the cells shown here --
# presumably it holds the result of getCryptoPrice(api_key); confirm before running.
df = add_all_ta_features(
    df, fillna=True,
    open="Open", high="High", low="Low", close="Close", volume="Volume",
)

# 2. Data Transformation/Preprocessing

In [None]:
def transformData(df, days=1):
    """
    Transform raw data into X variables and a Y variable for training.

    Every column of `df` is replaced by its percent change over `days` rows,
    multiplied by 100 and rounded to 2 decimal places.  Infinite results
    (e.g. a change from 0) are treated as missing values.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; must contain an 'Open' column, which is used to build
        the Y variable.
    days : int, default 1
        Number of rows to look back for the percent change, and number of
        rows to look ahead for the Y variable.

    Returns
    -------
    new_df : pandas.DataFrame
        The transformed X variables plus the column f'future_{days}_days',
        which holds the percent change of 'Open' over the *next* `days`
        rows.  Rows without a Y value are dropped and the remaining NaNs
        are forward-filled.
    last_val : pandas.DataFrame
        The last transformed row without the Y column.  It is captured
        before forward-filling, so it can still contain NaNs.
    """
    target_col = f'future_{days}_days'

    # Transforming data
    new_df = df.pct_change(
        days
    ).apply(
        lambda x: round(x*100, 2)
    ).replace(
        [np.inf, -np.inf],
        np.nan
    )

    # Dropping sparse rows and columns.  `thresh` is the minimum number of
    # non-NaN values required to KEEP a row/column.  Both thresholds are
    # computed from the shape of new_df before anything is dropped.
    new_df = new_df.dropna(
        thresh=round(new_df.shape[1]*.7) # Keep rows with at least ~70% non-NaN values
    ).dropna(
        axis=1,
        thresh=round(new_df.shape[0]*.7) # Keep columns with at least ~70% non-NaN values
    )

    # What the percent change is going to be in the next days AKA the Y Variable
    new_df[target_col] = df['Open'].pct_change(
        days
    ).shift(
        -days
    ).apply(
        lambda x: round(x*100, 2)
    )

    # Saving the last value in the dataset for later (it has no Y value yet)
    last_val = new_df.tail(1).drop(target_col, axis=1)

    # Dropping the last NaNs from the Y variable
    new_df = new_df.dropna(subset=[target_col])

    # Filling in the rest of the NaNs with the most recent value.
    # .ffill() replaces the deprecated fillna(method='ffill'); the trailing
    # dropna() removes leading rows that have nothing to fill from.
    new_df = new_df.ffill().dropna()

    return new_df, last_val

# 3. Fitting to Classification Model