# Sentiment and Time Series ML with Crypto

In [1]:
# Libraries
from eod import EodHistoricalData
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import nltk
from textblob import TextBlob
import numpy as np
import random
import plotly.express as px
from prophet import Prophet

# Importing and assigning the api key.
# The contents are stripped so that a trailing newline (or other surrounding
# whitespace) in the key file never becomes part of the token.
with open("../../eodHistoricalData-API.txt", "r") as f:
    api_key = f.read().strip()

# EOD Historical Data client
client = EodHistoricalData(api_key)

# News and Price Data

In [2]:
def getNews(ticker, days):
    """
    Retrieves financial news over the course of a specified number of days for
    a given crypto ticker.

    The look-back period is split into consecutive windows of at most 10 days
    and the news endpoint is queried once per window with limit=100.

    Parameters
    ----------
    ticker : str
        Symbol without exchange suffix (e.g. "BTC"); ".CC" is appended for
        the request.
    days : int
        How many days back from now to retrieve.

    Returns
    -------
    pandas.DataFrame
        One row per article whose tokenized, lower-cased title contains the
        lower-cased ticker. The 'date' column is cut to its first 10
        characters. An empty DataFrame is returned when nothing matched.
    """
    # Size (in days) of each request window
    window = 10

    # List of news
    news = []

    # How many days back to retrieve
    ago = datetime.now() - timedelta(days=days)

    # Walking forward from `ago` to now, one window at a time. Starting at 0
    # and clamping the end to `days` covers the most recent (possibly shorter)
    # window as well, which a range(10, days, 10) loop leaves out.
    # NOTE(review): consecutive windows share a boundary date; if the API
    # treats both bounds as inclusive, articles from that date may be returned
    # twice -- confirm and de-duplicate if needed.
    for start in tqdm(range(0, days, window)):
        end = min(start + window, days)

        # Grabbing the news
        resp = client.get_financial_news(
            s=ticker+".CC",
            from_=(ago+timedelta(days=start)).strftime("%Y-%m-%d"),
            to=(ago+timedelta(days=end)).strftime("%Y-%m-%d"),
            limit=100
        )

        # Adding to the news list
        news.extend(resp)

    # Filtering out irrelevant news: keep articles whose title mentions the ticker
    symbol = ticker.lower()
    lst = [
        article for article in news
        if symbol in nltk.word_tokenize(article['title'].lower())
    ]

    news = pd.DataFrame(lst)

    # Formatting the date (skipped when nothing matched, since an empty frame
    # has no 'date' column to format)
    if not news.empty:
        news['date'] = news['date'].apply(lambda x: x[:10])

    return news

In [3]:
# Retrieving financial news: BTC headlines for a 100-day look-back
news = getNews("BTC", 100)

100%|██████████| 9/9 [00:08<00:00,  1.01it/s]


# Sentiment Analysis

In [4]:
def sentimentPositions(val, thresh=0.1):
    """
    Maps a sentiment score to a trading signal.

    A score strictly above `thresh` gives 1 (Buy), a score strictly below
    `-thresh` gives -1 (Sell), and anything in between gives 0 (Do Nothing).
    """
    is_bullish = val > thresh
    if is_bullish:
        return 1

    is_bearish = val < -thresh
    return -1 if is_bearish else 0

In [5]:
# Scoring every lower-cased headline; element 0 of TextBlob's sentiment is kept
news['sentiment'] = news['title'].apply(
    lambda title: TextBlob(title.lower()).sentiment[0]
)

# Averaging the scores of all headlines that share a date
sent_df = news.groupby('date')[['sentiment']].mean()

# Turning each daily mean into a position; thresh=0 means any non-zero
# mean score produces a Buy or Sell
sent_df['sentiment_positions'] = sent_df['sentiment'].apply(
    sentimentPositions, thresh=0
)

In [6]:
# Daily mean sentiment together with the derived position signal
sent_df

Unnamed: 0_level_0,sentiment,sentiment_positions
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-19,0.132955,1
2021-09-20,0.125,1
2021-09-21,0.0,0
2021-09-24,0.0,0
2021-09-27,0.25,1
2021-09-28,-0.25,-1
2021-09-29,0.0,0
2021-09-30,0.0,0
2021-10-01,0.0,0
2021-10-05,0.148182,1


# FBProphet Time Series Analysis
To backtest properly with sentiment, a certain number of training days must come before the first date in the sentiment DataFrame.

In [56]:
def getPrices(ticker, training_days, sent_df, mov_avg):
    """
    Downloads end-of-day prices starting far enough before the first date in
    the sentiment DataFrame to leave room for training, and shapes the result
    for FBProphet.

    The start date is the first index value of `sent_df` minus
    `training_days + mov_avg` days. A `mov_avg`-day rolling mean of 'open',
    rounded to 2 decimals, becomes the target column 'y'; the date column is
    renamed to 'ds'. Rows containing any NaN are dropped.
    """
    # Start of the download: first sentiment day, pushed back by the training
    # span plus the moving-average window
    first_sentiment_day = datetime.strptime(sent_df.index[0], "%Y-%m-%d")
    start = first_sentiment_day - timedelta(days=training_days+mov_avg)

    # Raw end-of-day prices from the API, indexed by date
    raw = client.get_prices_eod(
        ticker+"-USD.CC",
        from_=start.strftime("%Y-%m-%d")
    )
    frame = pd.DataFrame(raw).set_index('date', drop=True)

    # N-day moving average of the open, rounded as light preprocessing
    frame['MA'] = frame['open'].rolling(window=mov_avg).mean().round(2)

    # Discarding rows with NaNs (including the moving-average warm-up rows)
    frame = frame.dropna()

    # Column names FBProphet expects
    return frame.reset_index().rename(
        columns={"date": "ds", "MA": "y"}
    )

In [87]:
# BTC prices with training_days=365 and a 5-day moving average, anchored on
# the first date in sent_df
prices = getPrices("BTC", 365, sent_df, 5)

prices

Unnamed: 0,ds,open,high,low,close,adjusted_close,volume,y
0,2020-09-18,10946.2442,11035.4876,10836.4234,10935.3768,10935.3768,17023995991,10744.65
1,2020-09-19,10949.5601,11142.1776,10920.7426,11095.1677,11095.1677,13759271061,10868.20
2,2020-09-20,11095.1629,11095.1629,10814.1651,10930.4456,10930.4456,14629736360,10950.36
3,2020-09-21,10932.2450,10988.3972,10391.1061,10455.8932,10455.8932,17829479597,10976.97
4,2020-09-22,10452.4406,10572.7500,10390.4856,10549.9413,10549.9413,14486593090,10875.13
...,...,...,...,...,...,...,...,...
455,2021-12-17,47666.5634,47959.7768,45918.0921,46342.6802,46342.6802,38541915812,48378.82
456,2021-12-18,46250.2676,47215.8890,45712.3787,46981.3627,46981.3627,29878025147,47611.26
457,2021-12-19,46906.1835,47990.6863,46582.4147,46826.6224,46826.6224,30169729900,47630.41
458,2021-12-20,46770.0877,47210.1084,45708.6317,47059.1094,47059.1094,37688401232,47298.42


## Function to Make Predictions

In [88]:
def fbpTrainPredict(df, forecast_period):
    """
    Fits a fresh FB Prophet model on the 'ds' and 'y' columns of `df` and
    forecasts `forecast_period` periods past the end of the data.

    Returns a DF with 'ds', 'yhat', 'yhat_lower' and 'yhat_upper' for every
    row of the future frame built by Prophet.
    """
    # Columns handed back to the caller
    wanted = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']

    # Model with daily, weekly and yearly seasonality switched on
    model = Prophet(
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True
    )

    # Training on the prepared prices
    model.fit(df[['ds', 'y']])

    # Dates to predict, extended by the forecast period
    horizon = model.make_future_dataframe(periods=forecast_period)

    # Predicted values, trimmed to the columns of interest
    return model.predict(horizon)[wanted]

In [101]:
def runningFBP(prices, forecast_period, training_days):
    """
    Runs Facebook Prophet repeatedly over a sliding window of the price data
    and collects, for every window, the prediction made for the last day of
    the forecast period.

    Each iteration fits on the `training_days - forecast_period` rows that
    precede position i in `prices`, forecasts `forecast_period` periods ahead,
    and keeps only the final forecast row.

    Parameters
    ----------
    prices : pandas.DataFrame
        Frame with at least 'ds', 'y' and 'open' columns. It is not modified.
    forecast_period : int
        Number of periods each model predicts ahead.
    training_days : int
        Training span; the window length is training_days - forecast_period.

    Returns
    -------
    pandas.DataFrame
        Columns 'ds', 'open', 'yhat', 'yhat_lower', 'yhat_upper' for the dates
        that have both an actual open and a prediction. Empty when `prices`
        is too short to fit a single window.
    """
    forecast_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']

    # Training amount
    train = training_days - forecast_period

    # Collecting the single-row forecasts in a list and concatenating once:
    # DataFrame.append no longer exists in pandas 2.x, and growing a frame
    # inside the loop copies it on every iteration.
    forecasts = []

    # Running the model on each day
    for i in tqdm(range(train, len(prices))):

        # Training and predicting; only the last day of the forecast is kept
        forecasts.append(
            fbpTrainPredict(
                prices[i-train:i],
                forecast_period
            ).tail(1)[forecast_cols]
        )

    if forecasts:
        pred_df = pd.concat(forecasts, ignore_index=True)
    else:
        # No window fits: keep the expected columns so the merge below works
        pred_df = pd.DataFrame(columns=forecast_cols)

    # Prepping for merge by converting date values to be the same type
    pred_df['ds'] = pred_df['ds'].apply(lambda x: str(x)[:10])

    # Converting on a copy so the caller's prices frame is left untouched
    actual = prices[['ds', 'open']].copy()
    actual['ds'] = actual['ds'].apply(lambda x: str(x)[:10])

    # Merging with the original prices in order to get the actual values;
    # the outer join followed by dropna() keeps only dates present on both
    # sides while preserving the row labels of the outer result.
    merge_df = actual.merge(
        pred_df,
        on='ds',
        how='outer').dropna()

    return merge_df

In [102]:
# Rolling forecasts with forecast_period=10 and training_days=365, so each
# fit uses 365 - 10 = 355 rows
pred_df = runningFBP(prices, 10, 365)

100%|██████████| 105/105 [01:05<00:00,  1.59it/s]


In [103]:
# Actual opens next to the predictions returned by runningFBP
pred_df

Unnamed: 0,ds,open,yhat,yhat_lower,yhat_upper
364,2021-09-17,47818.2536,51980.555864,49520.798755,54273.554608
365,2021-09-18,47298.7756,52059.054515,49685.937191,54574.338212
366,2021-09-19,48322.2203,51917.492103,49583.785315,54253.241309
367,2021-09-20,47277.7951,51616.571020,49078.302305,54024.134557
368,2021-09-21,43004.2889,50655.963306,48155.356116,53046.640802
...,...,...,...,...,...
455,2021-12-17,47666.5634,44794.001622,42345.885389,47552.101473
456,2021-12-18,46250.2676,44280.915980,41717.524081,46959.829359
457,2021-12-19,46906.1835,43920.001436,41427.272003,46623.879173
458,2021-12-20,46770.0877,45332.570452,42766.803773,47756.524341


In [96]:
def fbpPositions(pred_df):
    """
    Placeholder for deriving positions from the predictions and the actual
    values.

    No logic is implemented yet, so calling it returns None.
    """
    return None
    

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2021-09-17,51980.555864,49611.160369,54432.339678
1,2021-09-18,52059.054515,49666.615906,54662.279233
2,2021-09-19,51917.492103,49565.611705,54177.218901
3,2021-09-20,51616.571020,49184.453530,54079.309548
4,2021-09-21,50655.963306,48211.547836,53302.233520
...,...,...,...,...
100,2021-12-26,57693.950347,54871.158092,60152.533412
101,2021-12-27,59412.906847,56864.832363,62045.032404
102,2021-12-28,60207.344084,57794.682684,62865.286935
103,2021-12-29,61443.906143,58887.171052,63842.379469


In [97]:
# Inspecting the price frame again
prices

Unnamed: 0,ds,open,high,low,close,adjusted_close,volume,y
0,2020-09-18,10946.2442,11035.4876,10836.4234,10935.3768,10935.3768,17023995991,10744.65
1,2020-09-19,10949.5601,11142.1776,10920.7426,11095.1677,11095.1677,13759271061,10868.20
2,2020-09-20,11095.1629,11095.1629,10814.1651,10930.4456,10930.4456,14629736360,10950.36
3,2020-09-21,10932.2450,10988.3972,10391.1061,10455.8932,10455.8932,17829479597,10976.97
4,2020-09-22,10452.4406,10572.7500,10390.4856,10549.9413,10549.9413,14486593090,10875.13
...,...,...,...,...,...,...,...,...
455,2021-12-17,47666.5634,47959.7768,45918.0921,46342.6802,46342.6802,38541915812,48378.82
456,2021-12-18,46250.2676,47215.8890,45712.3787,46981.3627,46981.3627,29878025147,47611.26
457,2021-12-19,46906.1835,47990.6863,46582.4147,46826.6224,46826.6224,30169729900,47630.41
458,2021-12-20,46770.0877,47210.1084,45708.6317,47059.1094,47059.1094,37688401232,47298.42


In [92]:
# Reducing both 'ds' columns to the first 10 characters of their string form
# so the two frames can be joined on it
prices['ds'] = prices['ds'].map(lambda stamp: str(stamp)[:10])

pred_df['ds'] = pred_df['ds'].map(lambda stamp: str(stamp)[:10])



In [99]:
# Opens joined with pred_df on 'ds'; dropna() removes rows with any missing value
(
    prices[['ds', 'open']]
    .merge(pred_df, how='outer', on='ds')
    .dropna()
)

Unnamed: 0,ds,open,yhat,yhat_lower,yhat_upper
364,2021-09-17,47818.2536,51980.555864,49611.160369,54432.339678
365,2021-09-18,47298.7756,52059.054515,49666.615906,54662.279233
366,2021-09-19,48322.2203,51917.492103,49565.611705,54177.218901
367,2021-09-20,47277.7951,51616.571020,49184.453530,54079.309548
368,2021-09-21,43004.2889,50655.963306,48211.547836,53302.233520
...,...,...,...,...,...
455,2021-12-17,47666.5634,44794.001622,42114.164771,47260.017017
456,2021-12-18,46250.2676,44280.915980,41621.311039,46874.862721
457,2021-12-19,46906.1835,43920.001436,41171.526235,46397.169095
458,2021-12-20,46770.0877,45332.570452,42809.785180,47998.545209


In [100]:
# Inspecting the daily sentiment frame again
sent_df

Unnamed: 0_level_0,sentiment,sentiment_positions
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-19,0.132955,1
2021-09-20,0.125,1
2021-09-21,0.0,0
2021-09-24,0.0,0
2021-09-27,0.25,1
2021-09-28,-0.25,-1
2021-09-29,0.0,0
2021-09-30,0.0,0
2021-10-01,0.0,0
2021-10-05,0.148182,1
