# Sentiment and Time Series ML with Crypto

In [1]:
# Libraries
from eod import EodHistoricalData
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import nltk
from textblob import TextBlob
import numpy as np
import random
import plotly.express as px
from prophet import Prophet

# Importing and assigning the api key.
# strip() drops surrounding whitespace (e.g. the trailing newline many editors
# append to a text file) so it does not become part of the key given to the client.
with open("../../eodHistoricalData-API.txt", "r") as f:
    api_key = f.read().strip()

# EOD Historical Data client
client = EodHistoricalData(api_key)

# News and Price Data

In [2]:
def getNews(ticker, days, window=10):
    """
    Retrieves financial news for a given crypto ticker over the last `days` days
    and keeps only the articles whose title mentions the ticker.

    The look-back period is requested in consecutive slices of `window` days,
    each request asking for at most 100 articles.

    Parameters
    ----------
    ticker : str
        Ticker symbol without exchange suffix (e.g. "BTC"); ".CC" is appended
        for the request.
    days : int
        How many days back from now to retrieve.
    window : int, default 10
        Length in days of each request slice.

    Returns
    -------
    pandas.DataFrame
        One row per relevant article, with 'date' truncated to its first 10
        characters (YYYY-MM-DD). If nothing relevant is found, an empty frame
        with 'date' and 'title' columns is returned.
    """

    # List of news
    news = []

    # How many days back to retrieve
    ago = datetime.now() - timedelta(days=days)

    # Walking the whole look-back period one slice at a time. Starting at
    # offset 0 and clamping the last slice to `days` ensures the most recent
    # days are requested too, even when `days` is not a multiple of `window`.
    # NOTE(review): consecutive slices share their boundary date; if the API
    # treats both `from_` and `to` as inclusive, boundary-day articles may be
    # returned twice -- confirm against the API docs.
    for start in tqdm(range(0, days, window)):
        end = min(start + window, days)

        # Grabbing the news
        resp = client.get_financial_news(
            s=ticker+".CC",
            from_=(ago+timedelta(days=start)).strftime("%Y-%m-%d"),
            to=(ago+timedelta(days=end)).strftime("%Y-%m-%d"),
            limit=100
        )

        # Adding to the news list
        news.extend(resp)

    # Filtering out irrelevant news: keep an article only when the ticker
    # appears as a standalone token in its lower-cased title
    symbol = ticker.lower()
    relevant = [
        article for article in news
        if symbol in nltk.word_tokenize(article['title'].lower())
    ]

    # Nothing matched: return an empty frame that still has the columns used
    # by this function and by the sentiment step, instead of raising KeyError
    if not relevant:
        return pd.DataFrame(columns=['date', 'title'])

    news = pd.DataFrame(relevant)

    # Formatting the date (keep only the YYYY-MM-DD prefix)
    news['date'] = news['date'].apply(lambda x: x[:10])

    return news

In [3]:
# Retrieving BTC financial news with a 100-day look-back
news = getNews("BTC", 100)

100%|██████████| 9/9 [00:08<00:00,  1.01it/s]


# Sentiment Analysis

In [4]:
def sentimentPositions(val, thresh=0.1):
    """
    Converts a sentiment value into a trading position.

    Returns 1 (Buy) when the value is strictly above `thresh`, -1 (Sell) when
    it is strictly below `-thresh`, and 0 (Do Nothing) otherwise.
    """
    # The Buy check takes priority over the Sell check
    if val > thresh:
        return 1

    # Not a Buy: either a Sell or no position at all
    return -1 if val < -thresh else 0

In [5]:
# Scoring every lower-cased headline/title with TextBlob's polarity
news['sentiment'] = news['title'].map(
    lambda title: TextBlob(title.lower()).sentiment.polarity
)

# One row per date holding the mean sentiment of that day's headlines
sent_df = news.groupby('date')[['sentiment']].mean()

# Turning each daily mean into a position; thresh=0 is forwarded to
# sentimentPositions, so any non-zero mean yields a Buy or Sell
sent_df['sentiment_positions'] = sent_df['sentiment'].apply(
    sentimentPositions, thresh=0
)

In [6]:
# Displaying the daily mean sentiment and the positions derived from it
sent_df

Unnamed: 0_level_0,sentiment,sentiment_positions
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-19,0.132955,1
2021-09-20,0.125,1
2021-09-21,0.0,0
2021-09-24,0.0,0
2021-09-27,0.25,1
2021-09-28,-0.25,-1
2021-09-29,0.0,0
2021-09-30,0.0,0
2021-10-01,0.0,0
2021-10-05,0.148182,1


# FBProphet Time Series Analysis
To backtest properly against the sentiment signal, the model needs a set number of training days of price data before the first date in the sentiment DataFrame.

In [21]:
def getPrices(ticker, training_days, sent_df, ma_window=5):
    """
    Retrieves end-of-day prices starting `training_days` before the first date
    of the sentiment DataFrame, so that a model can be trained on the days
    preceding the sentiment data, and prepares the prices for FBProphet.

    Parameters
    ----------
    ticker : str
        Ticker symbol without suffix (e.g. "BTC"); "-USD.CC" is appended for
        the request.
    training_days : int
        Number of days before the first sentiment date to start the prices at.
    sent_df : pandas.DataFrame
        Sentiment DataFrame whose index holds "YYYY-MM-DD" date strings; the
        first index entry is taken as the first sentiment day.
    ma_window : int, default 5
        Number of rows used for the moving average of the 'open' price.

    Returns
    -------
    pandas.DataFrame
        Price data with the date column renamed to 'ds' and the rounded moving
        average of 'open' as 'y'. Rows containing NaN in any column (including
        the first `ma_window - 1` rows, which have no moving average) are dropped.
    """
    # The first day from the sentiment DF minus the amount of days to train with
    first_sent_day = datetime.strptime(sent_df.index[0], "%Y-%m-%d")
    ago = first_sent_day - timedelta(days=training_days)

    # Getting prices
    prices = pd.DataFrame(
        client.get_prices_eod(ticker+"-USD.CC", from_=ago.strftime("%Y-%m-%d"))
    )

    # Set index
    prices = prices.set_index('date', drop=True)

    # Moving average of the open price, rounded to 2 decimals for some light
    # data preprocessing
    prices['MA'] = prices['open'].rolling(window=ma_window).mean().round(2)

    # Dropping NaNs
    prices = prices.dropna()

    # Resetting format for FBP: date column as 'ds', target as 'y'
    prices = prices.reset_index().rename(columns={"date": "ds", "MA": "y"})

    return prices

In [19]:
# Retrieving BTC prices starting 365 days before the first sentiment date
prices = getPrices("BTC", 365, sent_df)

In [20]:
# Displaying the prepared price data (ds = date, y = moving average of open)
prices

Unnamed: 0,ds,open,high,low,close,adjusted_close,volume,y
0,2020-09-23,10550.0804,10550.0804,10193.9364,10255.6420,10255.6420,14661808898,10795.90
1,2020-09-24,10255.1112,10768.1380,10243.4052,10742.5812,10742.5812,15564198903,10657.01
2,2020-09-25,10744.2385,10762.7511,10595.9050,10700.2197,10700.2197,14210149745,10586.82
3,2020-09-26,10703.2859,10784.8401,10680.6945,10761.7509,10761.7509,12548175041,10541.03
4,2020-09-27,10750.6456,10805.6369,10635.6562,10776.1498,10776.1498,12733405567,10600.67
...,...,...,...,...,...,...,...,...
450,2021-12-17,47666.5634,47959.7768,45918.0921,46342.6802,46342.6802,38541915812,48378.82
451,2021-12-18,46250.2676,47215.8890,45712.3787,46981.3627,46981.3627,29878025147,47611.26
452,2021-12-19,46906.1835,47990.6863,46582.4147,46826.6224,46826.6224,30169729900,47630.41
453,2021-12-20,46770.0877,47210.1084,45708.6317,47059.1094,47059.1094,37688401232,47298.42
