# Sentiment and Time Series ML with Crypto

In [1]:
# Libraries
from eod import EodHistoricalData
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import nltk
from textblob import TextBlob
import numpy as np
import random
import plotly.express as px
from prophet import Prophet

# Importing and assigning the api key
with open("../../eodHistoricalData-API.txt", "r") as f:
    api_key = f.read()
    
# EOD Historical Data client
client = EodHistoricalData(api_key)

# News and Price Data

In [15]:
def getNewsAndPrices(ticker, days):
    """
    Retrieves financial news over the course of a specified number of days for
    a given stock ticker.
    """

    # List of news
    news = []
    
    # How many days back to retrieve
    ago = datetime.now() - timedelta(days=days)

    # Getting news over the course of a year
    for i in tqdm(range(10, days, 10)):

        # Grabbing the news
        resp = client.get_financial_news(
            s=ticker+".CC",
            from_=(ago+timedelta(days=i-10)).strftime("%Y-%m-%d"),
            to=(ago+timedelta(days=i)).strftime("%Y-%m-%d"),
            limit=100
        )

        # Adding to the news list
        news.extend(resp)
                
    # Filtering out irrelevant news
    lst = [i for i in news if sum(x in {ticker.lower()} for x in nltk.word_tokenize(i['title'].lower()))>=1]
    
    # Formatting the date
    news = pd.DataFrame(lst)
    
    news['date'] = news['date'].apply(lambda x: x[:10])
    
    # Getting prices
    prices = pd.DataFrame(client.get_prices_eod(ticker+"-USD.CC", from_=ago.strftime("%Y-%m-%d")))
    
    # Set index
    prices = prices.set_index('date', drop=True)
    
    return news, prices

In [16]:
# Retrieving financial news and price history
news, prices = getNewsAndPrices("BTC", 100)

100%|██████████| 9/9 [00:07<00:00,  1.25it/s]


# Sentiment Analysis

In [21]:
def sentimentPositions(val, thresh=0.1):
    """
    Returns position as 1, -1, or 0 for Buy, Sell, 
    and Do Nothing respectively based on the given 
    sentiment value and threshold.
    """
    if val > thresh:
        return 1
    elif val< -thresh:
        return -1
    else:
        return 0

In [22]:
# Getting sentiment values for the news headlines/titles
news['sentiment'] = news['title'].apply(
    lambda x: TextBlob(x.lower()).sentiment[0]
)

# Grouping together dates and aggregating sentiment scores from the same day
sent_df = news.groupby('date')[['sentiment']].mean()

# Applying the position function
sent_df['sentiment_positions'] = sent_df['sentiment'].apply(
    lambda x: sentimentPositions(x, thresh=0)
)

In [23]:
sent_df

Unnamed: 0_level_0,sentiment,sentiment_positions
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-20,0.125,1
2021-09-21,0.0,0
2021-09-22,0.0,0
2021-09-23,0.116667,1
2021-09-24,0.0,0
2021-09-27,0.25,1
2021-09-30,0.0,0
2021-10-01,0.0,0
2021-10-03,0.25,1
2021-10-04,0.068182,1


# FBProphet Time Series Analysis

In [24]:
prices

Unnamed: 0_level_0,open,high,low,close,adjusted_close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-07,52665.7536,52875.4712,46040.6604,47001.7186,47001.7186,72258785066
2021-09-08,46948.0736,47375.8832,44851.7634,46125.2103,46125.2103,53815385181
2021-09-09,46152.1959,47296.5304,45855.3246,46445.9006,46445.9006,42145201034
2021-09-10,46458.1044,47077.1280,44510.9888,44905.9224,44905.9224,41196751575
2021-09-11,44925.6059,45993.8024,44925.6059,45134.5564,45134.5564,38087420127
...,...,...,...,...,...,...
2021-12-12,49273.9673,50611.8516,48809.0909,50122.2697,50122.2697,29406503622
2021-12-13,50088.0459,50156.7202,46281.5590,46834.7653,46834.7653,36915805970
2021-12-14,46810.4473,48614.3355,46568.6195,48431.8453,48431.8453,37096313909
2021-12-15,48430.0256,49450.1066,46815.0915,48838.7513,48838.7513,39743338286


In [25]:
# Getting the N Day Moving Average and rounding the values for some light data preprocessing
prices['MA'] = prices[['open']].rolling(window=5).mean().apply(lambda x: round(x, 2))

# Dropping Nans
prices.dropna(inplace=True)

# Resetting format for FBP
prices = prices.reset_index().rename(columns={"date": "ds", "MA": "y"})

In [26]:
prices

Unnamed: 0,ds,open,high,low,close,adjusted_close,volume,y
0,2021-09-11,44925.6059,45993.8024,44925.6059,45134.5564,45134.5564,38087420127,47429.95
1,2021-09-12,45199.7642,46409.1619,44915.6236,46141.1329,46141.1329,31309756182,45936.75
2,2021-09-13,46124.5916,46467.0069,43973.4083,45098.4153,45098.4153,46064300003,45772.05
3,2021-09-14,45047.6418,47224.1355,44975.5790,47068.1670,47068.1670,44795960618,45551.14
4,2021-09-15,47102.8249,48470.7902,46834.0583,48156.6441,48156.6441,34075640316,45680.09
...,...,...,...,...,...,...,...,...
92,2021-12-12,49273.9673,50611.8516,48809.0909,50122.2697,50122.2697,29406503622,49159.85
93,2021-12-13,50088.0459,50156.7202,46281.5590,46834.7653,46834.7653,36915805970,49039.53
94,2021-12-14,46810.4473,48614.3355,46568.6195,48431.8453,48431.8453,37096313909,48307.50
95,2021-12-15,48430.0256,49450.1066,46815.0915,48838.7513,48838.7513,39743338286,48417.55
