## Importing the Dataset

In [1]:
import pandas as pd

In [2]:
dataset_path = '../../dataset/'

In [3]:
df_news = pd.read_csv(dataset_path + '/news/news [only_date].csv')
df_price = pd.read_csv(dataset_path + '/price/btc_usd_daily.csv')

In [4]:
df_news.head()

Unnamed: 0,date,headline,source
0,2020-03-16,"Bitcoin Risks Falling to $2,900 if Market is H...",https://www.newsbtc.com/2020/03/14/bitcoin-ris...
1,2020-03-16,"Here’s Why Bitcoin Didn’t Bottom at $3,800 Acc...",https://www.newsbtc.com/2020/03/15/heres-why-b...
2,2020-03-16,"Bitcoin Sinks to $4,390 as Dow’s 1,000-Point D...",https://www.newsbtc.com/2020/03/16/bitcoin-sin...
3,2020-03-16,Bitcoin could soon be in the museum of illusio...,https://eng.ambcrypto.com/bitcoin-could-soon-b...
4,2020-03-16,Bitcoin Price Analysis: BTC’s 3-Day Consolidat...,https://cryptopotato.com/bitcoin-price-analysi...


In [5]:
df_news.shape

(17047, 3)

In [6]:
df_price.head()

Unnamed: 0,date,open,close,high,low,change,volume,market_cap
0,2020-03-24,6436.64,6734.8,6789.02,6411.07,4.63,48221910672,123148917787
1,2020-03-23,5831.37,6416.31,6443.93,5785.0,10.03,46491916000,117314776187
2,2020-03-22,6185.56,5830.25,6359.7,5823.71,-5.74,40099664740,106591196069
3,2020-03-21,6206.52,6185.07,6378.14,5932.82,-0.35,42494390880,113068192795
4,2020-03-20,6191.65,6198.78,6844.26,5865.78,0.12,54442976103,113309245860


In [7]:
df_price.shape

(2522, 8)

## Data cleaning and preprocessing

In [8]:
# Grouping the news according to date
date_index = df_news.groupby(['date']).groups
keys = list(date_index.keys())

dict = {}

for key in keys:
    titles = df_news[df_news['date'] == key]['headline']
    titles = titles.values.tolist()
    dict[key] = titles
    
df_grouped_news = pd.DataFrame(list(dict.values()))
df_grouped_news.insert(0, "date", keys)
df_grouped_news.head()

Unnamed: 0,date,0,1,2,3,4,5,6,7,8,...,32,33,34,35,36,37,38,39,40,41
0,2017-01-01,Colombia Declares Bitcoin and Other Digital Cu...,Bitcoin Consolidation Continues,"The price of bitcoin passed $1,000 during the ...",The New Year Could Bode Well for Bitcoin and B...,Bitcoin Starts 2017 at the $1000,The European Union Wants to Identify Bitcoin U...,,,,...,,,,,,,,,,
1,2017-01-02,Bitspark's George Harrap lists his takeaways f...,Do 2016's political changes foreshadow blockch...,Republic of Georgia to Introduce Blockchain Pl...,Can Cryptocoins Be Environmentally Friendly?,Mexico’s Largest Bank Acquires Fintech Payment...,Will Bitcoin Reach an All-Time High?,Vitalik Buterin: Bitcoin More Likely Than Ethe...,How Blockchain Technology Can Improve the Airl...,Bitcoin Price is Trading Beyond $1000 in 2017,...,,,,,,,,,,
2,2017-01-03,State Street bank is on the verge of putting s...,Market intelligence firm Greenwich Associates ...,India's fifth-largest bank has built a blockch...,Argentinian bitcoin startup Ripio has raised $...,What's in store for Delaware's blockchain work...,Spanish banking group BBVA has released a new ...,Financial Times Writer Says Bitcoin is Worth “...,Bitcoin Has Hit a Price Target,Dutch and Korean Mainstream Media Extensively ...,...,,,,,,,,,,
3,2017-01-04,Charlie Shrem is re-imagining investment on th...,Kraken announced 3rd January that it had becom...,The price of bitcoin is now just $110 short of...,PwC's Ajit Tripathi gives CoinDesk his predict...,2016 was a quiet one for blockchain regulation...,One of the UAE's largest telcos has announced ...,The SEC has delayed its decision on the Winkle...,"Bitcoin prices surged 4th January, surpassing ...","Bitcoin prices have shot past the $1,100 mark ...",...,,,,,,,,,,
4,2017-01-05,Need a catch-up on recent price developments? ...,China Intervenes Yuan Soars Bitcoin Crashes,Iranian Banker: Bitcoin is Changing Banking; C...,FinTech in Nigeria Set to Grow,Hyperledger Blockchain Project Announces ‘Tech...,Argentine Bitcoin Wallet BitPagos Raises $1.9 ...,Newsflash: Bitcoin Price Takes a Steep Fall Be...,CoinDesk contributor Willy Woo presents an in-...,Bitcoin Rally Was Starting to Feel Unsustainable,...,,,,,,,,,,


In [9]:
df_grouped_news.shape

(1128, 43)

In [10]:
# Change the date format of dataframes
df_price['date'] = pd.to_datetime(df_price['date'])
df_grouped_news['date'] = pd.to_datetime(df_grouped_news['date'])

In [11]:
combined_data = pd.merge(df_grouped_news, df_price, on='date', how='inner')
combined_data.head()

Unnamed: 0,date,0,1,2,3,4,5,6,7,8,...,39,40,41,open,close,high,low,change,volume,market_cap
0,2017-01-01,Colombia Declares Bitcoin and Other Digital Cu...,Bitcoin Consolidation Continues,"The price of bitcoin passed $1,000 during the ...",The New Year Could Bode Well for Bitcoin and B...,Bitcoin Starts 2017 at the $1000,The European Union Wants to Identify Bitcoin U...,,,,...,,,,963.66,998.33,1003.08,958.7,3.6,147775008,16050407461
1,2017-01-02,Bitspark's George Harrap lists his takeaways f...,Do 2016's political changes foreshadow blockch...,Republic of Georgia to Introduce Blockchain Pl...,Can Cryptocoins Be Environmentally Friendly?,Mexico’s Largest Bank Acquires Fintech Payment...,Will Bitcoin Reach an All-Time High?,Vitalik Buterin: Bitcoin More Likely Than Ethe...,How Blockchain Technology Can Improve the Airl...,Bitcoin Price is Trading Beyond $1000 in 2017,...,,,,998.62,1021.75,1031.39,996.7,2.32,222184992,16429024775
2,2017-01-03,State Street bank is on the verge of putting s...,Market intelligence firm Greenwich Associates ...,India's fifth-largest bank has built a blockch...,Argentinian bitcoin startup Ripio has raised $...,What's in store for Delaware's blockchain work...,Spanish banking group BBVA has released a new ...,Financial Times Writer Says Bitcoin is Worth “...,Bitcoin Has Hit a Price Target,Dutch and Korean Mainstream Media Extensively ...,...,,,,1021.6,1043.84,1044.08,1021.6,2.18,185168000,16786368910
3,2017-01-04,Charlie Shrem is re-imagining investment on th...,Kraken announced 3rd January that it had becom...,The price of bitcoin is now just $110 short of...,PwC's Ajit Tripathi gives CoinDesk his predict...,2016 was a quiet one for blockchain regulation...,One of the UAE's largest telcos has announced ...,The SEC has delayed its decision on the Winkle...,"Bitcoin prices surged 4th January, surpassing ...","Bitcoin prices have shot past the $1,100 mark ...",...,,,,1044.4,1154.73,1159.42,1044.4,10.56,344945984,18571869009
4,2017-01-05,Need a catch-up on recent price developments? ...,China Intervenes Yuan Soars Bitcoin Crashes,Iranian Banker: Bitcoin is Changing Banking; C...,FinTech in Nigeria Set to Grow,Hyperledger Blockchain Project Announces ‘Tech...,Argentine Bitcoin Wallet BitPagos Raises $1.9 ...,Newsflash: Bitcoin Price Takes a Steep Fall Be...,CoinDesk contributor Willy Woo presents an in-...,Bitcoin Rally Was Starting to Feel Unsustainable,...,,,,1156.73,1013.38,1191.1,910.42,-12.39,510199008,16300254795


In [12]:
# Adding labels to the news
label = combined_data['change'].apply(lambda x: 1 if x>0 else 0)
combined_data['label'] = label
combined_data.head()

Unnamed: 0,date,0,1,2,3,4,5,6,7,8,...,40,41,open,close,high,low,change,volume,market_cap,label
0,2017-01-01,Colombia Declares Bitcoin and Other Digital Cu...,Bitcoin Consolidation Continues,"The price of bitcoin passed $1,000 during the ...",The New Year Could Bode Well for Bitcoin and B...,Bitcoin Starts 2017 at the $1000,The European Union Wants to Identify Bitcoin U...,,,,...,,,963.66,998.33,1003.08,958.7,3.6,147775008,16050407461,1
1,2017-01-02,Bitspark's George Harrap lists his takeaways f...,Do 2016's political changes foreshadow blockch...,Republic of Georgia to Introduce Blockchain Pl...,Can Cryptocoins Be Environmentally Friendly?,Mexico’s Largest Bank Acquires Fintech Payment...,Will Bitcoin Reach an All-Time High?,Vitalik Buterin: Bitcoin More Likely Than Ethe...,How Blockchain Technology Can Improve the Airl...,Bitcoin Price is Trading Beyond $1000 in 2017,...,,,998.62,1021.75,1031.39,996.7,2.32,222184992,16429024775,1
2,2017-01-03,State Street bank is on the verge of putting s...,Market intelligence firm Greenwich Associates ...,India's fifth-largest bank has built a blockch...,Argentinian bitcoin startup Ripio has raised $...,What's in store for Delaware's blockchain work...,Spanish banking group BBVA has released a new ...,Financial Times Writer Says Bitcoin is Worth “...,Bitcoin Has Hit a Price Target,Dutch and Korean Mainstream Media Extensively ...,...,,,1021.6,1043.84,1044.08,1021.6,2.18,185168000,16786368910,1
3,2017-01-04,Charlie Shrem is re-imagining investment on th...,Kraken announced 3rd January that it had becom...,The price of bitcoin is now just $110 short of...,PwC's Ajit Tripathi gives CoinDesk his predict...,2016 was a quiet one for blockchain regulation...,One of the UAE's largest telcos has announced ...,The SEC has delayed its decision on the Winkle...,"Bitcoin prices surged 4th January, surpassing ...","Bitcoin prices have shot past the $1,100 mark ...",...,,,1044.4,1154.73,1159.42,1044.4,10.56,344945984,18571869009,1
4,2017-01-05,Need a catch-up on recent price developments? ...,China Intervenes Yuan Soars Bitcoin Crashes,Iranian Banker: Bitcoin is Changing Banking; C...,FinTech in Nigeria Set to Grow,Hyperledger Blockchain Project Announces ‘Tech...,Argentine Bitcoin Wallet BitPagos Raises $1.9 ...,Newsflash: Bitcoin Price Takes a Steep Fall Be...,CoinDesk contributor Willy Woo presents an in-...,Bitcoin Rally Was Starting to Feel Unsustainable,...,,,1156.73,1013.38,1191.1,910.42,-12.39,510199008,16300254795,0


In [13]:
combined_data.shape

(1128, 51)

In [14]:
# Splitting the train and test sets
train = combined_data[combined_data['date'] < '20191201']
test = combined_data[combined_data['date'] > '20191130']

In [15]:
# Removing punctuations
data = train.iloc[:, 1:24]
data.replace("[^a-zA-Z]", " ", regex=True, inplace=True)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,Colombia Declares Bitcoin and Other Digital Cu...,Bitcoin Consolidation Continues,The price of bitcoin passed during the ...,The New Year Could Bode Well for Bitcoin and B...,Bitcoin Starts at the,The European Union Wants to Identify Bitcoin U...,,,,,...,,,,,,,,,,
1,Bitspark s George Harrap lists his takeaways f...,Do s political changes foreshadow blockch...,Republic of Georgia to Introduce Blockchain Pl...,Can Cryptocoins Be Environmentally Friendly,Mexico s Largest Bank Acquires Fintech Payment...,Will Bitcoin Reach an All Time High,Vitalik Buterin Bitcoin More Likely Than Ethe...,How Blockchain Technology Can Improve the Airl...,Bitcoin Price is Trading Beyond in,,...,,,,,,,,,,
2,State Street bank is on the verge of putting s...,Market intelligence firm Greenwich Associates ...,India s fifth largest bank has built a blockch...,Argentinian bitcoin startup Ripio has raised ...,What s in store for Delaware s blockchain work...,Spanish banking group BBVA has released a new ...,Financial Times Writer Says Bitcoin is Worth ...,Bitcoin Has Hit a Price Target,Dutch and Korean Mainstream Media Extensively ...,Scammers in Australia Demand Bitcoin and Gift ...,...,Bitcoin Will Get Native Support in Microsoft E...,,,,,,,,,
3,Charlie Shrem is re imagining investment on th...,Kraken announced rd January that it had becom...,The price of bitcoin is now just short of...,PwC s Ajit Tripathi gives CoinDesk his predict...,was a quiet one for blockchain regulation...,One of the UAE s largest telcos has announced ...,The SEC has delayed its decision on the Winkle...,Bitcoin prices surged th January surpassing ...,Bitcoin prices have shot past the mark ...,Bitcoin Price Blitzes Beyond Closes in o...,...,Bitcoin s Market Cap Crosses Billion,UAE Healthcare Provider to Store Patient Recor...,Bitcoin Usage Gains Traction in Indonesia,Yes Bank Develops Blockchain to Digitize Vendo...,Bitcoin Makes History Reaches Gold Parity,Despite the hype around enterprise blockchain ...,Insurance market advisors to the US Treasury D...,A new bitcoin software update boasts more than...,,
4,Need a catch up on recent price developments ...,China Intervenes Yuan Soars Bitcoin Crashes,Iranian Banker Bitcoin is Changing Banking C...,FinTech in Nigeria Set to Grow,Hyperledger Blockchain Project Announces Tech...,Argentine Bitcoin Wallet BitPagos Raises ...,Newsflash Bitcoin Price Takes a Steep Fall Be...,CoinDesk contributor Willy Woo presents an in ...,Bitcoin Rally Was Starting to Feel Unsustainable,Hacker Compromises Bitcoin Wallet KeepKey Foun...,...,CoinDesk s Corin Faife recaps how last year sa...,Bitcoin prices plunged nearly in an hour ...,Bitcoin exchange Bitstamp s funding campaign h...,Industry insiders share their thoughts about a...,,,,,,


In [16]:
# Converting titles to lowercase
rows, cols = data.shape
for index in range(0, cols):
    data[index] = data[index].str.lower()
    
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,colombia declares bitcoin and other digital cu...,bitcoin consolidation continues,the price of bitcoin passed during the ...,the new year could bode well for bitcoin and b...,bitcoin starts at the,the european union wants to identify bitcoin u...,,,,,...,,,,,,,,,,
1,bitspark s george harrap lists his takeaways f...,do s political changes foreshadow blockch...,republic of georgia to introduce blockchain pl...,can cryptocoins be environmentally friendly,mexico s largest bank acquires fintech payment...,will bitcoin reach an all time high,vitalik buterin bitcoin more likely than ethe...,how blockchain technology can improve the airl...,bitcoin price is trading beyond in,,...,,,,,,,,,,
2,state street bank is on the verge of putting s...,market intelligence firm greenwich associates ...,india s fifth largest bank has built a blockch...,argentinian bitcoin startup ripio has raised ...,what s in store for delaware s blockchain work...,spanish banking group bbva has released a new ...,financial times writer says bitcoin is worth ...,bitcoin has hit a price target,dutch and korean mainstream media extensively ...,scammers in australia demand bitcoin and gift ...,...,bitcoin will get native support in microsoft e...,,,,,,,,,
3,charlie shrem is re imagining investment on th...,kraken announced rd january that it had becom...,the price of bitcoin is now just short of...,pwc s ajit tripathi gives coindesk his predict...,was a quiet one for blockchain regulation...,one of the uae s largest telcos has announced ...,the sec has delayed its decision on the winkle...,bitcoin prices surged th january surpassing ...,bitcoin prices have shot past the mark ...,bitcoin price blitzes beyond closes in o...,...,bitcoin s market cap crosses billion,uae healthcare provider to store patient recor...,bitcoin usage gains traction in indonesia,yes bank develops blockchain to digitize vendo...,bitcoin makes history reaches gold parity,despite the hype around enterprise blockchain ...,insurance market advisors to the us treasury d...,a new bitcoin software update boasts more than...,,
4,need a catch up on recent price developments ...,china intervenes yuan soars bitcoin crashes,iranian banker bitcoin is changing banking c...,fintech in nigeria set to grow,hyperledger blockchain project announces tech...,argentine bitcoin wallet bitpagos raises ...,newsflash bitcoin price takes a steep fall be...,coindesk contributor willy woo presents an in ...,bitcoin rally was starting to feel unsustainable,hacker compromises bitcoin wallet keepkey foun...,...,coindesk s corin faife recaps how last year sa...,bitcoin prices plunged nearly in an hour ...,bitcoin exchange bitstamp s funding campaign h...,industry insiders share their thoughts about a...,,,,,,


In [17]:
titles = []

for row in range(0, rows):
    titles.append(' '.join(str(x) for x in data.iloc[row, 0: cols] if str(x) != 'None'))
    
titles[0]

'colombia declares bitcoin and other digital currencies illegal bitcoin consolidation continues the price of bitcoin passed        during the first day of trading in       the new year could bode well for bitcoin and blockchains bitcoin starts      at the       the european union wants to identify bitcoin users'

## Training model

In [18]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier

In [19]:
# implement Bag of Words
countvector = CountVectorizer(ngram_range=(2, 2))
traindataset = countvector.fit_transform(titles)

In [20]:
# implement RandomForest Classifier
randomClassifier = RandomForestClassifier(n_estimators=200, criterion='entropy')
randomClassifier.fit(traindataset, train['label'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [21]:
# Predict for test dataset
test_transform = []

for row in range(0, len(test.index)):
    test_transform.append(' '.join(str(x) for x in test.iloc[row, 1:cols] if str(x) != 'None'))
test_dataset = countvector.transform(test_transform)
predictions = randomClassifier.predict(test_dataset)
test_transform[1]

'Vertcoin 51% Attack ‘Motive Uncertain’ as Hackers Lose up to $4,000 P2P Bitcoin Marketplace Paxful Set to Surpass LocalBitcoins in Volume Binance Crypto Exchange Adds Four Trading Pairs for Russian Ruble Bitcoin’s efficiency improvements are a really big deal: Matt Odell OneCoin Website Goes Offline as Net Closes in on $4B Ponzi Scheme Price Analysis 02/12: BTC, ETH, XRP, BCH, LTC, EOS, BNB, BSV, XLM, TRX Bitcoin Price: Cracking $7.4K Opens Path to $8.1K Resistance — Analyst Binance’s CZ: This Vibrant Country Is One of the Fastest-Growing Crypto Nations Former CFTC Chair to Remain Focused on Crypto and Blockchain at New Law Firm US Stock Markets to Have a Slow 2020, Will Investors Turn to Bitcoin and Crypto? 459 Days Until BTC Hits Its Longest Streak Without a New All-Time High Litecoin-Funded Grin Developer Challenges Mimblewimble’s Privacy Issue Three Reasons Why Fundstrat’s Tom Lee Is Optimistic About Bitcoin’s Price Next Year Analysts Flip Long on Bitcoin After Bulls Thwart Brief 

In [22]:
## Import library to check accuracy
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [23]:
matrix = confusion_matrix(test['label'], predictions)
print(matrix)

score = accuracy_score(test['label'], predictions)
print(score)

report = classification_report(test['label'], predictions)
print(report)

[[ 1 57]
 [ 1 48]]
0.45794392523364486
              precision    recall  f1-score   support

           0       0.50      0.02      0.03        58
           1       0.46      0.98      0.62        49

    accuracy                           0.46       107
   macro avg       0.48      0.50      0.33       107
weighted avg       0.48      0.46      0.30       107

