# Initial Model Testing

In [20]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import re
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

from sklearn.preprocessing import MultiLabelBinarizer

print(tf.__version__)

2.0.0-alpha0


In [21]:
def tokenize_and_pad(test_sentence):
    test_sentence = tokenizer.texts_to_sequences([test_sentence])
    test_sentence = sequence.pad_sequences(test_sentence, maxlen=max_length, padding="post")
    return test_sentence

In [22]:
from tensorflow.keras.preprocessing import text, sequence
import pickle

with open('tokenizer_imdb.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
    
max_length = 2722
vocab_size = len(tokenizer.word_index) + 1

# test_sentence = ['Sapura Energy bags 5 new contracts worth RM1.3 billion - Free Malaysia Today', 
#                  'BREAKING: Trump just signed two executive orders that seek to expedite permits for pipelines and other fossil fuel projects by restricting public input and states authority.',
#                 'Number of companies producing oil and gas in Western Canada drops 17.5% since 2014', 
#                 'Wheelchair customers stuck out in the pouring rain when the cab was booked in advance is OUTRAGEOUS. This cab is now 55 minutes late and my client has missed a VERY important DRs appt. BRING #Uber TO VANCOUVER if anything just to SCREW with @vancouvertaxi customers. @NEWS1130',
#                 'Uber worst service... Cab booked to take 1.5 year child to doctor and cab did not come more then 1hr driver not answering and not cancelling the ride...',
#                 'That #Uber ad sums up that company!! I’ve no sympathy for anyone male or female who uses em and becomes unstuck! They’re not even fucking cheap that’s a myth! Use your local mini cab firm n black cabs in London! Let’s keep money in England ay n drive this firm out!',
#                 'Uber is awesome!',
#                 'Looking forward to your  presentation at Benzinga next week! Armenians represent! ']


test_sentence = tokenize_and_pad('Sapura Energy bags 5 new contracts worth RM1.3 billion - Free Malaysia Today')


# test_sentence = tokenizer.texts_to_sequences(test_sentence)
# test_sentence = sequence.pad_sequences(test_sentence, maxlen=max_length, padding="post")
test_sentence

array([[1699, 8860,  451, ...,    0,    0,    0]], dtype=int32)

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Embedding, SpatialDropout1D, Dropout, add, concatenate, Flatten, Embedding
from tensorflow.keras.layers import Bidirectional, GlobalMaxPooling1D, GlobalAveragePooling1D, Input, LSTM
# from tensorflow.compat.v1.keras.layers import CuDNNLSTM, CuDNNGRU

ModuleNotFoundError: No module named 'tensorflow.compat.v1'

In [23]:
BATCH_SIZE = 64
EMBEDDING_DIM = 500
LSTM_UNITS = 256
DENSE_UNITS = LSTM_UNITS * 4

class MyLSTM(tf.keras.Model):
    def __init__(self):
        super(MyLSTM, self).__init__()
        self.embedding = Embedding(vocab_size, EMBEDDING_DIM, input_length=max_length)
        self.dropout1 = SpatialDropout1D(0.3)
        self.lstm1 = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True, dropout=0.2))
        self.gmp = GlobalMaxPooling1D()
        self.dense1 = Dense(100, activation='relu')
        self.dropout2 = Dropout(0.2)
        self.denseOut = Dense(1, activation='sigmoid')
    
    def call(self, x):
        x = self.embedding(x)
        x = self.dropout1(x)
        x = self.lstm1(x)
        x = self.gmp(x)
        x = self.dense1(x)
        x = self.dropout2(x)
        x = self.denseOut(x)
        return x
    
model = MyLSTM()
model.build(input_shape=(vocab_size, max_length))
model.load_weights('pre-deploy_models/imdbEager.h5')

W0414 17:45:30.686372 140124052678464 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x7f70981747b8>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.
W0414 17:45:30.692727 140124052678464 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x7f6d1518a198>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.


In [None]:
def build_model(embedding_matrix):
    words = Input(shape=(max_length,))
    x = Embedding(*embedding_matrix.shape, weights=[embedding_matrix], trainable=False)(words)
    x = SpatialDropout1D(0.3)(x)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, 
                           return_sequences=True))(x)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, 
                           return_sequences=True))(x)

    hidden = concatenate([
        GlobalMaxPooling1D()(x),
        GlobalAveragePooling1D()(x),
    ])
    hidden = add([hidden, Dense(DENSE_UNITS, activation='relu')(hidden)])
    hidden = Dropout(0.2)(hidden)
    hidden = add([hidden, Dense(DENSE_UNITS, activation='relu')(hidden)])
    result = Dense(1, activation='sigmoid')(hidden)
    
    model = Model(inputs=words, outputs=result)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    return model

In [24]:
model.predict(test_sentence)

array([[0.99991095]], dtype=float32)

# Scraper Feed

In [104]:

from urllib.request import urlopen, HTTPError
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import time
import pprint

date_sentiments = {}

for i in range(1,11):
    page = urlopen('https://www.businesstimes.com.sg/search/crude%2Boil?page='+str(i)).read()
#     page = urlopen('https://www.businesstimes.com.sg/search/facebook?page='+str(i)).read()
    soup = BeautifulSoup(page, features="html.parser")
    posts = soup.findAll("div", {"class": "media-body"})
    for post in posts:
        time.sleep(1)
        url = post.a['href']
        date = post.time.text
        print(date, url)
        try:
            link_page = urlopen(url).read()
        except HTTPError as e:
            if e.code == 403:
                continue
            url = url[:-2]
            link_page = urlopen(url).read()
        link_soup = BeautifulSoup(link_page)
        sentences = link_soup.findAll("p")
        passage = ""
        for sentence in sentences:
            passage += sentence.text
        sentiment = model(tokenize_and_pad(passage))
        date_sentiments.setdefault(date, []).append(sentiment)

13 Apr 2019 https://www.businesstimes.com.sg/energy-commodities/oil-rises-1-on-tightening-crude-supply-upbeat-economic-data
11 Apr 2019 https://www.businesstimes.com.sg/energy-commodities/oil-rallies-as-us-petrol-inventory-draw-offsets-crude-build
10 Apr 2019 https://www.businesstimes.com.sg/companies-markets/crude-oil-price-rally-propels-commodity-currencies
08 Apr 2019 https://www.businesstimes.com.sg/energy-commodities/crude-oil-market-rallies-on-fading-fears-of-a-glut
05 Apr 2019 https://www.businesstimes.com.sg/energy-commodities/oil-eases-as-us-crude-stocks-build-up
04 Apr 2019 https://www.businesstimes.com.sg/energy-commodities/oil-eases-on-us-crude-stock-build-but-holds-near-5-month-high
01 Apr 2019 https://www.businesstimes.com.sg/government-economy/saudi-arabias-economy-expands-in-q4-on-boost-from-crude-non-oil-growth-slows
28 Mar 2019 https://www.businesstimes.com.sg/energy-commodities/oil-prices-sink-after-surprise-us-crude-stock-build
20 Mar 2019 https://www.businesstimes.

08 Feb 2018 http://www.businesstimes.com.sg/energy-commodities/report-of-lower-us-crude-inventories-gives-lift-to-oil-prices
07 Feb 2018 http://www.businesstimes.com.sg/energy-commodities/oil-prices-rise-on-report-of-lower-us-crude-inventories
26 Jan 2018 http://www.businesstimes.com.sg/energy-commodities/oil-firms-as-us-dollar-falls-further-but-weaker-crude-demand-looms
24 Jan 2018 http://www.businesstimes.com.sg/energy-commodities/oil-hedge-fund-bbl-sees-crude-at-us80-after-opec-cuts
19 Jan 2018 http://www.businesstimes.com.sg/energy-commodities/saudis-refined-oil-exports-offset-crude-curbs
19 Jan 2018 http://www.businesstimes.com.sg/energy-commodities/oil-little-changed-after-record-crude-draw-at-us-hub
11 Jan 2018 http://www.businesstimes.com.sg/energy-commodities/us-crude-inventory-fall-buoys-oil-worries-about-rally-persist
21 Dec 2017 http://www.businesstimes.com.sg/energy-commodities/fall-in-us-crude-oil-stocks-north-sea-outage-supports-prices
14 Dec 2017 http://www.businesstime

In [105]:
date_sentiment = {}

for k,v in date_sentiments.items():
    date_sentiment[datetime.strptime(k, '%d %b %Y').date() + timedelta(days=1)] = round(sum([np.asscalar(o.numpy()) for o in v])/float(len(v)),3)

earliest_date = min(date_sentiment.keys())

print(date_sentiment)

{datetime.date(2019, 4, 14): 0.0, datetime.date(2019, 4, 12): 0.0, datetime.date(2019, 4, 11): 1.0, datetime.date(2019, 4, 9): 0.0, datetime.date(2019, 4, 6): 0.0, datetime.date(2019, 4, 5): 0.0, datetime.date(2019, 4, 2): 0.0, datetime.date(2019, 3, 29): 0.0, datetime.date(2019, 3, 21): 0.002, datetime.date(2019, 3, 12): 0.0, datetime.date(2019, 3, 8): 0.0, datetime.date(2019, 3, 1): 0.016, datetime.date(2019, 2, 23): 0.0, datetime.date(2019, 2, 12): 0.0, datetime.date(2019, 2, 2): 0.0, datetime.date(2019, 1, 23): 0.0, datetime.date(2019, 1, 15): 0.0, datetime.date(2019, 1, 2): 0.0, datetime.date(2018, 12, 4): 0.998, datetime.date(2018, 11, 11): 0.0, datetime.date(2018, 11, 10): 0.007, datetime.date(2018, 11, 9): 0.0, datetime.date(2018, 11, 7): 1.0, datetime.date(2018, 10, 19): 0.999, datetime.date(2018, 10, 17): 0.0, datetime.date(2018, 10, 11): 0.0, datetime.date(2018, 10, 9): 0.0, datetime.date(2018, 9, 4): 0.0, datetime.date(2018, 8, 24): 0.0, datetime.date(2018, 8, 17): 0.0, dat

In [121]:
list(date_sentiment.values())[:5]

[0.0, 0.0, 1.0, 0.0, 0.0]

# Back trader

In [106]:
import backtrader as bt
import backtrader.indicators as btind
import datetime
import os.path
import sys

class Sentiment(bt.Indicator):
    lines = ('sentiment',)
    plotinfo = dict(
        plotymargin=0.15,
        plothlines=[0],
        plotyticks=[1.0, 0, -1.0])
    
    def next(self):
        self.date = self.data.datetime
        date = bt.num2date(self.date[0]).date()
        prev_sentiment = self.sentiment
        if date in date_sentiment:
            self.sentiment = date_sentiment[date]
        self.lines.sentiment[0] = self.sentiment


class SentimentStrat(bt.Strategy):
    params = (
        ('period', 15),
        ('printlog', True),
    )

    def log(self, txt, dt=None, doprint=False):
        ''' Logging function for this strategy'''
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        # Keep a reference to the "close" line in the data[0] dataseries
        self.dataclose = self.datas[0].close
        # Keep track of pending orders
        self.order = None
        self.buyprice = None
        self.buycomm = None
        self.sma = bt.indicators.SimpleMovingAverage(
            self.datas[0], period=self.params.period)
        self.date = self.data.datetime
        self.sentiment = None
        Sentiment(self.data)
        
    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return
        
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(
                    'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                    (order.executed.price,
                     order.executed.value,
                     order.executed.comm))
                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm
            else:  # Sell
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                         (order.executed.price,
                          order.executed.value,
                          order.executed.comm))
                
            self.bar_executed = len(self)     
            
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')
            
        # Write down: no pending order
        self.order = None
        
    def notify_trade(self, trade):
        if not trade.isclosed:
            return

        self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' %
                 (trade.pnl, trade.pnlcomm))
    
    ### Main Strat ###
    def next(self):
        # log closing price of the series from the reference
        self.log('Close, %.2f' % self.dataclose[0])
        
        date = bt.num2date(self.date[0]).date()
        prev_sentiment = self.sentiment
        if date in date_sentiment:
            self.sentiment = date_sentiment[date]
        
        # Check if an order is pending. if yes, we cannot send a 2nd one
        if self.order:
            return
        print(self.sentiment)
        # If not in the market and previous sentiment not none
        if not self.position and prev_sentiment:
            # buy if current close more than sma AND sentiment increased by >= 0.5
            if self.dataclose[0] > self.sma[0] and self.sentiment - prev_sentiment >= 0.5:
                self.log('BUY CREATE, %.2f' % self.dataclose[0])
                self.order = self.buy()
                
        # Already in the market and previous sentiment not none
        elif prev_sentiment:
            # sell if current close less than sma AND sentiment decreased by >= 0.5
            if self.dataclose[0] < self.sma[0] and self.sentiment - prev_sentiment <= -0.5:
                self.log('SELL CREATE, %.2f' % self.dataclose[0])
                self.order = self.sell()

    def stop(self):
        self.log('(MA Period %2d) Ending Value %.2f' %
                 (self.params.period, self.broker.getvalue()), doprint=True)

In [112]:
earliest_date

datetime.date(2017, 8, 10)

In [114]:
cerebro = bt.Cerebro()

# Strategy
cerebro.addstrategy(SentimentStrat)

# Data Feed

data = bt.feeds.YahooFinanceData(
        dataname = 'CLK19.NYM',
        fromdate = earliest_date,
        todate = datetime.datetime(2019,4,14),
        reverse = False
    )
cerebro.adddata(data)

cerebro.broker.setcash(100000.0)
cerebro.addsizer(bt.sizers.FixedSize, stake=10)
cerebro.broker.setcommission(commission=0.001)
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
cerebro.run()
print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())

cerebro.plot()

Starting Portfolio Value: 100000.00


TypeError: must be real number, not LineBuffer

## with Quandl feed

In [None]:
# data = bt.feeds.Quandl(
#     dataname = 'FB',
#     apikey='LdFUhrZS8ve4PWCpujPf',
#     fromdate = earliest_date,
#     todate = datetime.datetime(2019,4,14),
#     reverse = False)

In [None]:
# import quandl
# quandl.ApiConfig.api_key = "LdFUhrZS8ve4PWCpujPf"

# diff_df = quandl.get('WIKI/FB')
# # df.rename(columns={'settle': 'close','prev_day_open_interest':'openinterest'}, inplace=True)
# # data = bt.feeds.PandasData(dataname=diff_df,datetime="date")

# diff_df.tail()