In [0]:
from tqdm import tqdm
import re
import os
import pandas as pd
from datetime import datetime

In [130]:
from google.colab import files

# Open the Colab upload widget; the result is indexed by uploaded file name.
uploaded = files.upload()

# Report the name and size of every file that was uploaded.
for fn, payload in uploaded.items():
    report = 'User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(payload))
    print(report)

Saving data.pkl to data (1).pkl
User uploaded file "data.pkl" with length 207383 bytes


In [0]:
import pickle
from tqdm import tqdm
import numpy as np

import nltk
from nltk.stem import WordNetLemmatizer

In [0]:
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
# NOTE(review): the recorded upload output shows Colab storing the upload as
# 'data (1).pkl' because 'data.pkl' already existed -- confirm that this path
# points at the intended (freshly uploaded) file.
# The context manager closes the file handle as soon as the data is read.
with open('data.pkl', 'rb') as data_file:
    data = pickle.load(data_file)

# NLTK resources used by nltk.word_tokenize and WordNetLemmatizer below.
nltk.download('punkt')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

In [0]:
def generate_article():
    """Yield one ``(words, is_ticker, prices)`` triple per article in ``data``.

    Every article text is split with ``nltk.word_tokenize``. A token that is
    exactly equal to one of the article's ticker symbols (the part after the
    last ``':'`` of each ``data['tickers'][i]`` entry) is kept verbatim, flagged
    ``True`` and paired with the entry of ``data['prices'][i]`` at the symbol's
    position. Any other token is lower-cased and lemmatized, flagged ``False``
    and paired with the placeholder ``'O'``.

    Reads the module-level ``data`` and ``lemmatizer`` objects.

    Yields:
        tuple: three parallel, equally long lists -- tokens, ticker flags and
        per-token prices.
    """
    for article_idx in range(len(data['articles'])):
        text = data['articles'][article_idx]
        symbols = [raw.split(':')[-1] for raw in data['tickers'][article_idx]]
        symbol_prices = data['prices'][article_idx]

        words, is_ticker, prices = [], [], []

        for token in nltk.word_tokenize(text):
            matched = token in symbols
            is_ticker.append(matched)

            if matched:
                # Price stored at the position of the first matching symbol.
                prices.append(symbol_prices[symbols.index(token)])
            else:
                # Ordinary word: normalise it and mark it as carrying no price.
                token = lemmatizer.lemmatize(token.lower())
                prices.append('O')

            words.append(token)

        # The three lists must stay aligned token by token.
        assert len(words) == len(is_ticker) == len(prices)

        yield words, is_ticker, prices

In [135]:
# Materialise every (words, is_ticker, prices) triple, showing progress with tqdm.
articles = list(tqdm(generate_article()))

120it [00:00, 223.20it/s]


In [0]:
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
# Context managers close each file handle as soon as its content has been read.

# tags: indexed by predicted class index to obtain the tag string.
with open('tags.pkl', 'rb') as tags_file:
    tags = pickle.load(tags_file)

# word2ind: token -> vocabulary index.
with open('word2ind.pkl', 'rb') as word2ind_file:
    word2ind = pickle.load(word2ind_file)

# words: indexed by vocabulary index to get the token back.
with open('words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)

In [0]:
max_len = 800

from keras.preprocessing.sequence import pad_sequences

# Index substituted for tokens that are missing from the vocabulary.
# NOTE(review): presumably the unknown-word entry of word2ind -- confirm.
unknown_index = 10

# Translate every token of every article into its vocabulary index.
X = [[word2ind.get(article_word, unknown_index) for article_word in article[0]]
     for article in articles]

# Pad short articles at the end; over-long ones are cut at the front
# (truncating="pre" is the pad_sequences default, spelled out here on purpose).
X = pad_sequences(maxlen=max_len, sequences=X, padding="post", truncating="pre",
                  value=word2ind['<pad>'])

# Keep the same tail of each price list that pad_sequences keeps of the tokens,
# so prices[a][i] still belongs to the token at X[a][i] for articles longer
# than max_len (previously the untruncated list was misaligned in that case).
prices = [list(article[2])[-max_len:] for article in articles]

In [0]:
from keras.models import load_model

# Load the saved Keras model that is used for prediction below.
model_path = 'model.h5'
model = load_model(model_path)

In [0]:
# Model scores for every position of every padded article; the argmax over the
# last axis is taken in the simulation cell.
model_input = np.array(X)
probs = model.predict(model_input)

In [0]:
# Trading simulation: walk over the tag predicted for every token and, for
# tokens that carry a price, act on '<BUY>' / '<SELL>' while keeping running
# totals for the summary message appended at the end of the log.
portfolio = {}      # ticker -> purchase prices of the shares currently held
log = []            # human-readable messages; the summary is appended last
profit = .0         # sum of (sale price - purchase price) over all sales
spent = .0          # sum of purchase prices over all buys
# NOTE(review): `sold` accumulates the PURCHASE price of each sold share (it is
# the denominator of the profit percentage), while the summary text reports it
# as the amount the shares were sold for -- confirm which is intended.
sold = .0
shares_sold = 0
shares_bought = 0
max_profit = 0      # largest relative gain (as a fraction) of a single sale
best_deal = ''      # ticker of that sale

# Most likely tag index for every position of every article.
preds = np.argmax(probs, axis=-1)

for pred_ind, pred in enumerate(preds):

    for i, p in enumerate(pred):
        ticker = words[X[pred_ind][i]]

        # Padding positions have no entry in the price list.
        if ticker == '<pad>':
            continue
        price = prices[pred_ind][i]
        # Skip tokens whose price is one of the placeholders -1, None or 'O'.
        if price is None or price == -1 or price == 'O':
            continue

        action = tags[p]

        if action == '<BUY>':
            log.append('buying share {} for {}'.format(ticker, price))
            shares_bought += 1
            spent += price
            portfolio.setdefault(ticker, []).append(price)

        elif action == '<SELL>':
            if ticker in portfolio:
                holdings = portfolio[ticker]
                # Always sell the cheapest share held for this ticker.
                buy_price = min(holdings)
                log.append('selling share {} for {}, bought for {}'.format(ticker, price, buy_price))
                sold += buy_price
                shares_sold += 1
                holdings.remove(buy_price)
                if not holdings:
                    del portfolio[ticker]
                profit += price - buy_price
                # A zero purchase price has no defined relative gain; skip it
                # instead of dividing by zero.
                if buy_price:
                    deal_return = (price - buy_price) / buy_price
                    if deal_return > max_profit:
                        max_profit = deal_return
                        best_deal = ticker
            else:
                log.append('want to sell share {}, but didn\'t found it in portfolio'.format(ticker))

# Report 0% instead of raising ZeroDivisionError when nothing was sold.
profit_percent = profit * 100 / sold if sold else 0.0

log.append(('Profit: {:.2f}$ ({:.2f}%); sold {} shares for the amount of {:.2f}$; ' + 
            'bought {} shares for the amount of {:.2f}$. Best deal was for {} with ' + 
            'profit of {:.2f}%.').format(
                profit, profit_percent, shares_sold, sold, shares_bought,
                spent, best_deal, max_profit * 100))

In [155]:
# Show the summary message, which is the last entry appended to the log.
log[-1]

'Profit: 78.41$ (0.90%); sold 21 shares for the amount of 8666.31$; bought 80 shares for the amount of 18069.16$. Best deal was for AMZN with profit of 7.48%.'

In [156]:
# Display every log entry: the individual trade messages followed by the summary.
log

['buying share AAPL for 178.184',
 "want to sell share GS, but didn't found it in portfolio",
 'selling share AAPL for 190.225, bought for 178.184',
 "want to sell share PCG, but didn't found it in portfolio",
 'buying share GOOGL for 1066.997',
 "want to sell share COP, but didn't found it in portfolio",
 'buying share PFE for 43.61',
 "want to sell share JD, but didn't found it in portfolio",
 "want to sell share BX, but didn't found it in portfolio",
 "want to sell share JPM, but didn't found it in portfolio",
 "want to sell share WMB, but didn't found it in portfolio",
 "want to sell share TSLA, but didn't found it in portfolio",
 'buying share INFY for 8.965',
 'selling share INFY for 8.965, bought for 8.965',
 'selling share GOOGL for 1083.92, bought for 1066.997',
 "want to sell share AMZN, but didn't found it in portfolio",
 'buying share NFLX for 282.279',
 'buying share ADBE for 247.13',
 'buying share VRTX for 177.592',
 'buying share MA for 201.295',
 'buying share V for 14