# Setup Params

In [25]:
import datetime

now = datetime.datetime.now()
now = str(now)
SYMBOLS = ['FB', 'AAPL', 'MSFT', 'AMZN', 'IBM', 'NFLX', 'GOOGL', 'INTC', 'TCEHY', 'NVDA']
SEARCH_TERMS = ['facebook', 'apple', 'microsoft', 'amazon', 'ibm', 'netflix', 'google', 'intel', 'tencent', 'nvidia']
SAVE_DIR = 'data/mycollection/{}'.format(now)
os.mkdir(SAVE_DIR)

# Scraper Functions

## Helper Functions

In [26]:
from urllib.request import urlopen, HTTPError
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import time
import pprint
import json

def get_from_businesstimes(search_word):
    date_sentiments = {}
    for i in range(1,11):
        page = urlopen('https://www.businesstimes.com.sg/search/{}?page='.format(search_word) + str(i)).read()
        soup = BeautifulSoup(page, features="html.parser")
        posts = soup.findAll("div", {"class": "media-body"})
        for post in posts:
            time.sleep(1)
            url = post.a['href']
            date = post.time.text
            print(date, url)
            try:
                link_page = urlopen(url).read()
            except HTTPError as e:
                if e.code == 403:
                    continue
                url = url[:-2]
                link_page = urlopen(url).read()
            link_soup = BeautifulSoup(link_page)
            sentences = link_soup.findAll("p")
            passage = ""
            for sentence in sentences:
                passage += sentence.text
            date_sentiments.setdefault(date, []).append(passage)
            json.dump(date_sentiments, open( 'businesstimes_{}_{}.json'.format(search_word, now), 'w' ))
    return date_sentiments

def flatten(x):
    result = []
    for el in x:
        if hasattr(el, "__iter__") and not isinstance(el, (str, bytes)):
            result.extend(flatten(el))
        else:
            result.append(el)
    return result

def flatten_texts(date_sentiments):
    processed_text = {}
    for k,v in date_sentiments.items():
        temp = flatten(v)
        seperator = ', '
        temp = seperator.join(temp)
        processed_text[datetime.strptime(k, '%d %b %Y').date() + timedelta(days=1)] = temp
    return processed_text

# AlphaVantage

## Helper Functions

In [27]:
from alpha_vantage.timeseries import TimeSeries
import matplotlib.pyplot as plt

alpha_vantage_APIKEY = '4FSNXUSUS1Y22Q4A'

def get_financial_data(symbol):
    ts = TimeSeries(key=alpha_vantage_APIKEY, output_format='pandas')
    data, meta_data = ts.get_intraday(symbol=symbol, interval='60min', outputsize='full')
    #rename the columns to something more sensible
    data.rename(columns={'1. open':'Open',
                      '2. high':'High',
                      '3. low':'Low',
                      '4. close':'Close',
                      '5. volume':'Volume',
                    }, inplace=True)

    #get end of day only
    cyclic_data = data.iloc[6::7, :]
    #subtract the difference
    cyclic_data.Close = cyclic_data.Close.diff(periods=-1)
    #drop the latest value (ie. there's no tomorrow minus today)
    cyclic_data = cyclic_data[:-1]
    return cyclic_data

In [28]:
def set_intervals(item):
    if item > 0:
        return 1
    elif item < 0:
        return 0

def process_financial_data(data):
    new_frame = data.reset_index()
    new_frame['date'] = pd.to_datetime(new_frame['date']).dt.date
    new_frame.Close.diff(periods=-1)
    new_frame['diff'] = new_frame['Close'].map(set_intervals)
    return new_frame

# Complete Pipe

## Helper Functions

In [29]:
import pandas as pd

def intersection(lst1, lst2): 
    return list(set(lst1) & set(lst2)) 

def get_save_data(symbol, search_term):
    news_data = get_from_businesstimes(search_term)
    news_data = flatten_texts(news_data)
    train_df = pd.DataFrame.from_dict(news_data.items())

    earliest_date = min(news_data.keys())
    latest_date = max(news_data.keys())
    
    cyclic_data = get_financial_data(symbol)
    new_frame = process_financial_data(cyclic_data)
    
    shared_dates = intersection(train_df[0], new_frame['date'])
    
    train_df = train_df[train_df[0].isin(shared_dates)]
    new_frame = new_frame[new_frame['date'].isin(shared_dates)]
    
    compiled_frame = train_df[::-1]
    compiled_frame['target'] = new_frame['diff'].values
    compiled_frame.target.astype(int)
    compiled_frame
    
    file_name = '{}/{}_{}'.format(SAVE_DIR, symbol, now)
    compiled_frame.to_csv(file_name, sep='\t')

In [30]:
for symbol, search_terms  in zip(SYMBOLS, SEARCH_TERMS):
    get_save_data(symbol, search_terms)

13 Apr 2019 https://www.businesstimes.com.sg/technology/eu-copyright-revamp-targeting-google-facebook-set-for-approval-on-monday
13 Apr 2019 https://www.businesstimes.com.sg/technology/facebook-more-than-doubles-zuckerberg-compensation-to-us226m
12 Apr 2019 https://www.businesstimes.com.sg/technology/facebook-trying-to-stop-its-own-algorithms-from-doing-their-job
10 Apr 2019 https://www.businesstimes.com.sg/technology/facebook-enhances-memorialised-user-accounts
10 Apr 2019 https://www.businesstimes.com.sg/technology/canada-hails-facebook-ban-on-far-right-figures
09 Apr 2019 https://www.businesstimes.com.sg/government-economy/canada-hails-facebook-ban-on-far-right-figures
09 Apr 2019 https://www.businesstimes.com.sg/opinion/facebook-wants-a-faux-regulator-for-internet-speech-it-wont-happen
09 Apr 2019 https://www.businesstimes.com.sg/technology/headway-made-against-election-abuses-facebook
05 Apr 2019 https://www.businesstimes.com.sg/government-economy/facebook-vows-to-block-foreign-ad

09 Jan 2019 https://www.businesstimes.com.sg/technology/facebook-ceo-plans-2019-forums-on-techs-role-in-society
29 Dec 2018 https://www.businesstimes.com.sg/technology/facebook-ceo-zuckerberg-says-problems-will-take-years-to-fix
24 Dec 2018 https://www.businesstimes.com.sg/technology/why-the-ftc-is-taking-a-new-look-at-facebook-privacy
21 Dec 2018 https://www.businesstimes.com.sg/technology/lawsuit-adds-to-facebook-woes-on-data-protection
20 Dec 2018 https://www.businesstimes.com.sg/technology/5-ways-facebook-shared-your-data
18 Dec 2018 https://www.businesstimes.com.sg/consumer/facebook-watch-takes-on-youtube-with-pranksters-magicians-and-cartoons
15 Dec 2018 https://www.businesstimes.com.sg/technology/facebook-says-bug-opened-access-to-private-photos
14 Dec 2018 https://www.businesstimes.com.sg/technology/facebook-teaches-new-yorkers-about-privacy-at-pop-up-kiosk
11 Dec 2018 https://www.businesstimes.com.sg/technology/google-facebook-face-australia-crackdown-over-market-power
06 Dec 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


12 Apr 2019 https://www.businesstimes.com.sg/technology/apple-adds-more-suppliers-to-clean-energy-programme
11 Apr 2019 https://www.businesstimes.com.sg/technology/apple-adds-foxconn-chip-suppliers-to-clean-energy-programme
09 Apr 2019 https://www.businesstimes.com.sg/companies-markets/banks-should-worry-about-apple-card
06 Apr 2019 https://www.businesstimes.com.sg/technology/does-apple-want-to-sell-tv-subscriptions-or-change-the-world
04 Apr 2019 https://www.businesstimes.com.sg/technology/japan-display-to-supply-oled-screens-for-apple-watch
02 Apr 2019 https://www.businesstimes.com.sg/energy-commodities/saudi-aramco-is-world%E2%80%99s-most-profitable-company-beating-apple-by-far
02 Apr 2019 https://www.businesstimes.com.sg/technology/apple-supplier-japan-display-to-seek-funding-shares-surge
01 Apr 2019 https://www.businesstimes.com.sg/technology/troubled-apple-supplier-japan-display-to-seek-funding-shares-surge
30 Mar 2019 https://www.businesstimes.com.sg/consumer/apple-pulls-plug-on

04 Jan 2019 https://www.businesstimes.com.sg/government-economy/apples-cut-in-revenue-outlook-triggers-slump-for-suppliers
04 Jan 2019 https://www.businesstimes.com.sg/companies-markets/a-bite-of-apples-poison-and-markets-sell-in-a-flash
04 Jan 2019 https://www.businesstimes.com.sg/government-economy/apples-china-trouble-makes-trumps-trade-war-harder-to-defend
03 Jan 2019 https://www.businesstimes.com.sg/stocks/europe-shares-fall-as-apple-guidance-cut-whipsaws-tech-stocks
03 Jan 2019 https://www.businesstimes.com.sg/stocks/asia-markets-stage-slight-recovery-but-tech-firms-soured-by-apple
03 Jan 2019 https://www.businesstimes.com.sg/government-economy/apple-isn%E2%80%99t-the-only-casualty%C2%A0of-chinas-slowdown
03 Jan 2019 https://www.businesstimes.com.sg/technology/apple-cuts-outlook-sees-challenges-in-china-emerging-markets
21 Dec 2018 https://www.businesstimes.com.sg/technology/apple-risks-iphone-ban-in-germany-after-court-case-loss
20 Dec 2018 https://www.businesstimes.com.sg/techn

22 May 2018 https://www.businesstimes.com.sg/technology/microsoft-to-share-data-tools-to-speed-chinese-ai-development
14 May 2018 http://www.businesstimes.com.sg/technology/dont-skype-me-how-microsoft-turned-consumers-against-a-beloved-brand
09 May 2018 http://www.businesstimes.com.sg/hub/empowering-enterprise/ingram-micro-ties-up-with-microsoft-to-launch-cloud-platform-division
08 May 2018 http://www.businesstimes.com.sg/technology/microsoft-amazon-show-alexa-and-cortana-cozying-up-together
08 May 2018 https://www.businesstimes.com.sg/technology/microsoft-tries-a-new-role-moral-leader
27 Apr 2018 http://www.businesstimes.com.sg/technology/microsoft-gets-earnings-boost-from-cloud
14 Mar 2018 http://www.businesstimes.com.sg/technology/microsoft-plans-its-first-cloud-data-centres-in-middle-east
13 Mar 2018 http://www.businesstimes.com.sg/government-economy/trump-eyes-ex-microsoft-staffer-as-top-economic-aide
12 Mar 2018 http://www.businesstimes.com.sg/government-economy/trump-considers-e

12 Mar 2019 https://www.businesstimes.com.sg/banking-finance/amazon-google-forays-into-banking-seen-as-threat-by-rbcs-ceo
11 Mar 2019 https://www.businesstimes.com.sg/technology/amazon-hints-that-fighting-fakes-was-a-reason-behind-recent-big-vendor-purge
07 Mar 2019 https://www.businesstimes.com.sg/consumer/amazon-to-expand-real-world-store-presence
07 Mar 2019 https://www.businesstimes.com.sg/technology/amazon-to-close-all-us-pop-up-stores
04 Mar 2019 https://www.businesstimes.com.sg/consumer/with-ipo-due-uber-aims-to-be-amazon-of-transportation
01 Mar 2019 https://www.businesstimes.com.sg/consumer/business-leaders-lawmakers-to-amazon-please-come-back-to-new-york
26 Feb 2019 https://www.businesstimes.com.sg/technology/amazon-to-offer-more-than-1000-apprenticeships-in-britain
26 Feb 2019 https://www.businesstimes.com.sg/consumer/chinese-firm-behind-the-amazon-coat-hits-jackpot-in-us
25 Feb 2019 https://www.businesstimes.com.sg/consumer/chinese-firm-behind-the-amazon-coat-hits-jackpot-i

24 Oct 2018 https://www.businesstimes.com.sg/technology/amazon-lobbying-spend-hits-record-amid-pentagon-competition
23 Oct 2018 https://www.businesstimes.com.sg/technology/amazoncom-qualcomm-to-put-alexa-assistant-in-more-headphones
23 Oct 2018 https://www.businesstimes.com.sg/technology/amazon-lobbying-reaches-company-record-amid-pentagon-competition
17 Oct 2018 https://www.businesstimes.com.sg/technology/bezos-defends-amazons-bid-for-pentagon-cloud-project
16 Oct 2018 https://www.businesstimes.com.sg/government-economy/bezos-defends-amazon-effort-for-pentagon-cloud-project
12 Oct 2018 https://www.businesstimes.com.sg/technology/workers-will-earn-more-under-new-pay-plan-says-amazon
09 Oct 2018 https://www.businesstimes.com.sg/technology/amazon-india-denies-it-gives-select-sellers-preferential-treatment
05 Oct 2018 https://www.businesstimes.com.sg/technology/apple-amazon-deny-bloomberg-report-on-chinese-hardware-attack
04 Oct 2018 https://www.businesstimes.com.sg/technology/amazon-wage

14 Dec 2016 http://www.businesstimes.com.sg/technology/ibm-lays-out-plans-to-hire-25000-in-us-ahead-of-trump-meeting
14 Dec 2016 http://www.businesstimes.com.sg/government-economy/ibm-unveils-plan-to-hire-25000-in-us-on-eve-of-trump-meeting
25 Nov 2016 http://www.businesstimes.com.sg/government-economy/ibm-failed-on-australias-census-turnbull
23 Nov 2016 http://www.businesstimes.com.sg/technology/ibm-to-triple-its-cloud-data-centres-in-uk-despite-brexit
22 Nov 2016 http://www.businesstimes.com.sg/technology/ibm-invests-in-uk-data-centres-as-tech-sector-defies-brexit-worries
15 Nov 2016 http://www.businesstimes.com.sg/technology/financial-players-get-to-know-customers-better-in-ibm-fintech-project
28 Oct 2016 http://www.businesstimes.com.sg/technology/ibms-watson-creates-customer-specific-business-insights
27 Oct 2016 http://www.businesstimes.com.sg/technology/ibm-slack-team-up-to-build-smarter-data-crunching-chatbots
25 Oct 2016 http://www.businesstimes.com.sg/technology/ibm-apologises