In [1]:
#Import statements
import json
import requests
import pandas as pd
import numpy as np
import datetime
from pandas.io.json import json_normalize
from pathlib import Path
import os
import csv
import time
from dateutil import tz
import demoji
from collections import Counter
import ast
# Download demoji's emoji code data (the captured cell output shows it being
# written to a local codes.json). Presumably required before demoji.replace()
# is used further down -- confirm against the installed demoji version.
demoji.download_codes()

[33mDownloading emoji data ...[0m
[92m... OK[0m (Got response in 0.29 seconds)
[33mWriting emoji data to C:\Users\Russell\.demoji/codes.json ...[0m
[92m... OK[0m


In [7]:
# Source (UTC) and target (New York) time zones used when converting the clock reading
from_zone, to_zone = tz.gettz('UTC'), tz.gettz('America/New_York')

# Dates on which the NYSE is shut down to observe holidays (2020 through 2022),
# written as (year, month, day) and expanded into midnight datetimes.
holidayList = [
    datetime.datetime(year, month, day)
    for year, month, day in (
        (2020, 4, 10), (2020, 5, 25), (2020, 7, 3), (2020, 9, 7), (2020, 11, 26), (2020, 12, 25),
        (2021, 1, 1), (2021, 1, 18), (2021, 2, 15), (2021, 4, 2), (2021, 5, 31), (2021, 7, 5),
        (2021, 9, 6), (2021, 11, 25), (2021, 12, 24),
        (2022, 1, 17), (2022, 2, 21), (2022, 4, 15), (2022, 5, 30), (2022, 7, 4), (2022, 9, 5),
        (2022, 11, 24), (2022, 12, 26),
    )
]

In [20]:
# The stock symbols that we will iterate through
# Apple = AAPL
# Amazon = AMZN
# Google = GOOGL
# Microsoft = MSFT
# Dell = DELL
# IBM = IBM
# Intel = INTC
# HP = HPQ
# Facebook = FB
# Cisco Systems = CSCO
# Oracle = ORCL
# HP Enterprise = HPE
# Micron Tech = MU
# DXC Tech = DXC
# Thermo Fisher Scientific = TMO
stockSymbol = ["AAPL", "AMZN", "GOOGL", "MSFT", "DELL", "IBM", "INTC", "HPQ",
               "FB", "CSCO", "ORCL", "HPE", "MU", "DXC", "TMO"]

# Alpha Vantage API key. Prefer the ALPHAVANTAGE_API_KEY environment variable so
# the credential does not have to live in the notebook.
# NOTE(review): the literal fallback keeps existing runs working, but it is a
# credential committed to source -- rotate it and drop the fallback.
key = os.environ.get('ALPHAVANTAGE_API_KEY') or 'AU74VSFGT1S37O4A'

# Current working directory; the per-symbol data folders are looked up under it
current = os.getcwd()

### Stock Twits Extraction

In [8]:
def collect_New_Twits(res):
    """Convert a StockTwits stream response into a cleaned DataFrame of twits.

    Parameters
    ----------
    res : dict
        Decoded JSON response; ``res['messages']`` is flattened with
        ``pd.json_normalize``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``cleanSents`` (emoji-free body, produced by
        ``remove_emojis``), ``created`` (datetime), ``tag`` (sentiment, or the
        string ``'none'`` when missing) and ``newSymbols`` (list of
        ``{'symbol', 'title'}`` dicts), sorted by ``id`` descending.

    Raises
    ------
    KeyError
        If ``res`` has no ``'messages'`` entry or an expected column is absent.
    """
    messages = pd.json_normalize(res['messages'])

    # The flattened sentiment column is not always present; fall back to the
    # un-expanded 'entities.sentiment' column in that case (presumably this
    # happens when no message in the batch carries a sentiment tag).
    if 'entities.sentiment.basic' in messages.columns:
        sentimentColumn = 'entities.sentiment.basic'
    else:
        sentimentColumn = 'entities.sentiment'

    # Select with a list (not a set): set indexers give no column order and are
    # rejected by pandas >= 2.0.
    df = messages[['id', 'body', 'created_at', sentimentColumn, 'symbols']]
    df = df.rename(columns={'created_at': 'created', sentimentColumn: 'tag'})

    # Reduce every entry of the symbols column to a list of
    # {symbol: "symbol of company mentioned", title: "name of company"} dicts.
    # Entries that are not lists (e.g. NaN) become an empty list.
    newSymbols = [
        [{'symbol': mention.get('symbol'), 'title': mention.get('title')} for mention in mentions]
        if isinstance(mentions, (list, tuple)) else []
        for mentions in df['symbols']
    ]
    df = df.assign(newSymbols=newSymbols)

    # Drops the raw symbols column in favour of the reduced one
    df = df[['id', 'body', 'created', 'tag', 'newSymbols']]

    df = remove_emojis(df)

    # Replaces the NaN with a string "none"
    df = df.replace(np.nan, 'none', regex=True)

    # Parses the timestamp string into a datetime
    dateFormat = "%Y-%m-%dT%H:%M:%SZ"
    df['created'] = pd.to_datetime(df['created'], format=dateFormat)

    # Newest twit (highest id) first
    df = df.sort_values(by='id', ascending=False)

    return df

In [9]:
def remove_emojis(dataframe):
    """Return a copy of a StockTwits frame with emoji-free twit text.

    Every value of the ``body`` column is passed through ``demoji.replace`` and
    stored in a new ``cleanSents`` column. The input frame is left untouched,
    so the function can be re-run on the same frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``id``, ``body``, ``created``, ``tag`` and
        ``newSymbols``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``cleanSents``, ``created``, ``tag``, ``newSymbols``
        (the original ``body`` column is not kept).
    """
    cleanSentList = [demoji.replace(twit) for twit in dataframe['body']]

    # assign() builds a new frame instead of inserting into the caller's one at
    # a hard-coded column position.
    cleaned = dataframe.assign(cleanSents=cleanSentList)

    return cleaned[['id', 'cleanSents', 'created', 'tag', 'newSymbols']]

In [18]:
def gather_twits():
    """Fetch new StockTwits messages for every symbol and merge them into its CSV.

    For each entry of ``stockSymbol`` this reads
    ``<current>/<SYMBOL>folder/<SYMBOL>_twits.csv``, asks the StockTwits symbol
    stream for messages newer than the id in the file's first row, formats them
    with ``collect_New_Twits``, stacks them on top of the stored twits, drops
    duplicate ids and rewrites the file.

    Pauses 30 seconds after each symbol (also when there was nothing new) and
    600 seconds when the API reports status 429. A symbol is skipped when the
    request fails twice in a row.
    """
    for symbol in stockSymbol:

        symbolFolder = os.path.join(str(current), "{}folder".format(symbol))

        # selects the file to add to
        file = os.path.join(symbolFolder, '{}_twits.csv'.format(symbol))

        historicalTwits = pd.read_csv(file)
        # The file is written newest-first, so the first row holds the latest id
        newestID = historicalTwits['id'].iloc[0]

        url = "https://api.stocktwits.com/api/2/streams/symbol/{}.json".format(symbol)

        # One attempt plus one retry; only network / decoding failures are caught
        response = None
        for failureMessage in ("error getting request", "Second error getting request"):
            try:
                response = requests.get(url, params={'since': newestID}).json()
                break
            except (requests.exceptions.RequestException, ValueError):
                print(failureMessage)
        if response is None:
            continue

        if response.get('response', {}).get('status') == 429:
            print("requests exceeded")
            time.sleep(600)
            continue

        # Nothing new (or an error payload without messages): wait and move on
        if not response.get('messages'):
            time.sleep(30)
            continue

        tempTwitsDf = collect_New_Twits(response)

        # New twits first so drop_duplicates keeps the freshly fetched copy
        newHistoricalTwits = pd.concat([tempTwitsDf, historicalTwits])

        newHistoricalTwits = newHistoricalTwits.drop_duplicates(subset='id')

        newHistoricalTwits.to_csv(file, index=False)
        time.sleep(30)

### Stock Value Extraction

In [None]:
# Alpha Vantage API key. Prefer the ALPHAVANTAGE_API_KEY environment variable so
# the credential does not have to live in the notebook.
# NOTE(review): the literal fallback keeps existing runs working, but it is a
# credential committed to source -- rotate it and drop the fallback.
key = os.environ.get('ALPHAVANTAGE_API_KEY') or 'AU74VSFGT1S37O4A'

# Ticker symbols whose market values are collected
stockSymbol = ["AAPL", "AMZN", "GOOGL", "MSFT", "DELL", "IBM", "INTC", "HPQ",
               "FB", "CSCO", "ORCL", "HPE", "MU", "DXC", "TMO"]

In [None]:
# Get the current working directory (not its parent); the per-symbol
# "<SYMBOL>folder" directories are looked up directly under it.
current = os.getcwd()

In [None]:
def format_daily_api(stockResponse):
    """Flatten an Alpha Vantage daily time-series response into a DataFrame.

    Parameters
    ----------
    stockResponse : dict
        Decoded JSON with a ``'Time Series (Daily)'`` mapping of
        ``'YYYY-MM-DD'`` -> ``{'1. open', '2. high', '3. low', '4. close',
        '5. volume'}``.

    Returns
    -------
    pandas.DataFrame
        Columns ``open``, ``high``, ``low``, ``close``, ``volume`` (values kept
        exactly as delivered in the response) and ``time`` (datetime64), one row
        per day in the order the response lists them.

    Raises
    ------
    KeyError
        If the response has no ``'Time Series (Daily)'`` entry or a day lacks
        one of the five fields.
    """
    columns = ["open", "high", "low", "close", "volume", "time"]

    # Collect all rows first and build the frame once; growing a DataFrame row
    # by row is quadratic and DataFrame.append no longer exists in pandas 2.x.
    records = [
        {"open": bar['1. open'], "high": bar['2. high'], "low": bar['3. low'],
         "close": bar['4. close'], "volume": bar['5. volume'], "time": day}
        for day, bar in stockResponse['Time Series (Daily)'].items()
    ]
    newDf = pd.DataFrame(records, columns=columns)

    # Parse the date keys in one vectorised call. The previous
    # `datetime.strptime(...)` failed because `datetime` is imported as the
    # module, not the class.
    newDf['time'] = pd.to_datetime(newDf['time'], format='%Y-%m-%d')

    return newDf

In [None]:
def _format_intraday_api(stockResponse):
    """Flatten an Alpha Vantage 'Time Series (1min)' payload into a DataFrame."""
    records = [
        {"open": bar['1. open'], "high": bar['2. high'], "low": bar['3. low'],
         "close": bar['4. close'], "volume": bar['5. volume'], "time": stamp}
        for stamp, bar in stockResponse['Time Series (1min)'].items()
    ]
    newDf = pd.DataFrame(records, columns=["open", "high", "low", "close", "volume", "time"])
    newDf['time'] = pd.to_datetime(newDf['time'], format='%Y-%m-%d %H:%M:%S')
    return newDf


def collect_stock_market():
    """Refresh the daily and 1-minute price CSVs of every symbol from Alpha Vantage.

    Pass 1 (daily): for each entry of ``stockSymbol`` the TIME_SERIES_DAILY
    response is formatted with ``format_daily_api``, stacked on top of
    ``<current>/<SYMBOL>folder/<SYMBOL>Daily.csv``, de-duplicated on ``time``,
    sorted newest-first, and ``percentChange`` / ``percentVol`` are recomputed
    as the change of ``close`` / ``volume`` relative to the next (older) row.

    Pass 2 (intraday): the TIME_SERIES_INTRADAY (1min, full) response is merged
    the same way into ``<SYMBOL>Values.csv`` (no percent columns).

    Both passes wait 60 seconds after each symbol. A symbol whose response does
    not contain the expected time series (e.g. a rate-limit or error payload)
    is reported and skipped instead of raising ``KeyError``.
    """
    queryUrl = 'https://www.alphavantage.co/query'
    # Explicit lists: set indexers give an arbitrary column order and are
    # rejected by pandas >= 2.0.
    intradayColumns = ['open', 'high', 'low', 'close', 'volume', 'time']
    dailyColumns = intradayColumns + ['percentChange', 'percentVol']

    for symbol in stockSymbol:
        stockRes = requests.get(queryUrl, params={'function': 'TIME_SERIES_DAILY',
                                                  'symbol': symbol,
                                                  'apikey': key}).json()
        if 'Time Series (Daily)' not in stockRes:
            print("no daily values returned for {}".format(symbol))
            time.sleep(60)
            continue

        file = os.path.join(str(current), "{}folder".format(symbol), '{}Daily.csv'.format(symbol))
        df = pd.read_csv(file)
        df['time'] = pd.to_datetime(df['time'])

        stockAPIdf = format_daily_api(stockRes)

        # Stack fresh rows on top of the stored ones; columns that only exist in
        # the stored file are filled with NaN and recomputed below.
        dataframeValue = pd.concat([stockAPIdf, df], ignore_index=True)
        dataframeValue['time'] = pd.to_datetime(dataframeValue['time'])

        # De-duplicate (keeping the fresh API row) and sort BEFORE computing the
        # changes, so each row is compared with the previous trading day rather
        # than with whatever row happened to follow it in the stacked frame.
        dataframeValue = dataframeValue.drop_duplicates(subset='time')
        dataframeValue = dataframeValue.sort_values(by='time', ascending=False)

        dataframeValue['percentChange'] = dataframeValue['close'].astype(float).pct_change(periods=-1)
        dataframeValue['percentVol'] = dataframeValue['volume'].astype(float).pct_change(periods=-1)

        dataframeValue = dataframeValue[dailyColumns]

        dataframeValue.to_csv(file, index=False)
        time.sleep(60)

    for stock in stockSymbol:
        stockRes = requests.get(queryUrl, params={'function': 'TIME_SERIES_INTRADAY',
                                                  'symbol': stock,
                                                  'interval': '1min',
                                                  'outputsize': 'full',
                                                  'apikey': key}).json()
        if 'Time Series (1min)' not in stockRes:
            print("no intraday values returned for {}".format(stock))
            time.sleep(60)
            continue

        file = os.path.join(str(current), "{}folder".format(stock), '{}Values.csv'.format(stock))
        df = pd.read_csv(file)
        df['time'] = pd.to_datetime(df['time'])

        newDf = _format_intraday_api(stockRes)

        dataframeValue = pd.concat([newDf, df], ignore_index=True)[intradayColumns]
        dataframeValue['time'] = pd.to_datetime(dataframeValue['time'])

        dataframeValue = dataframeValue.drop_duplicates(subset='time')
        dataframeValue = dataframeValue.sort_values(by='time', ascending=False)

        dataframeValue.to_csv(file, index=False)
        time.sleep(60)

## Time Management

In [21]:
# Decide what to run from the current New York time.

# Read the clock as an aware UTC datetime and convert it. Stamping the naive
# local time from today() with tzinfo=UTC would shift the result by the
# machine's own UTC offset.
nowUTC = datetime.datetime.now(tz=from_zone)
nowEST = nowUTC.astimezone(to_zone)

# holidayList holds naive midnight datetimes, which never compare equal to an
# aware datetime, so the holiday check has to be done on calendar dates.
holidayDates = {holiday.date() for holiday in holidayList}
isTradingDay = (nowEST.weekday() < 5) and (nowEST.date() not in holidayDates)

# Mon-Fri and not a market holiday
if isTradingDay:

    if nowEST.hour == 9 and nowEST.minute == 0:
        print("compute predictions")
    elif nowEST.hour == 23 and nowEST.minute < 10:
        collect_stock_market()
        print("Gather stock market values")

    print("Stock Twits data")
    gather_twits()

else:
    # Weekend or holiday: only the twits are collected
    print("gather Stock Twits data")
    gather_twits()



Stock Twits data


KeyboardInterrupt: 