In [2]:
import requests
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer 
import matplotlib.pyplot as plt
import json
from datetime import date, timedelta
import datetime as dt
import yfinance as yf



# Create a function to extract selected stock data for further anaysis
## Resource: stocktwits, finviz, yahoo finance
## Output:            
            1. df_stocktwits : a dataframe stores with investors' comment for selected stocks 
            2. df_news_titles : a dataframe stores with news titles for selected stocks 
            3. df_rating : a dataframe stores with rating for selected stocks 
            4. df_hist_data : a dataframe stores with 5 years history data for selected stocks 

In [22]:
# get stock dataframe by Symbols in xxx days
def getStockDataframe(symbols, in_days):
    
    df_stocktwits = pd.DataFrame(columns = ['symbol', 'text', 'sentiment', 'time'])
    start_date = dt.datetime.now() - dt.timedelta(days=in_days)
    start_date = start_date.date()  
    for symbol in symbols:
        ChangeSymbol = False
        maxCursor = ''        
        while ChangeSymbol == False:

            # fetch people's comment data from stocktwits by suing API
            r1 = requests.get('https://api.stocktwits.com/api/2/streams/symbol/' + symbol + '.json?filter=top&limit=20&max=' + maxCursor)
            data = r1.json()
            if r1.status_code != 200:
                print('make more than 200 requests an hour')
                print(data)
                return df_stocktwits

            # update maxCursor
            maxCursor = str(data['cursor']['max'])

            # parse data and put into dataframe
            rows = []
            for msg in data['messages']:
                text = msg['body']
                time_str = msg['created_at'][:10]
                sentiments = msg['entities']['sentiment']
                symbol = data['symbol']['symbol']
                if sentiments != 'null':
                    sentiment = str(sentiments)[11:-2]
                else :
                    sentiment = ''
                date_time_obj = dt.datetime.strptime(time_str, '%Y-%m-%d')
                date_time_obj = date_time_obj.date()

                # check messages are enough 
                if date_time_obj <= start_date:
                    ChangeSymbol = True
                    break

                rows.append([symbol, text, sentiment, date_time_obj])

            df_stocktwits = df_stocktwits.append(pd.DataFrame(rows, columns = ['symbol', 'text', 'sentiment', 'time']), ignore_index=True)
            df_stocktwits = df_stocktwits.sort_values(by=['symbol'], ignore_index = True)
                            
    news_tables = {}
    parse_data_news = []
    for symbol in symbols:
        
        # scraping stock's news title from finviz
        url = 'https://finviz.com/quote.ashx?t=' + symbol + '&ty=c&ta=1&p=d'
        r2 = requests.get(url, headers={'user-agent': 'my-app'})
        soup1 = BeautifulSoup(r2.text, 'html')
        all_news=soup1.find(id="news-table")
        news_tables[symbol] = all_news
        
        # parse data and put into dataframe
        for symbol, news_table in  news_tables.items():
            for titles in news_table.findAll('tr'):
                datetime = titles.td.text
                title = titles.a.text
                channel = titles.span.text
                news_url = titles.a['href']
                if len(datetime.split(" ")) == 1:
                    time = datetime[0:7]
                else:
                    date = datetime[0:9]
                    time = datetime[11:17]
                date = pd.to_datetime(date)
                
                parse_data_news.append([symbol, date, time, title, channel, news_url])
                df_news_titles = pd.DataFrame(parse_data_news, columns =['symbol','date', 'time', 'title', 'channel', 'news_url'])
        df_news_titles = df_news_titles.append(pd.DataFrame(df_news_titles, columns =['symbol','date', 'time', 'title', 'channel', 'news_url']), ignore_index=True)
        df_news_titles = df_news_titles.sort_values(by=['symbol'], ignore_index = True)

        
    rating_tables = {}
    parse_data_rating = []       
    for symbol in symbols:
        
        # scraping stock's rating information from finviz
        url = 'https://finviz.com/quote.ashx?t=' + symbol + '&ty=c&ta=1&p=d'
        r2 = requests.get(url, headers={'user-agent': 'my-app'})
        soup2 = BeautifulSoup(r2.text, 'html')
        all_rating=soup2.findAll(class_="fullview-ratings-inner")
        rating_tables[symbol] = all_rating
        
        # parse data and put into dataframe
        for symbol, ratings in rating_tables.items():
            for rating in ratings:
                date= rating.text.split('\n')[1][:9]
                date = dt.datetime.strptime(date, '%b-%d-%y')
                date = date.date()
                action= rating.text.split('\n')[1][9:]
                organization= rating.text.split('\n')[2]
                suggestion= rating.text.split('\n')[3]
                target_price= rating.text.split('\n')[4]
                
                parse_data_rating.append([symbol, date, action, organization, suggestion, target_price])
        df_rating = pd.DataFrame(parse_data_rating, columns = ['symbol', 'date', 'action', 'organization', 'suggestion', 'target_price'])
        df_rating = df_rating.sort_values(by=['symbol'], ignore_index = True)
    
    
    df_hist_data= pd.DataFrame([],columns = ['symbol', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock', 'Splits'])
    
    # fetch 5 years stock history data by using Yahoo Finance API  
    for symbol in symbols:
        all_info = yf.Ticker(symbol)
        hist_data = all_info.history(period = 'max')
        hist_data['symbol']= symbol
        hist_data.reset_index(inplace=True)
        hist_data = hist_data.rename(columns = {'index': 'Date'})
        df_hist_data = df_hist_data.append(hist_data)
        
    return df_news_titles, df_stocktwits, df_rating, df_hist_data


df_news_titles, df_stocktwits, df_rating, df_hist_data= getStockDataframe(['ZM', 'SQ', 'FB'], 3)



In [25]:
df_news_titles

Unnamed: 0,symbol,date,time,title,channel,news_url
0,FB,2021-03-29,10:26AM,"Dow Falls, Tech Stocks Sell Off As Yields Rall...",Investor's Business Daily,https://www.investors.com/market-trend/stock-m...
1,FB,2021-03-31,07:18AM,Facebook announces new comment and sorting too...,The Independent,https://finance.yahoo.com/news/facebook-announ...
2,FB,2021-03-31,07:20AM,Google to give 25m to EU fund to 'fight misinf...,Yahoo Finance UK,https://finance.yahoo.com/news/google-will-giv...
3,FB,2021-03-31,08:00AM,Clubhouse App Has People Talking On The Newest...,Investor's Business Daily,https://www.investors.com/news/technology/club...
4,FB,2021-03-31,09:11AM,Facebook (FB) Relaunches WhatsApp Digital Paym...,Zacks,https://finance.yahoo.com/news/facebook-fb-rel...
...,...,...,...,...,...,...
1195,ZM,2021-03-10,08:16AM,Dow Jones Futures Rise But So Do Treasury Yiel...,Investor's Business Daily,https://www.investors.com/market-trend/stock-m...
1196,ZM,2021-03-10,06:36AM,4 Beaten-Down Nasdaq Stocks With 57% to 78% Up...,Motley Fool,https://www.fool.com/investing/2021/03/10/4-be...
1197,ZM,2021-03-10,06:02AM,Dow Jones Futures Rise With Final Biden Stimul...,Investor's Business Daily,https://www.investors.com/market-trend/stock-m...
1198,ZM,2021-03-14,1:13AM,"FOMC meeting, retail sales: What to know in th...",Yahoo Finance,https://finance.yahoo.com/news/fomc-meeting-re...


In [26]:
df_stocktwits

Unnamed: 0,symbol,text,sentiment,time
0,FB,"$FB \nIf Nasdaq is red tm, this is gonna drop 5+",,2021-04-06
1,FB,"$SPY $QQQ $FB $BTC.X $AAPL BEARS, just in case...",Bullish,2021-04-05
2,FB,$FB i like how all the bulls here are willing ...,Bearish,2021-04-04
3,FB,$MSFT $GOOGL $AMZN $FB $NFLX \nGetting Pumped ...,Bullish,2021-04-05
4,FB,$FB can’t use Facebook because my grandma repl...,,2021-04-05
...,...,...,...,...
1176,ZM,"$ZM Big Trade - $43,187,755\n133,312 shares at...",,2021-04-05
1177,ZM,$ZM,Bullish,2021-04-05
1178,ZM,🚨 WATCH LIST FOR TOMORROW PART 4 🚨 :\n\n$SNOW ...,Bullish,2021-04-05
1179,ZM,$ZM zm price action has been getting denied j...,Bullish,2021-04-05


In [27]:
df_rating

Unnamed: 0,symbol,date,action,organization,suggestion,target_price
0,FB,2020-07-31,Reiterated,Monness Crespi & Hardt,Buy,$230 → $290
1,FB,2020-07-31,Reiterated,Morgan Stanley,Overweight,$270 → $285
2,FB,2021-04-01,Initiated,Wolfe Research,Outperform,
3,FB,2021-03-29,Reiterated,Deutsche Bank,Buy,$355 → $385
4,FB,2021-01-21,Resumed,Piper Sandler,Neutral,$275
...,...,...,...,...,...,...
115,ZM,2020-09-01,Reiterated,Piper Sandler,Neutral,$211 → $411
116,ZM,2020-09-01,Reiterated,Robert W. Baird,Outperform,$300 → $465
117,ZM,2020-09-01,Reiterated,Rosenblatt,Buy,$260 → $350
118,ZM,2020-09-01,Upgrade,BTIG Research,Neutral → Buy,$500


In [29]:
df_hist_data

Unnamed: 0,symbol,Date,Open,High,Low,Close,Volume,Dividends,Stock,Splits,Stock Splits
0,ZM,2019-04-18,65.000000,66.000000,60.320999,62.000000,25764700,0,,,0.0
1,ZM,2019-04-22,61.000000,68.900002,59.939999,65.699997,9949700,0,,,0.0
2,ZM,2019-04-23,66.870003,74.168999,65.550003,69.000000,6786500,0,,,0.0
3,ZM,2019-04-24,71.400002,71.500000,63.160000,63.200001,4973500,0,,,0.0
4,ZM,2019-04-25,64.739998,66.849998,62.599998,65.000000,3863300,0,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...
2228,FB,2021-03-29,285.769989,293.179993,284.700012,290.820007,21718800,0,,,0.0
2229,FB,2021-03-30,289.829987,292.470001,286.700012,288.000000,17474500,0,,,0.0
2230,FB,2021-03-31,289.989990,296.500000,288.609985,294.529999,19498200,0,,,0.0
2231,FB,2021-04-01,298.399994,302.399994,296.600006,298.660004,17616000,0,,,0.0
