<a href="https://colab.research.google.com/github/kenwkliu/ideas/blob/master/colab/DataSource.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Google colab interactive table
%load_ext google.colab.data_table

!wget https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/plot.py
import plot

In [None]:
# Download one stock's daily OHLCV bars (Open/High/Low/Close/Volume) from Yahoo
import pandas_datareader.data as web
from datetime import datetime

stock = '0293.HK'
start = '2021'                # the date bounds can be plain strings ...
end = datetime(2021, 4, 29)   # ... or datetime objects

# NOTE(review): the 'yahoo' source of pandas-datareader has been unreliable;
# confirm this call still returns data before relying on it.
stockDf = web.DataReader(name=stock, data_source='yahoo', start=start, end=end)
stockDf

In [None]:
# Write the stock DataFrame to a CSV file so it can be downloaded
stockDf.to_csv(path_or_buf='stockData.csv')

In [None]:
# Plot the OHLCV chart; the column names are passed positionally, in this order
ohlcvColumns = ('Date', 'Open', 'High', 'Low', 'Close', 'Volume')
plot.ohlcv(stockDf, *ohlcvColumns)

In [None]:
# Currency exchange rate from Federal Reserve Economic Data (FRED)
# Series catalogue: https://fred.stlouisfed.org/categories/94
# Uses the same start/end window as the stock download.

rmbUsd = web.DataReader(name='DEXCHUS', data_source='fred', start=start, end=end)
rmbUsd

In [None]:
# Get oil price from Quandl
!pip install quandl
import quandl

oil = quandl.get('EIA/PET_RWTC_D').squeeze()
print(oil)
oil.plot(lw=2, title='WTI Crude Oil Price')

In [None]:
import pandas as pd

# Load a CSV file directly from a URL:
# minute-by-minute HSI Index Future data for 2014
url = 'https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/HSI-Future-2014.csv'
hsiFuture = pd.read_csv(filepath_or_buffer=url)

# Report the total row count, then display only the first 1000 rows
rowCount = len(hsiFuture)
print("lines# ", rowCount)
hsiFuture.head(1000)

In [None]:
# Load an Excel workbook directly from a URL:
# Apple Twitter messages (2016-Apr)
url = 'https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/appleTweets.xlsx'
appleTweets = pd.read_excel(io=url)

# Display only the useful columns, limited to the first 5000 rows
COLUMNS = ['Date', 'User Name', 'Tweet content', 'Following', 'Hashtags']
appleTweets.loc[:, COLUMNS].head(5000)

In [None]:
# Google Trend: https://trends.google.com.hk/trends/trendingsearches/daily?geo=HK

# Google trends API
# https://pypi.org/project/pytrends/
!pip install pytrends

In [None]:
import pandas as pd
from pytrends.request import TrendReq

# Create the Google Trends client and show the trending searches for Hong Kong
pytrend = TrendReq()
trendingRegion = 'hong_kong'
pytrend.trending_searches(pn=trendingRegion)

In [None]:
# Build a request for both spellings over the timeframe 2021-1-1 to 2021-2-1,
# then fetch the related queries (indexed by keyword in the cells below)
searchTerms = ['hong kong', '香港']
pytrend.build_payload(kw_list=searchTerms, timeframe='2021-1-1 2021-2-1')
related_queries = pytrend.related_queries()

In [None]:
# 'top' entry of the related queries for the keyword 'hong kong'
hkRelated = related_queries['hong kong']
hkRelated['top']

In [None]:
# 'rising' entry of the related queries for the keyword 'hong kong'
hkRelated = related_queries['hong kong']
hkRelated['rising']

In [None]:
kw_list = ['gme', 'amc']

# get_historical_interest returns the assembled DataFrame for the whole range.
# The original cell discarded that return value and then displayed
# pytrend.interest_over_time(), which only reflects the last payload built
# internally. Keep the returned frame and display it instead.
historicalInterest = pytrend.get_historical_interest(
    kw_list,
    year_start=2020, month_start=12, day_start=1, hour_start=0,
    year_end=2021, month_end=2, day_end=1, hour_end=0,
    sleep=0,
)
historicalInterest

In [None]:
# Weather forecast from open API in JSON format
import pandas as pd
import json
import requests

url = 'https://data.weather.gov.hk/weatherAPI/opendata/weather.php?dataType=fnd&lang=en'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP failures directly instead of letting them
# surface later as a confusing JSON decode error or KeyError.
response = requests.get(url, timeout=30)
response.raise_for_status()

data = json.loads(response.text)
# Build the frame from the 'weatherForecast' entry of the decoded payload
df = pd.DataFrame(data['weatherForecast'])
df

In [None]:
# read html table
import pandas as pd

sp_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
# read_html returns a list with one DataFrame per table; take the first one
sp = pd.read_html(sp_url, header=0)[0]
# info() prints its summary itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
sp.info()
sp

In [None]:
# Web-scraping libraries
import requests
from bs4 import BeautifulSoup

# Fetch the wiki page listing HK stocks (source for their Chinese names)
# and parse the returned HTML
hk_url = 'https://zh-yue.wikipedia.org/wiki/%E9%A6%99%E6%B8%AF%E4%B8%8A%E5%B8%82%E5%85%AC%E5%8F%B8%E4%B8%80%E8%A6%BD'
html = requests.get(url=hk_url)
soup = BeautifulSoup(markup=html.text, features='html.parser')
soup

In [None]:
# Walk the list items of the page content and scrape the stock codes and names
STOCK_SUFFIX = '.HK'

# Restrict the search to the main content container, then take the <li>
# elements matched by the empty-class filter.
a_tags = soup.find("div", attrs={"id": "mw-content-text"})
all_li = a_tags.find_all("li", attrs={"class": ""})

# Assumes every item starts with a 4-character stock code followed by the
# name - TODO confirm against the page, since non-stock items are not filtered.
entries = [li.text.strip() for li in all_li]
code = [entry[:4] + STOCK_SUFFIX for entry in entries]
name = [entry[4:].strip() for entry in entries]

# Build the two-column frame directly. The previous index=/reset_index/rename
# sequence raised a length-mismatch ValueError when nothing was scraped; a
# dict-based frame simply comes out empty with the same two columns.
chiNames = pd.DataFrame({"code": code, "chiName": name})
chiNames

In [None]:
# Example 2: Extract New York Restaurant Names
# Request the listing page and preview the first 500 characters of its HTML source
url = "https://www.opentable.com/new-york-restaurant-listings"
html = requests.get(url=url)
htmlPreview = html.text[:500]
htmlPreview

In [None]:
# Turn the raw HTML into a soup object
soup = BeautifulSoup(markup=html.text, features='html.parser')

# Each matching span tag holds one restaurant name; print them one per line
nameSpans = soup.find_all(name='span', attrs={'class': 'rest-row-name-text'})
for span in nameSpans:
    print(span.text)