## 1. Import Library

### 1.1. Import Standard Library

In [None]:
import itertools
import pandas as pd

### 1.2. Import Parser Library

In [None]:
import requests
from bs4 import BeautifulSoup

### 1.3. Import Interactive Graphing Library: Bokeh

In [None]:
from bokeh.palettes import viridis
from bokeh.models import ColumnDataSource, DatetimeTickFormatter, HoverTool,  NumeralTickFormatter
from bokeh.io import show, output_file, output_notebook
from bokeh.plotting import figure, output_notebook

## 2. Web Scraping

### 2.1. Scraping Contents of Main Page

In [None]:
# Download the main commodity listing page.
# The timeout keeps the cell from hanging forever when the server never
# answers; raise_for_status() stops right here on an HTTP error instead of
# letting the later cells parse an error page.
resp = requests.get('http://www.stockq.org/market/commodity.php', timeout=30)
resp.raise_for_status()
# Decode the response body as UTF-8 before reading resp.text.
resp.encoding = 'utf-8'
soup = BeautifulSoup(resp.text, 'html.parser')
print(soup)

### 2.2. Scraping Contents of Subpages

In [None]:
# The hyperlinks of the listed commodities are "a" tags nested in "td" tags.
td = soup.find_all('td')
# hrefs: absolute URL of each linked subpage.
# comms: link text (commodity name) of the same anchor, in the same order.
hrefs = []
comms = []

for s in td:
    a = s.find('a', href=True)
    # Cells without a hyperlink are skipped explicitly; this replaces a bare
    # "except: pass" that would also have hidden any unrelated error.
    if a is None:
        continue
    hrefs.append("http://www.stockq.org" + a['href'])
    # Take the text from the very anchor that supplied the link so that
    # names and URLs always stay paired.
    comms.append(a.text)

## 3. Clean Dataset

### 3.1. Extract Data

In [None]:
# Ignore entries that are not relevant to commodities.
comms = comms[11:-2]
# Hyperlinks of the corresponding commodities.
hrefs = hrefs[11:-2]

date = []
price = []

for href in hrefs:

    # Download and parse the subpage of the corresponding commodity.
    # The timeout prevents an unresponsive server from blocking the loop and
    # raise_for_status() reports an HTTP error at the request that caused it.
    resp = requests.get(href, timeout=30)
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Price for each trading session of the specified commodity is listed in:
    # Level 1: 3rd Tag "table" with Class named "indexpagetable."
    # Level 2: Tag "tr" with Class named "row1" and "row2,"
    # where "row1" stands for "odd rows" of the table
    # while "row2" stands for "even rows" of the table.
    # Level 3: 1st and 4th Tag "td" include the "dates" of trading sessions
    # while 2nd and 5th Tag "td" include the corresponding prices.
    tables = soup.find_all('table', {'class': 'indexpagetable'})
    table = tables[2]

    rows = table.find_all('tr', {'class': ['row1', 'row2']})

    # Look up the cells of every row once instead of once per column.
    cells = [row.find_all('td') for row in rows]

    # Keep the original ordering: the whole 1st date column first, then the
    # whole 4th one; prices follow the same pattern (2nd column, then 5th),
    # so date[i] and price[i] always belong together.
    date.extend(row_cells[0].text for row_cells in cells)
    date.extend(row_cells[3].text for row_cells in cells)
    price.extend(row_cells[1].text for row_cells in cells)
    price.extend(row_cells[4].text for row_cells in cells)

# Make the length of the "List of Commodities" identical to that of the "Date"
# and "Price" data by repeating each commodity name once per scraped row.
# This only depends on the final lists, so it is computed once after the loop;
# with no commodities at all the result is simply an empty list.
n = len(date) // len(comms) if comms else 0
comm = list(itertools.chain.from_iterable(itertools.repeat(name, n) for name in comms))

### 3.2. Create Dataframe

In [None]:
# Combine the three parallel lists into one table with the columns
# "comm" (commodity name), "date" and "price".
df = pd.DataFrame(dict(comm=comm, date=date, price=price))
df

### 3.3. Remove Data

In [None]:
# Show the rows whose date contains "2018/09" in order to spot commodities
# that are not within the analysis period.
df.loc[df['date'].str.contains('2018/09')]

In [None]:
# Keep only the rows whose commodity is not '鈾' (uranium).
df = df.loc[df['comm'].ne('鈾')]
df

In [None]:
# Remove the specified commodity from the list.
# Filtering instead of list.remove() drops every occurrence and does not
# raise ValueError when the name is already gone (e.g. the cell is re-run).
comms = [name for name in comms if name != '鈾']
comms

### 3.4. Split Dataframe into Chunks

In [None]:
# Number of rows per commodity, derived from the dataframe that is actually
# being split. The raw "date" list is not filtered together with "df", so its
# length is not a reliable basis once rows have been removed.
n = len(df) // len(comms) if comms else 0
# Split "df" into consecutive chunks of n rows, one chunk per commodity
# (e.g. 540 rows and 27 commodities give 27 chunks of 20 rows).
# A chunk size of 0 would be an invalid range step, so it yields no chunks.
list_df = [df[i:i + n] for i in range(0, df.shape[0], n)] if n else []
list_df

## 4. Data Visualization

In [None]:
# Send the plot to a standalone HTML file with the given name.
output_file(filename='4_InteractiveLineChart.html')

In [None]:
# Specify the display of tooltips.
# Datetime will be formatted as YYYY-MM-DD.
hover = HoverTool(tooltips=[('Commodity', '@Commodity'), ('Date', '@Date{%F}'), ('Price', '@Price')],
                  formatters={'Date': 'datetime'})

# Set overall formats of the plot.
p = figure(plot_width=800, plot_height=800, x_axis_type='datetime')
p.title.text = 'Commodity Prices'
p.add_tools(hover)

# One colour per chunk, so the palette follows the number of commodities
# instead of a hard-coded count.
palette = viridis(len(list_df))

# Start plotting: one line per commodity.
# The loop variable is deliberately not called "df", so the full dataframe is
# still available after this cell has run.
for chunk, name, color in zip(list_df, comms, palette):

    # Convert the date strings to datetime without writing back into the
    # chunk (which is a slice of the full dataframe).
    dates = pd.to_datetime(chunk['date'])
    # The prices were scraped as text; convert them to numbers so that they
    # can be drawn on the numeric y-axis. Thousands separators are removed
    # first and anything unparsable becomes NaN.
    prices = pd.to_numeric(chunk['price'].astype(str).str.replace(',', '', regex=False),
                           errors='coerce')
    # Import data for plotting.
    source = ColumnDataSource(data={'Commodity': chunk['comm'], 'Date': dates, 'Price': prices})
    # Set overall formats of the line chart.
    r = p.line(x='Date', y='Price', line_width=2, color=color, alpha=0.8,
               muted_color=color, muted_alpha=0.2, legend=name, source=source)
    # Every line starts out muted.
    r.muted = True

# Location of legend.
p.legend.location = 'top_left'
# Click on a legend item and the corresponding line will be muted or lit up.
p.legend.click_policy = 'mute'
# Add thousand separator to labels of Y-axis.
p.yaxis.formatter = NumeralTickFormatter(format='0,0')

output_notebook()

show(p)