In [1]:
## Create web charts of OzBargain polls

In [28]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 
from math import pi
import os

from bokeh.io import show, save, output_file
from bokeh.plotting import figure, curdoc
from bokeh.palettes import TolRainbow, Sunset
from bokeh.transform import cumsum


In [3]:
import logging
# With no arguments, basicConfig() installs a default stream handler on the root logger.
logging.basicConfig()
# Module-level logger shared by the scraping and charting functions in this notebook.
LOGGER = logging.getLogger(__name__)
# Raise this logger's verbosity to INFO so the progress messages below are emitted.
LOGGER.setLevel(logging.INFO)


In [7]:
def find_all_active_polls(timeout=10):
    """
    Finds all active polls on Ozbargain

    Downloads the forum's poll listing page and inspects every ``td.topic``
    cell, skipping the ones that contain a ``marker expired`` span.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the listing page. Without a timeout a stalled
        connection would block the notebook indefinitely.

    Returns
    -------
    list of str
        The last path segment of each active poll's first link, in page
        order. Cells with no link (or a link without ``href``) are skipped.
    """
    url = "https://www.ozbargain.com.au/forum/polls"
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, "html.parser")
    all_polls = soup.find_all("td", {'class': "topic"})

    poll_ids = []
    LOGGER.info('Sourcing active polls')
    for poll in all_polls:
        if poll.find("span", class_="marker expired"):
            continue

        # The first anchor in the cell is the poll link; tolerate cells
        # without one instead of raising IndexError/AttributeError.
        first_link = poll.find("a")
        href = first_link.get('href') if first_link is not None else None
        if not href:
            continue

        poll_ids.append(href.split('/')[-1])

    return poll_ids

In [22]:
from bokeh.palettes import __palettes__, all_palettes

In [26]:
# Preview the 3-colour Iridescent palette. The bare name `Iridescent` is never
# imported in this notebook and the empty `[]` subscript was a syntax error, so
# look the palette up through the `all_palettes` mapping imported above.
all_palettes['Iridescent'][3]

('#FEFBE9', '#FCF7D5', '#F5F3C1')

In [25]:
# Peek at the first ten entries of bokeh's `__palettes__` listing.
tuple(__palettes__[:10])

('Accent3',
 'Accent4',
 'Accent5',
 'Accent6',
 'Accent7',
 'Accent8',
 'Blues3',
 'Blues4',
 'Blues5',
 'Blues6')

In [57]:

def generate_poll_webchart(id, timeout=10):
    """
    Generates a pie chart for a given poll

    Downloads the poll's node page, reads every option label and vote count
    from the ``#poll`` block, and saves a Bokeh pie chart to
    ``outputs/<id>.html``.

    Parameters
    ----------
    id : int or str
        Node id of the poll; appended to the OzBargain node URL.
    timeout : float, optional
        Seconds to wait for the page before ``requests`` gives up.

    Returns
    -------
    None
        Nothing is written (and the reason is logged) when the page has no
        poll block or title, a vote count is not an integer, there are no
        options, or no votes have been cast yet.
    """
    prefix_url = "https://www.ozbargain.com.au/node/"
    url = prefix_url + str(id)
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')
    poll = soup.find(id="poll")
    LOGGER.info('Parsing data for %s', id)

    # scraping data
    try:
        span_vote = poll.find_all("span", class_="nvb voteup")
        span_options = poll.find_all("span", class_="polltext")
        options = [option.get_text() for option in span_options]
        votes = [int(vote.get_text()) for vote in span_vote]
        title = soup.find("title").text.split(" - ")[0]
    except (AttributeError, ValueError):
        # AttributeError: the #poll block or <title> is missing.
        # ValueError: a vote counter did not contain a plain integer.
        LOGGER.info('No data found for %s', id)
        return

    if not options:
        # Previously this case ended silently, leaving no trace in the log.
        LOGGER.info('No poll options found for %s', id)
        return

    x = dict(zip(options, votes))
    total_votes = sum(x.values())
    if total_votes == 0:
        # value / 0 would turn every wedge angle into NaN.
        LOGGER.info('No votes recorded for %s', id)
        return

    data = pd.Series(x).reset_index(name='value').rename(columns={'index': 'options'})
    data['angle'] = data['value'] / total_votes * 2 * pi

    # TolRainbow only defines palettes for a fixed range of sizes; fall back to
    # Sunset for one or two slices and cycle the largest TolRainbow palette when
    # there are more options than any palette provides.
    slice_count = len(x)
    if slice_count in TolRainbow:
        colors = TolRainbow[slice_count]
    elif slice_count <= 2:
        colors = Sunset[3][:slice_count]
    else:
        largest = TolRainbow[max(TolRainbow)]
        colors = [largest[i % len(largest)] for i in range(slice_count)]
    data['color'] = list(colors)

    # set theme
    curdoc().theme = 'light_minimal'

    # create figure
    p = figure(width=1000, height=1000, title=title,
               tooltips="@options: @value", x_range=(-0.5, 1.0))

    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='options', source=data)

    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    output_path = "outputs"
    os.makedirs(output_path, exist_ok=True)

    LOGGER.info('Generating webchart for poll: %s', id)
    # setting output
    output_file(filename=os.path.join(output_path, f"{id}.html"), title=title)

    save(p)

In [58]:
if __name__ == "__main__":
    # Poll discovery is switched off here; a single hard-coded poll id is charted instead.
    # active_polls = find_all_active_polls()
    active_polls = [741922]
    # NOTE: `poll_id` remains defined at module level after this loop finishes.
    for poll_id in active_polls:
        generate_poll_webchart(poll_id)

INFO:__main__:Parsing data for 741922


In [51]:
# Scratch version of generate_poll_webchart() that shows the chart inline
# instead of saving it. NOTE: `id` shadows the builtin for the rest of the session.
id = 736920
prefix_url = "https://www.ozbargain.com.au/node/"
url = prefix_url + str(id)
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
poll = soup.find(id="poll")
# scraping data
try:
    span_vote = poll.find_all("span", class_="nvb voteup")
    span_options = poll.find_all("span", class_="polltext")
    options = [option.get_text() for option in span_options]
    votes = [int(vote.get_text()) for vote in span_vote]
    title = soup.find("title").text.split(" - ")[0]
except AttributeError:
    LOGGER.info('No data found for %s', id)
else:
    # A bare `return` is a SyntaxError outside a function, so the charting
    # steps live in the `else` branch and only run when scraping succeeded.

    # set theme
    curdoc().theme = 'light_minimal'

    # setting output
    # output_file(filename=f"{poll_id}.html", title=title)

    # create figure
    p = figure(width=1000, height=1000, title=f"{title}",
               tooltips="@options: @value", x_range=(-0.5, 1.0))

    x = dict(zip(options, votes))
    data = pd.Series(x).reset_index(name='value').rename(columns={'index': 'options'})
    data['angle'] = data['value']/data['value'].sum() * 2*pi
    if len(x) > 2:
        data['color'] = TolRainbow[len(x)]
    elif (len(x) > 0) and (len(x) <= 2):
        data['color'] = Sunset[3][:len(x)]

    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='options', source=data)

    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    show(p)

# customisation
# p.xgrid.grid_line_color = None
# p.xaxis.axis_label_text_font="source"
# p.xaxis.axis_label = "Options"
# p.yaxis.axis_label = "Votes"
# p.y_range.start = 0


# output_path = f"outputs/"
# if not os.path.exists(output_path):
#     os.makedirs(output_path)

# LOGGER.info('Generating webchart for poll: %s', id)

<div class="block ozbpoll" data-expired="0" data-nid="736920" data-revote="0" id="poll">
<h3 class="blocktitle clearfix">
<span>Poll Options</span>
<span class="options">
</span>
</h3>
<div class="blockcontent spacer">
<ul>
<li data-oid="25967" id="poll-25967">
<div class="n-vote"><span class="nvb voteup"><i class="fa fa-plus"></i><span>21</span></span></div>
<div class="polltext">
<span class="polltext">No</span> </div>
</li>
<li data-oid="25966" id="poll-25966">
<div class="n-vote"><span class="nvb voteup"><i class="fa fa-plus"></i><span>2</span></span></div>
<div class="polltext">
<span class="polltext">Yes</span> </div>
</li>
</ul>
<div class="meta"></div>
</div>
</div>


In [6]:
# NOTE(review): `poll_id` is not assigned in this cell. The only module-level
# assignment visible in the notebook is the loop variable of the `__main__`
# cell, so this call appears to rely on that cell having been run first.
generate_poll_webchart(poll_id)