# Getting Options Contracts Data

In [18]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import re

class Process_soup:
    """Pull the underlying price, expiry date and at-the-money put quote out of a parsed page.

    The constructor eagerly reads the underlying price and the expiration date
    from the document, so both are available as ``self.price`` (float) and
    ``self.expdate`` (``datetime`` or ``None``) afterwards.
    """

    def __init__(self, soup):
        """Store the parsed document and extract price and expiry from it.

        soup: parsed document offering the BeautifulSoup-style API used below
              (``descendants``, ``find`` and ``find_all``).

        Raises ValueError when no price text is found (``float("")``).
        """
        self.bs = soup
        self.price = float(self.get_underlying_price())
        self.expdate = self.get_expiration_date()

    def get_underlying_price(self):
        """Return the price text located under the 'Price' header cell.

        Tables are scanned in document order. Once a row holding a ``td`` whose
        text is exactly 'Price' is seen, the first ``td`` of the next ``tr``
        inside that same table is returned. Returns "" when the keyword is never
        found, or when no further row follows it in that table.
        """
        keyword_found = False
        for node in self.bs.descendants:  # every tag in the document
            if node.name != "table":
                continue
            for row in node.descendants:  # every tag below this table
                if row.name != 'tr':
                    continue
                if keyword_found:
                    # The row right after the header row carries the value.
                    return row.td.get_text()
                if any(cell.name == 'td' and cell.get_text() == 'Price'
                       for cell in row.children):
                    keyword_found = True
            if keyword_found:
                # Header was the last row of its table: nothing to read.
                return ""
        return ""

    def get_expiration_date(self):
        """Return the expiry as a ``datetime``, or None when it is not on the page.

        Looks for the first text node starting with "Expiry:" and parses the
        three whitespace-separated tokens after it with "%b %d, %Y"
        (e.g. "Expiry: May 25, 2018"). ``datetime.strptime`` still raises
        ValueError if those tokens are present but malformed.
        """
        matches = self.bs.find_all(text=re.compile("^Expiry:"))
        if not matches:
            return None
        parts = matches[0].split()
        if len(parts) < 4:
            return None
        str_fmt = "{0} {1} {2}".format(parts[1], parts[2], parts[3])
        return datetime.strptime(str_fmt, "%b %d, %Y")

    def get_ATM_contract_data(self):
        """Find the put contract whose strike is closest below the underlying price.

        Strikes are tried in this order: price rounded down to the nearest 0.5,
        price rounded down to a whole number, price rounded down to a multiple
        of 5. For each, the contract code ``<yymmdd>P<5-digit whole><3-digit
        thousandths>`` is searched for in the ``onclick`` attributes of the
        document held by this instance.

        Returns ``[contract, strike, tag]`` for the first hit, or ``[]`` when no
        candidate is present or the expiry date is unknown.
        """
        if self.expdate is None:
            return []
        expiry_code = self.expdate.strftime("%y%m%d")
        candidates = (
            int(self.price * 2) / 2,          # nearest half dollar below
            int(self.price),                  # nearest whole dollar below
            self.price - self.price % 5,      # nearest multiple of five below
        )
        for strike in candidates:
            whole = int(strike)
            contract = "{0}P{1:05d}{2:03d}".format(
                expiry_code, whole, int((strike - whole) * 1000))
            # Search this instance's own document, not a module-level variable.
            tag = self.bs.find(onclick=re.compile(contract))
            if tag is not None:
                return [contract, strike, tag]
        return []

    def get_ATM_option_data(self, tag):
        """Return bid, ask, volume and IV texts for the contract located at ``tag``.

        Climbs three parent levels from ``tag`` (presumably up to the table row
        of the quote - confirm against the page layout), collects every ``td``
        below that node and reads the cells at positions 3, 4, 6 and 8.
        """
        columns = tag.parent.parent.parent.find_all("td")
        return {'bid': columns[3].get_text(), 'ask': columns[4].get_text(),
                'volume': columns[6].get_text(), 'IV': columns[8].get_text()}

    def print_data(self, contract_data, option_data):
        """Print a fixed-width summary of the contract and its quote.

        contract_data: list as returned by ``get_ATM_contract_data`` (non-empty).
        option_data:   dict as returned by ``get_ATM_option_data``.
        """
        print("underlying  : {}".format(self.price))
        print("contract    : {}".format(contract_data[0]))
        print("strike      : {}".format(contract_data[1]))
        print("expiration  : {}".format(self.expdate.strftime("%d %b, %Y")))
        print("bid         : {}".format(option_data['bid']))
        print("ask         : {}".format(option_data['ask']))
        print("volume      : {}".format(option_data['volume']))
        print("IV          : {}".format(option_data['IV']))
    

In [28]:
# Parse a previously saved price-table page and print its at-the-money put quote.
with open("price_table_T.html", encoding="UTF-8") as fp:
    text = fp.read()
soup = BeautifulSoup(text, "lxml")
ps = Process_soup(soup)
contract_data = ps.get_ATM_contract_data()
if contract_data:
    option_data = ps.get_ATM_option_data(contract_data[2])
    ps.print_data(contract_data, option_data)
else:
    # get_ATM_contract_data returns [] when no candidate strike is on the page;
    # indexing it would raise IndexError.
    print("No at-the-money put contract found in price_table_T.html")

underlying  : 32.51
contract    : 180525P00032500
strike      : 32.5
expiration  : 25 May, 2018
bid         : 0.01
ask         : 0.04
volume      : 655
IV          : 14.78%


In [29]:
import requests
from lxml.html import fromstring


class Get_proxies():
    """Collect ``ip:port`` proxy addresses from a set of public proxy-list pages."""

    # Pages that are scraped for proxy tables.
    proxies_sources = [
    'https://free-proxy-list.net/',
    'https://www.us-proxy.org/',
    'https://free-proxy-list.net/uk-proxy.html',
    'https://free-proxy-list.net/anonymous-proxy.html',
    'https://www.sslproxies.org/'
    ]

    def get_proxies_from_url(self, url, limit=10, timeout=10):
        """Return the proxies listed on one page as a list of "ip:port" strings.

        url:     page to download.
        limit:   how many leading table rows to inspect (default 10).
        timeout: seconds to wait for the page before giving up (default 10);
                 without it ``requests.get`` can block indefinitely.

        Only rows whose seventh cell contains "yes" are kept (presumably the
        HTTPS-support column - confirm against the page). Rows with an empty
        IP or port cell are skipped. Network failures propagate as
        ``requests.RequestException``.
        """
        response = requests.get(url, timeout=timeout)
        if not response.text.strip():
            # lxml refuses to parse an empty document.
            return []
        parser = fromstring(response.text)
        proxies = list()
        for row in parser.xpath('//tbody/tr')[:limit]:
            if not row.xpath('.//td[7][contains(text(),"yes")]'):
                continue
            # First cell is the IP, second the corresponding port.
            ip = row.xpath('.//td[1]/text()')
            port = row.xpath('.//td[2]/text()')
            if ip and port:
                proxies.append(":".join([ip[0], port[0]]))
        return proxies

    def get_proxies(self):
        """Return the set of distinct proxies gathered from every configured source.

        A source that cannot be downloaded is reported and skipped so that one
        dead site does not discard the proxies found on the others.
        """
        proxies = set()
        for proxies_source in self.proxies_sources:
            try:
                proxies.update(self.get_proxies_from_url(proxies_source))
            except requests.RequestException as e:
                print("Error: {}".format(e))
                print("Skipping proxy source {}".format(proxies_source))
        return proxies

In [31]:
from itertools import cycle
import traceback
from lxml.html import fromstring
import time
from fake_useragent import UserAgent

tickers = ['SPY','HPQ', 'HPE', 'ILG', 'ON', 'T', 'GE', 'M', 'BAC', 'LEG']
proxies = Get_proxies()
user_agents = UserAgent()
available_proxies = proxies.get_proxies()
if not available_proxies:
    print("Error: no proxies available")
proxy_pool = cycle(available_proxies)
for ticker in tickers:
    url = 'https://www.optionseducation.org/quotes.html?quote=' + ticker
    # Try each proxy at most once per ticker so a ticker that can never be
    # fetched does not spin forever on the endless cycle.
    for attempt in range(len(available_proxies)):
        proxy = next(proxy_pool)  # exactly one proxy consumed per attempt
        user_agent = user_agents.random   # Get a random user agent
        headers = {'User-Agent': user_agent}
        try:
            time.sleep(3)
            response = requests.get(url, proxies={"http": proxy, "https": proxy}, headers=headers, timeout = 5)
            soup_oic = BeautifulSoup(response.text, "lxml")
            with open("oic_page_" + ticker + ".html","w", encoding="UTF-8") as fp:
                fp.write(response.text)
            # Reset for every attempt so a page without the iframe can never
            # reuse the address found for a previous ticker.
            address = ""
            for iframe in soup_oic.find_all('iframe'):
                if 'ivolatility' in iframe.get('src', ''):
                    address = iframe['src']
            if not address:
                raise ValueError("no ivolatility iframe found for {}".format(ticker))
            req = requests.get(address, proxies={"http": proxy, "https": proxy}, headers=headers, timeout = 5)
            soup = BeautifulSoup(req.text, "lxml")
            with open("price_table_" + ticker + ".html","w", encoding="UTF-8") as fp:
                fp.write(req.text)
            ps = Process_soup(soup)
            print("  {}".format(ticker))
            contract_data = ps.get_ATM_contract_data()
            if contract_data:
                option_data = ps.get_ATM_option_data(contract_data[2])
                ps.print_data(contract_data, option_data )
            else:
                # The page parsed fine but lists no matching contract; another
                # proxy would not change that, so do not retry.
                print("No at-the-money put contract found for {}".format(ticker))
            print("-------------------")
            break
        except Exception as e:
            # Free proxies fail often; report the problem and retry the whole
            # request for this ticker through the next proxy.
            print("Error: {}".format(e))
            print("Skipping. Connnection error, server {}".format(proxy))
        print("-------------------")
    else:
        print("Giving up on {}: no proxy succeeded".format(ticker))
print("-------END---------")

  SPY
underlying  : 272.15
contract    : 180525P00272000
strike      : 272.0
expiration  : 25 May, 2018
bid         : 0.04
ask         : 0.06
volume      : 70866
IV          : 6.16%
-------------------
Error: HTTPSConnectionPool(host='www.optionseducation.org', port=443): Max retries exceeded with url: /quotes.html?quote=HPQ (Caused by ProxyError('Cannot connect to proxy.', OSError('Tunnel connection failed: 403 Forbidden',)))
Skipping. Connnection error, server 93.188.162.77:3128
-------------------
  HPQ
underlying  : 21.91
contract    : 180525P00021500
strike      : 21.5
expiration  : 25 May, 2018
bid         : 0.00
ask         : 0.01
volume      : 0
IV          : 44.93%
-------------------
Error: HTTPSConnectionPool(host='www.optionseducation.org', port=443): Max retries exceeded with url: /quotes.html?quote=HPE (Caused by ConnectTimeoutError(<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x0000000009DDDFD0>, 'Connection to 152.157.119.253 timed out. (conne