In [87]:
import hashlib
import logging
import re
import requests
from bs4 import BeautifulSoup
import pickle
import pandas as pd

In [13]:
log = logging.getLogger(__name__)

In [57]:
def get_content(url):
    """Get url contents from web or local cache.
    
    This function impliments a simple cache using the Python Pickle library. 
    When called, the function computes the SHA1 sum of the URL. If a file 
    with that name exists, then the file is loaded and it's contents are 
    returned. If the file does not exist, a GET request is made and the 
    result is saved to disk for future use.
    """
    log.info('Getting contents for URL %s', url)
    file_name = ''.join([hashlib.sha1(url.encode('utf-8')).hexdigest(), '.pickle'])
    try:
        with open(file_name, 'rb') as file:
            log.info('Cache file opened, loading pickle.')
            r = pickle.load(file)
    except FileNotFoundError as e:
        log.info('Got FileNotFoundError, making GET request.')
        r = requests.get(url)
        with open(file_name, 'wb') as file:
            pickle.dump(r, file)
    return r.content

In [2]:
# 4s softcase under 700g
URL = r"https://hobbyking.com/en_us/batteries-chargers/batteries/lipo.html?config=54&battery_pack_type=2836&unit_weight_filterable=0-700&wrh=2%2C3"

In [181]:
soup = BeautifulSoup(get_content(URL))
product_cards = soup.find_all(class_='product-card')
products = []
for product in product_cards:
    p = {}
    p['Name'] = product.find('a', class_='link -name').get_text()
    p['URL'] = product.find('a', class_='link -name')['href']
    sub_soup = BeautifulSoup(get_content(p['URL']))
    try:
        p['Price'] = sub_soup.find(class_='price-box').find(class_='regular-price').find(class_='price').get_text()
    except AttributeError as _:
        p['Price'] = sub_soup.find(class_='price-box').find(class_='old-price').find(class_='price').get_text()
    specs_found = False
    for paragraph in sub_soup.find(id='tab-description').div.find_all():
        text = paragraph.get_text("\n")
        spec_names = ['Specs:', 'Spec.', 'Spec:']
        if ('Specs:' in text) or ('Spec.' in text) or ('Spec:' in text):
            specs_found = True
            for br in paragraph.find_all("br"):
                br.replace_with("\n")
            text = paragraph.get_text()
            text = text.split('\n')
            for line in text:
                split = line.split(':')
                if len(split) == 2 and split[0] and split[1]:
                    p[split[0]] = split[1]
                    
    for key in p.keys():
        if 'weight' in key.lower():
            p['Weight'] = p.pop(key)
        elif 'capacity' in key.lower():
            p['Capacity'] = p.pop(key)
        elif 'discharge plug' in key.lower():
            p['Discharge Plug'] = p.pop(key)
        elif 'charge plug' in key.lower():
            p['Balance Plug'] = p.pop(key)
        elif 'Constant Discharge' in key:
            p['Constant Discharge'] = p.pop(key)
        elif 'Peak Discharge' in key:
            p['Peak Discharge'] = p.pop(key)
        elif 'Pack Size' in key:
            p['Dimensions'] = p.pop(key)
        elif 'battery size' in key.lower():
            p['Dimensions'] = p.pop(key)
        elif 'dimension' in key.lower():
            p['Dimensions'] = p.pop(key)
        elif 'configuration' in key.lower():
            p['Configuration'] = p.pop(key)
    
    
    # Account for inconsistency on HK website
    if p['URL'] == r'https://hobbyking.com/en_us/turnigy-2200mah-4s-20c-lipoly-pack-w-xt60-connector.html':
        p['Discharge'] = p['Constant Discharge']
    
    # Split Discharge into Peak and Continuous
    if 'Discharge' in p.keys():
        discharge = re.findall(r'([0-9].C)', p.pop('Discharge'))
        if discharge:
            p['Constant Discharge'] = discharge[0]
            p['Peak Discharge'] = discharge[1]
    
    # Strip non-numeric characters from numeric fields
    no_number_re = r'[^0-9|.]'
    p['Capacity [mAh]'] = float(re.sub(no_number_re, '', p.pop('Capacity')))
    p['Weight [g]'] = float(re.sub(no_number_re, '', p.pop('Weight')))
    p['Price [USD]'] = float(re.sub(no_number_re, '', p.pop('Price')))
    p['Constant Discharge [c]'] = int(re.sub(no_number_re, '', p.pop('Constant Discharge')))
    p['Peak Discharge [c]'] = int(re.sub(no_number_re, '', p.pop('Peak Discharge')))
    
    # Fix error on HK Website
    if p['URL'] == r'https://hobbyking.com/en_us/turnigy-graphene-850mah-4s-75c-lipo-pack-w-xt60u.html':
        p['Capacity'] = float(850)
    
    print(p)
    products.append(p)
    if not specs_found: raise Exception('No Specs')

{'Name': 'Turnigy Graphene 1300mAh 4S 65C Lipo Pack w/XT60                                ', 'URL': 'https://hobbyking.com/en_us/graphene-1300mah-4s-65c-w-xt60.html', 'Voltage': '\xa04S1P / 4 Cell / 14.8V', 'Balance Plug': '\xa0JST-XH', 'Dimensions': '\xa075x36x36mm', 'Discharge Plug': '\xa0XT-60', 'Capacity [mAh]': 1300.0, 'Weight [g]': 176.0, 'Price [USD]': 26.93, 'Constant Discharge [c]': 65, 'Peak Discharge [c]': 30}
{'Name': 'Turnigy Graphene Panther 850mAh 4S 75C Battery Pack w/XT60                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-graphene-850mah-4s-75c-lipo-pack-w-xt60u.html', 'Cell Count': ' 4', 'Voltage': ' 14.8V', 'Pack Resistance': ' 22mΩ', 'Dimensions': ' 56x31x38mm', 'Balance Plug': ' JST-XH', 'Discharge Plug': ' XT60', 'Capacity [mAh]': 8500.0, 'Weight [g]': 120.0, 'Price [USD]': 18.78, 'Constant Discharge [c]': 75, 'Peak Discharge [c]': 150, 'Capacity': 850.0}
{'Name': 'Turnigy Graphene Panther 500mAh 4S 75C Battery Pack w/XT30               

{'Name': 'Turnigy 2200mAh 4S 30C Lipo Pack                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-2200mah-4s-30c-lipo-pack.html', 'Dimensions': '\xa0105 x\xa034 x 35mm', 'Balance Plug': ' JST-XH', 'Discharge Plug': ' XT60\xa0', 'Configuration': ' 4S1P / 14.8v / 4Cell', 'Capacity [mAh]': 2200.0, 'Weight [g]': 258.0, 'Price [USD]': 23.36, 'Constant Discharge [c]': 30, 'Peak Discharge [c]': 40}
{'Name': 'Turnigy 2200mAh 4S 40C Lipo Pack                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-2200mah-4s-40c-lipo-pack.html', 'Dimensions': ' 105 x 34 x 34mm', 'Balance Plug': ' JST-XH ', 'Discharge Plug': ' XT60', 'Configuration': ' 4S1P / 14.8v / 4Cell', 'Capacity [mAh]': 2200.0, 'Weight [g]': 248.0, 'Price [USD]': 24.94, 'Constant Discharge [c]': 40, 'Peak Discharge [c]': 50}
{'Name': 'ZIPPY Flightmax 1800mAh 4S1P 40C                                ', 'URL': 'https://hobbyking.com/en_us/zippy-flightmax-1800mah-4s1p-40c.html', 'Voltage': ' 4S1P /\

{'Name': 'Turnigy nano-tech 850mah 4S 25~50C Lipo Pack                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-nano-tech-850mah-4s-25-50c-lipo-pack.html', 'Voltage': ' 4S1P / 4 Cell / 14.8V', 'Balance Plug': ' JST-XH', 'Dimensions': ' 56x30x31mm', 'Discharge Plug': ' XT-60', 'Capacity [mAh]': 850.0, 'Weight [g]': 94.0, 'Price [USD]': 10.99, 'Constant Discharge [c]': 25, 'Peak Discharge [c]': 50}
{'Name': 'Turnigy nano-tech 850mAh 4S 45~90C Lipo Pack                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-nano-tech-850mah-4s-45-90c-lipo-pack.html', 'Voltage': ' 4S1P /\xa04 Cell / 14.8V', 'Balance Plug': ' JST-XH', 'Dimensions': ' 56x31x33mm', 'Discharge Plug': '\xa0XT-60', 'Capacity [mAh]': 850.0, 'Weight [g]': 99.0, 'Price [USD]': 14.88, 'Constant Discharge [c]': 45, 'Peak Discharge [c]': 90}
{'Name': 'ZIPPY Compact 2200mAh 4s 40c Lipo Pack                                ', 'URL': 'https://hobbyking.com/en_us/zippy-compact-2200mah-4s-40c-lipo

{'Name': 'Turnigy Heavy Duty 4000mAh 4S 60C Lipo Pack w/XT-90                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-battery-heavy-duty-4000mah-4s-60c-lipo-pack-xt-90.html', 'Specs': '\xa0', 'Wire Gauge': '\xa010AWG', 'Balance Plug': '\xa0JST-XH', 'Discharge Plug': ' XT-90', 'Configuration': '\xa04S1P / 14.8v / 4 Cell', 'Dimensions': '\xa0143 x 51 x 31mm', 'Capacity [mAh]': 4000.0, 'Weight [g]': 480.0, 'Price [USD]': 49.94, 'Constant Discharge [c]': 60, 'Peak Discharge [c]': 20}
{'Name': 'Turnigy nano-tech 2700mah 4S 65~130C Lipo Pack w/XT-60                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-battery-nano-tech-2700mah-4s-65-130c-lipo-pack-xt-60.html', 'Voltage': '\xa04S1P /\xa04 Cell / 14.8V', 'Balance Plug': '\xa0JST-XH', 'Dimensions': '\xa0134 x 44 x 27mm', 'Discharge Plug': '\xa0XT-60', 'Capacity [mAh]': 2700.0, 'Weight [g]': 332.0, 'Price [USD]': 45.1, 'Constant Discharge [c]': 65, 'Peak Discharge [c]': 30}
{'Name': 'Turnigy nano-te

{'Name': 'Turnigy 5000mAh 4S 25C Lipo Pack w/XT-90                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-battery-5000mah-4s-25c-lipo-pack-xt-90.html', 'Dimensions': ' 147 x 49 x 33mm', 'Balance Plug': ' JST-XH', 'Discharge Plug': ' XT-90', 'Configuration': ' 4S1P / 14.8v / 4Cell', 'Capacity [mAh]': 5000.0, 'Weight [g]': 552.0, 'Price [USD]': 40.02, 'Constant Discharge [c]': 25, 'Peak Discharge [c]': 35}
{'Name': 'ZIPPY Flightmax 3000mAh 4S1P 20C LiPo Pack w/XT60                                ', 'URL': 'https://hobbyking.com/en_us/zippy-flightmax-3000mah-4s1p-20c-lipo-pack-w-xt60.html', 'Voltage': ' 4S1P /\xa04 Cell / 14.8v', 'Balance Plug': ' JST-XH', 'Dimensions': ' 136x45x23mm', 'Discharge Plug': ' XT60', 'Capacity [mAh]': 3000.0, 'Weight [g]': 285.0, 'Price [USD]': 21.85, 'Constant Discharge [c]': 20, 'Peak Discharge [c]': 40}
{'Name': 'Turnigy Graphene Panther 1600mAh 4S 75C Battery Pack (Removable Balance Lead)                                ', 'URL': 'htt

{'Name': 'Turnigy 1600mAh 4S 30C Lipo Pack                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-1600mah-4s-30c-lipo-pack.html', 'Dimensions': '\xa090 x\xa035 x 25mm', 'Balance Plug': ' JST-XH ', 'Discharge Plug': '\xa0XT60 \xa0', 'Configuration': ' 4S1P / 14.8v / 4Cell', 'Capacity [mAh]': 1600.0, 'Weight [g]': 177.0, 'Price [USD]': 19.77, 'Constant Discharge [c]': 30, 'Peak Discharge [c]': 40}
{'Name': 'Turnigy 1800mAh 4S 30C Lipo Pack                                ', 'URL': 'https://hobbyking.com/en_us/turnigy-1800mah-4s-30c-lipo-pack.html', 'Dimensions': '\xa0105 x\xa035 x 26mm', 'Balance Plug': ' JST-XH', 'Discharge Plug': ' XT60\xa0', 'Configuration': ' 4S1P / 14.8v / 4Cell', 'Capacity [mAh]': 1800.0, 'Weight [g]': 185.0, 'Price [USD]': 24.77, 'Constant Discharge [c]': 30, 'Peak Discharge [c]': 40}


In [182]:
df = pd.DataFrame(products)
df['Capacity [Ah]'] = df['Capacity [mAh]'] / 1000
df['Energy [Wh]'] = 14.8 * df['Capacity [Ah]']
df['Energy Density [Wh/g]'] = df['Energy [Wh]'] / df['Weight [g]']
df['Constant Discharge [A]'] = df['Constant Discharge [c]'] * df['Capacity [Ah]']
df['Energy Value [Wh/$]'] = df['Energy [Wh]'] / df['Price [USD]']

In [184]:
filtered = df[(df['Weight [g]'] < 210) & 
              (df['Energy [Wh]']>18) &
              (df['Constant Discharge [A]'] > ((12*4)+1))]

In [185]:
filtered.sort_values('Energy Density [Wh/g]', ascending=False).head(5)

Unnamed: 0,Balance Plug,Capacity,Capacity [mAh],Cell Count,Configuration,Constant Discharge [c],Dimensions,Discharge Plug,Max Charge Rate,Name,...,Specs,URL,Voltage,Weight [g],Wire Gauge,Capacity [Ah],Energy [Wh],Energy Density [Wh/g],Constant Discharge [A],Energy Value [Wh/$]
1,JST-XH,850.0,8500.0,4.0,,75,56x31x38mm,XT60,,Turnigy Graphene Panther 850mAh 4S 75C Battery...,...,,https://hobbyking.com/en_us/turnigy-graphene-8...,14.8V,120.0,,8.5,125.8,1.048333,637.5,6.698616
99,JST-XH,,1800.0,,4S1P / 14.8v / 4Cell,30,105 x 35 x 26mm,XT60,,Turnigy 1800mAh 4S 30C Lipo Pack ...,...,,https://hobbyking.com/en_us/turnigy-1800mah-4s...,,185.0,,1.8,26.64,0.144,54.0,1.075495
4,JST-XH,,1400.0,,4S1P / 14.8v / 4Cell,40,87.5 x 33.5 x 27mm,XT60,,Turnigy 1400mAh 4S 40C Lipo Pack w/XT60 ...,...,,https://hobbyking.com/en_us/turnigy-1400mah-4s...,,153.0,,1.4,20.72,0.135425,56.0,1.481058
30,JST-XH,,1600.0,,,35,106x34x23mm,XT60,,ZIPPY Compact 1600mAh 4S 35C Lipo Pack ...,...,,https://hobbyking.com/en_us/zippy-compact-1600...,4S1P / 4 Cell / 14.8V,178.0,,1.6,23.68,0.133034,56.0,1.221248
83,JST-XH,,1300.0,,,40,77x34x31mm,XT60,,ZIPPY Compact 1300mAh 4s 40c Lipo Pack ...,...,,https://hobbyking.com/en_us/zippy-compact-1300...,4S1P / 4 Cell / 14.8V,148.0,,1.3,19.24,0.13,52.0,1.260813


In [177]:
filtered.sort_values('Energy Value [Wh/$]', ascending=False).head(5)

Unnamed: 0,Configuration,Balance Plug,Capacity,Cell Count,Configuration.1,Constant Discharge,Dimensions,Discharge Plug,Max Charge Rate,Name,...,Specs,URL,Voltage,Weight,Wire Gauge,Capacity [Ah],Energy [Wh],Energy Density [Wh/g],Constant Discharge [A],Energy Value [Wh/$]
4,,JST-XH,1400.0,,4S1P / 14.8v / 4Cell,40,87.5 x 33.5 x 27mm,XT60,,Turnigy 1400mAh 4S 40C Lipo Pack w/XT60 ...,...,,https://hobbyking.com/en_us/turnigy-1400mah-4s...,,153.0,,1.4,20.72,0.135425,56.0,1.481058
3,,JST-XH,1400.0,,4S1P / 14.8v / 4Cell,65,91.5 x 35 x31mm,XT60,,Turnigy 1400mAh 4S 65C Lipo Pack w/XT60 ...,...,,https://hobbyking.com/en_us/turnigy-1400mah-4s...,,179.0,,1.4,20.72,0.115754,91.0,1.382255
75,,JST-XH,1800.0,,,40,109x35x29mm,XT60,,ZIPPY Compact 1800mAh 4s 40c Lipo Pack ...,...,,https://hobbyking.com/en_us/zippy-compact-1800...,4S1P / 4 Cell / 14.8V,208.0,,1.8,26.64,0.128077,72.0,1.368259
83,,JST-XH,1300.0,,,40,77x34x31mm,XT60,,ZIPPY Compact 1300mAh 4s 40c Lipo Pack ...,...,,https://hobbyking.com/en_us/zippy-compact-1300...,4S1P / 4 Cell / 14.8V,148.0,,1.3,19.24,0.13,52.0,1.260813
37,,JST-XH,1800.0,,,35,103x35x29mm,XT60 Connector,,Turnigy nano-tech 1800mah 4S 35~70C Lipo Pack ...,...,,https://hobbyking.com/en_us/turnigy-nano-tech-...,4S1P / 4 Cell / 14.8V,207.0,,1.8,26.64,0.128696,63.0,1.256011


In [189]:
df.iloc[30]['URL']

'https://hobbyking.com/en_us/zippy-compact-1600mah-4s-35c-lipo-pack.html'

In [186]:
df.columns

Index(['Balance Plug', 'Capacity', 'Capacity [mAh]', 'Cell Count',
       'Configuration', 'Constant Discharge [c]', 'Dimensions',
       'Discharge Plug', 'Max Charge Rate', 'Name', 'Pack Resistance',
       'Peak Discharge [c]', 'Price [USD]', 'Specs', 'URL', 'Voltage',
       'Weight [g]', 'Wire Gauge', 'Capacity [Ah]', 'Energy [Wh]',
       'Energy Density [Wh/g]', 'Constant Discharge [A]',
       'Energy Value [Wh/$]'],
      dtype='object')

In [96]:
df.to_csv('batteries.csv')