In [2]:
urls = ["https://www.motorcyclespecs.co.za/model/triu/triumph-street-triple-rs-22.html",
        "https://www.motorcyclespecs.co.za/model/ducati/ducati_scrambler_desert_sled_21.html",
        "https://www.motorcyclespecs.co.za/model/bmw/bmw-r100-7-78.html",
       "https://www.motorcyclespecs.co.za/model/kawasaki/kawasaki_zrx1100%2097.htm",
       "https://www.motorcyclespecs.co.za/model/kawasaki/kawasaki_zrx1200r%2004.htm",
       "https://www.motorcyclespecs.co.za/model/triu/triumph_speed_triple_1200rs_21.html",
        "https://www.motorcyclespecs.co.za/model/kawasaki/kawasaki_z900rs_20.html",
       "https://www.motorcyclespecs.co.za/model/yamaha/yamaha_r1_20.html",
        "https://www.motorcyclespecs.co.za/model/suzu/suzuki_gsxr1300r_21.html",
        "https://www.motorcyclespecs.co.za/model/suzu/suzuki_sv_650_18.htm",
        "https://www.motorcyclespecs.co.za/model/suzu/suzuki_sv650n%2007.htm"
       ]

In [3]:
import requests
from bs4 import BeautifulSoup
import re
import time

def scrape_table_to_name_value_pairs(url):
    # Request the Page
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml') # important to pip install lxml
    
    # Find the Table
    hook = soup.find_all(string=re.compile(".*Make Model.*"))
    assert(len(hook)==1)  # Assert we find one and exactly one NavigatableString containing "Make Model"
    hook = hook[0]
    table = hook.find_parent('table')
    rows = table.find_all('tr')
    assert(len(rows) > 1) # Assert we've found and parsed a table with many rows
    
    # Extract the name:value pairs
    output = {}
    for row in rows:
        cols = row.find_all('td')
        assert(len(cols) == 2) # We expect two columns in each row, which we treat as a name:value pair
        name = cols[0].get_text().strip()
        value = cols[1].get_text().strip().replace("\t", "")
        output[name] = value
    output["link"] = url
    return output

In [4]:
outputs = []
for url in urls:
    print("Scraping", url)
    output = scrape_table_to_name_value_pairs(url)
    outputs.append(output)

Scraping https://www.motorcyclespecs.co.za/model/triu/triumph-street-triple-rs-22.html
Scraping https://www.motorcyclespecs.co.za/model/ducati/ducati_scrambler_desert_sled_21.html
Scraping https://www.motorcyclespecs.co.za/model/bmw/bmw-r100-7-78.html
Scraping https://www.motorcyclespecs.co.za/model/kawasaki/kawasaki_zrx1100%2097.htm
Scraping https://www.motorcyclespecs.co.za/model/kawasaki/kawasaki_zrx1200r%2004.htm
Scraping https://www.motorcyclespecs.co.za/model/triu/triumph_speed_triple_1200rs_21.html
Scraping https://www.motorcyclespecs.co.za/model/kawasaki/kawasaki_z900rs_20.html
Scraping https://www.motorcyclespecs.co.za/model/yamaha/yamaha_r1_20.html
Scraping https://www.motorcyclespecs.co.za/model/suzu/suzuki_gsxr1300r_21.html
Scraping https://www.motorcyclespecs.co.za/model/suzu/suzuki_sv_650_18.htm
Scraping https://www.motorcyclespecs.co.za/model/suzu/suzuki_sv650n%2007.htm


In [34]:
outputs

[{'Make Model': 'Triumph Street Triple RS',
  'Year': '2022',
  'Engine': 'Four stroke,\xa0 in-line 3-cylinder, DOHC, 4 valve per \ncylinder',
  'Capacity': '765 cc / 46.6 cu-in',
  'Bore x Stroke': '78 x 53.4 mm',
  'Cooling System': 'Liquid-cooled',
  'Compression Ratio': '12.54:1',
  'Lubrication': 'Wet sump',
  'Induction': 'Multipoint sequential electronic fuel \ninjection with SAI. Electronic throttle control',
  'Exhaust': 'Stainless steel 3 into 1 exhaust system low \nsingle sided stainless steel silencer',
  'Emission': 'Euro 5',
  'Ignition': 'Digital - inductive type',
  'Starting': 'Electric',
  'Max Power': '121.3 hp / 90 kW @ 11750 rpm',
  'Max Torque': '79 Nm / \n58.3 lb-ft\xa0 @ 9350 rpm',
  'Clutch': 'Wet, multi-plate, slip-assisted',
  'Transmission': '6 Speed with Triumph Shift Assist',
  'Final Drive': 'X ring chain',
  'Frame': 'Front - \nAluminum beam twin spar\nRear - 2 piece high pressure die cast',
  'Swingarm': 'Twin-sided, cast aluminum alloy',
  'Front Suspe

In [36]:
import pandas as pd
df = pd.DataFrame(outputs)
df

Unnamed: 0,Make Model,Year,Engine,Capacity,Bore x Stroke,Cooling System,Compression Ratio,Lubrication,Induction,Exhaust,...,Front \n\nWheel Travel,Rear \n\nWheel Travel,Front \n\nWheel,Rear \n\nWheel,Ground clearance,Engine Oil,Spark Plug,Primary Reduction,Final Reduction,Oil Capacity
0,Triumph Street Triple RS,2022,"Four stroke, in-line 3-cylinder, DOHC, 4 valv...",765 cc / 46.6 cu-in,78 x 53.4 mm,Liquid-cooled,12.54:1,Wet sump,Multipoint sequential electronic fuel \ninject...,Stainless steel 3 into 1 exhaust system low \n...,...,,,,,,,,,,
1,Ducati Scrambler 800 Desert \nSled,2021,"Four stroke, 90° L twin cylinder, SOHC, \n ...",803 cc / 49.0 cub in,88 x 66 mm,Air cooled,11.0:1,,"Electronic fuel injection, 50 \nmm throttle body",Stainless steel muffler with \ncatalytic conve...,...,,,,,,,,,,
2,BMW R 100/7,1978 - 10,"Four stroke, \n two cylinder horizontally o...",980 cc / 59.8 cu in.,94 x 70.6 mm,Air cooled,9.1:1,,2 x 36mm Bing V94 carburetors,,...,,,,,,,,,,
3,Kawasaki ZR-X 1100,1997 - 98,"Four stroke, transverse four \n cylinder, D...",1052 cc / 64.2 cu-in,79 x 59.4 mm,Liquid cooled,10.1:1,,4x 36mm Mikuni carburetors,,...,,,,,,,,,,
4,Kawasaki ZRX 1200R,2005 - 06,"Four stroke, transverse four \n cylinder, D...",1165 cc / 71.0 cu-in,79 x 59.4 mm.,Liquid cooled,10.1:1,,4x Keihin CVK36 carburetors,,...,,,,,,,,,,
5,Triumph Speed Triple 1200RS,2021,"Four \n stroke, transverse three cylinder, ...",1160 cc / 70.7 cu-in,90 x 60.8 mm,Liquid-cooled,13.2:1,,Multipoint sequential electronic fuel injectio...,Stainless steel 3 into 1 header system with un...,...,,,,,,,,,,
6,Kawasaki Z 900RS,2020,"Four stroke, transverse four \n cylinder, D...",948 cc / 57.8 cu-in,73.4 x 56.0 mm,Liquid cooled,11.8:1,Forced lubrication wet sum,DFI with 36mm Keihin throttle bodies,,...,,,,,,,,,,
7,Yamaha YZF 1000 R1,2020,"Four stroke, transverse \nfour cylinder, DOHC,...",998 cc / 60.9 cu-in,79.0 x 50.9 mm,Liquid cooled,13.0:1,Wet sump,Fuel Injection with YCC-T and YCC-I,,...,,,,,,,,,,
8,Suzuki GSX 1300R \n Hayabusa,2021,"Four stroke, transverse four \n cylinder, D...",1340 cc / 81.8 cu-in,81 x 65 mm,Liquid cooled,12.5:1,Wet sump,Fuel injection with Ride-by-Wire throttle bodies,,...,,,,,,,,,,
9,Suzuki SV \n 650 / ABS,2018 - 19,"Four stroke, 90°-V-twin, \n DOHC, 4 valves ...",645 cc / 39.3 cu in,81 x 62.6 mm,Liquid cooled,11.2:1,Wet sump,"Fuel Injection, 39mm \nthrottle bodies",,...,125 mm / 4.9 in,130 mm / 5.1 in,"3.50 x 17, Aluminium alloy, 5-spoke","5.00 x 17, Aluminium alloy, 5-spoke",135mm / 5.3 in,,,,,


In [5]:
df.to_csv('02 - motorcycles.csv')

NameError: name 'df' is not defined