# Match Stat Scraper

In [163]:
#Scraping Imports
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
#SQL Imports
import mysql.connector
#Pandas imports
import pandas as pd

import os

In [164]:
#DB Connection
# Settings for the MySQL server on localhost and the NRL_data schema.
db_settings = dict(
    host="localhost",
    user="root",
    passwd="",
    database="NRL_data",
)
mydb = mysql.connector.connect(**db_settings)
# Buffered cursor that the query cells and helper functions below reuse.
mycursor = mydb.cursor(buffered=True)

## 1. Get URLs of all matches that have not been scraped

In [171]:
#Get all matches
# Load id, date, url and both team ids for every row of Matches.
match_columns = ['id', 'date', 'url', 'home_team_id', 'away_team_id']
all_matches_query = pd.read_sql_query('SELECT id, date, url, home_team_id, away_team_id FROM Matches;', mydb)
all_match_df = pd.DataFrame(all_matches_query, columns=match_columns)

#Find matches that were already scraped
# A match counts as scraped once its id appears in PlayerMatchStats.
already_scraped = 'SELECT DISTINCT match_id FROM PlayerMatchStats;'
mycursor.execute(already_scraped)
results = mycursor.fetchall()
already_scraped_list = [row[0] for row in results]

#Remove matches which were already scraped and save remaining match info to not_yet_scraped_df
not_yet_scraped = set(all_match_df['id'].tolist()) - set(already_scraped_list)
is_pending = all_match_df['id'].isin(not_yet_scraped)
not_yet_scraped_df = all_match_df[is_pending]
# Bare expression so the notebook displays the pending matches.
not_yet_scraped_df

Unnamed: 0,id,date,url,home_team_id,away_team_id
1398,1467,2013-09-13,https://www.nrl.com/draw/nrl-premiership/2013/...,13,7
1399,1468,2013-09-14,https://www.nrl.com/draw/nrl-premiership/2013/...,4,10
1400,1469,2013-09-14,https://www.nrl.com/draw/nrl-premiership/2013/...,15,6
1401,1470,2013-09-15,https://www.nrl.com/draw/nrl-premiership/2013/...,3,8
1402,1471,2013-09-20,https://www.nrl.com/draw/nrl-premiership/2013/...,6,4
1403,1472,2013-09-21,https://www.nrl.com/draw/nrl-premiership/2013/...,7,8
1404,1473,2013-09-27,https://www.nrl.com/draw/nrl-premiership/2013/...,13,6
1405,1474,2013-09-28,https://www.nrl.com/draw/nrl-premiership/2013/...,15,8
1406,1475,2013-10-06,https://www.nrl.com/draw/nrl-premiership/2013/...,15,6


# 2. Scraping Process


In [174]:
#Finds team id based off nickname
def find_team_id(name):
    """Return the id of the Teams row whose nickname equals ``name``.

    The lookup is a parameterised query run on the module-level
    ``mycursor``; the first column of the first fetched row is returned.
    When no row is fetched, ``fetchone()`` gives ``None`` and indexing it
    raises ``TypeError``.
    """
    mycursor.execute('SELECT id FROM Teams WHERE nickname = %s;', (name,))
    team_row = mycursor.fetchone()
    return team_row[0]

#Finds player based off name or creates entry in database for player
def find_or_create_player(first_name, last_name, team_id):
    """Return the id of a matching Players row, inserting a new row if needed.

    A row matches when its ``first_name`` equals ``first_name``, its
    ``last_name`` contains ``last_name`` anywhere (``LIKE '%last_name%'``)
    and its ``current_team`` equals ``team_id``. At most one row is
    considered (``LIMIT 1``).

    Args:
        first_name: First name to look up / store.
        last_name: Last name to look up (as a substring) / store.
        team_id: Value compared against, and stored in, ``current_team``.

    Returns:
        The id of the existing or newly inserted player.
    """
    find_player_query = 'SELECT id FROM Players WHERE first_name = %s AND last_name LIKE %s AND current_team = %s LIMIT 1;'
    mycursor.execute(find_player_query, (first_name, '%' + last_name + '%', team_id))
    result = mycursor.fetchone()
    if result is not None:
        return result[0]

    insert_player_query = 'INSERT INTO Players (first_name, last_name, current_team) VALUES (%s, %s, %s);'
    mycursor.execute(insert_player_query, (first_name, last_name, team_id))
    # Use the id generated by the INSERT rather than running the lookup a
    # second time: it saves a round trip, and a second lookup that failed to
    # match the freshly written row would have no way to terminate.
    # NOTE(review): assumes Players.id is AUTO_INCREMENT - confirm against the schema.
    new_player_id = mycursor.lastrowid
    mydb.commit()
    return int(new_player_id)

In [156]:
def scrape_match(match, web_driver):
    """Scrape the per-player stat rows of one match page.

    Loads ``match['url']`` in ``web_driver`` and reads up to 19 rows from
    each of the two player-stats tables on the page. Players in the first
    table are given ``match['home_team_id']``, players in the second
    ``match['away_team_id']``. Every player is resolved (or created) through
    ``find_or_create_player``.

    Args:
        match: Mapping/row providing ``url``, ``id``, ``home_team_id`` and
            ``away_team_id``.
        web_driver: Selenium WebDriver used for loading and querying the page.

    Returns:
        dict mapping ``"<first>_<last>_<team_id>"`` to a list that starts
        with ``[player_id, team_id, match id]`` followed by the stripped
        text of each stat cell that is read.

    Raises:
        Whatever the explicit wait raises when a stats table does not appear
        within 10 seconds; the error is not handled here so the caller can
        record the match as failed.
    """
    # td positions in a stat row that are not collected.
    skipped_columns = {5, 7, 15, 17, 21, 34, 40, 47, 56, 64}
    # Wait on the driver that was handed in, not on a module-level one.
    page_wait = WebDriverWait(web_driver, 10)

    results = {}
    web_driver.get(match['url'])
    for panel in ['1', '2']:
        # First stats panel -> home team, second panel -> away team.
        team_id = match['home_team_id'] if panel == '1' else match['away_team_id']
        tbody_xpath = '//*[@id="player-stats"]/div[' + panel + ']/div/div[3]/div/table/tbody'
        page_wait.until(EC.presence_of_element_located((By.XPATH, tbody_xpath)))
        for row in range(1, 20):
            row_xpath = tbody_xpath + '/tr[' + str(row) + ']'
            try:
                name_field = web_driver.find_element(By.XPATH, row_xpath + '/td[2]/a').get_attribute('innerText').strip()
                name_parts = name_field.split(' ')
                first_name = name_parts[0].strip().capitalize()
                last_name = name_parts[-1].strip().capitalize()
                # The second-to-last space-separated token is folded into the
                # surname when it is purely alphabetic. A name with a single
                # token raises IndexError here and the row is skipped below.
                middle_name = name_parts[-2].strip()
                if middle_name.isalpha():
                    last_name = middle_name.capitalize() + ' ' + last_name
                full_name = first_name + '_' + last_name + '_' + str(team_id)
                player_id = find_or_create_player(first_name, last_name, str(team_id))

                player_stat_list = [player_id, team_id, match['id']]
                for column in range(3, 67):
                    if column in skipped_columns:
                        continue
                    stat_field = web_driver.find_element(By.XPATH, row_xpath + '/td[' + str(column) + ']')
                    player_stat_list.append(stat_field.get_attribute('innerText').strip())
                print(player_stat_list)
                results[full_name] = player_stat_list
            except Exception:
                # Best effort per row: a row that is missing or cannot be
                # parsed is skipped. Only Exception is caught so that
                # KeyboardInterrupt / SystemExit still stop the scrape.
                continue
    print('Scraping Success: ' + match['url'])
    return results

In [157]:
def save_data_to_csv(match, match_dict):
    """Write one match's scraped player stats to a CSV file.

    The file is written to
    ``./csv_files/<year>/<month>/<year>_<month>_MatchID_<id>.csv``, where
    year and month come from ``match['date']``. Missing directories,
    including ``./csv_files`` itself, are created.

    Args:
        match: Mapping/row providing ``date`` (an object with ``year`` and
            ``month`` attributes) and ``id``.
        match_dict: dict mapping a player key to a list of 57 values in the
            order of ``column_names`` below.

    Cells equal to ``'-'`` and missing values are stored as ``0``. The dict
    keys end up in a column named ``index``.
    """
    column_names = ['player_id', 'team_id', 'match_id', 'number', 'position', 'minutes_played', 'points', 'tries',
                'conversions','conversion_attempts', 'penalty_goals', 'conversion_percentage','field_goals',
                'fantasy_points', 'total_runs', 'total_run_metres', 'kick_return_metres', 'post_contact_metres',
                'line_breaks', 'line_break_assists', 'try_assists', 'line_engaged_runs', 'tackle_breaks', 'hit_ups',
                'play_the_ball', 'average_play_the_ball_seconds', 'dummy_half_runs', 'dummy_half_run_metres', 
                'steals', 'offloads', 'dummy_passes', 'passes', 'receipts', 'pass_to_run_ratio', 'tackle_percentage',
                'tackles_made', 'tackles_missed', 'ineffective_tackles', 'intercepts', 'kicks_defused', 'kicks',
                'kicking_metres', 'forced_drop_outs', 'bomb_kicks', 'grubbers', 'fourty_twenty',
                'cross_field_kicks', 'kicked_dead', 'errors', 'handling_errors', 'one_on_ones_lost', 'penalties',
                'on_report', 'sin_bins', 'send_offs', 'stint_one', 'stint_two']
    year = match['date'].year
    month = match['date'].month
    match_id = match['id']
    csv_filename = '{}_{}_MatchID_{}.csv'.format(year, month, match_id)

    csv_data = pd.DataFrame.from_dict(match_dict, orient='index', columns=column_names).reset_index()
    print(csv_data)
    # fillna() instead of the pd.np alias, which no longer exists in pandas 2.x.
    csv_data = csv_data.replace('-', 0).fillna(0)

    # makedirs creates every missing level (csv_files, year, month) in one
    # call and does not fail when the directory already exists.
    monthdir = './csv_files/' + str(year) + '/' + str(month)
    os.makedirs(monthdir, exist_ok=True)
    csv_data.to_csv(monthdir + '/' + csv_filename)
    print("CSV success: " + str(match_id))

In [172]:
#Set Up WebDriver
# Let webdriver_manager install/locate chromedriver, then start Chrome with that path.
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(chromedriver_path)
# Explicit wait bound to the driver, constructed with a timeout argument of 10.
wait = WebDriverWait(driver, 10)


Checking for mac64 chromedriver:83.0.4103.39 in cache
Driver found in /Users/nickpowers/.wdm/chromedriver/83.0.4103.39/mac64/chromedriver


In [159]:
# Collects the URL of every match whose scrape fails in the loop below.
scraping_errors = list()
# Collects the id of every match that was scraped but could not be saved as CSV.
csv_conversion_errors = list()

In [173]:
# Scrape every pending match and save it to CSV. A failure in either step is
# recorded and the loop moves on to the next match.
for match in not_yet_scraped_df.iterrows():
    # iterrows() yields (index, row); only the row is needed.
    match_row = match[1]
    # Catch Exception rather than everything, so that interrupting the kernel
    # stops the run instead of being logged as a failed match.
    try:
        match_data = scrape_match(match_row, driver)
    except Exception:
        print('scraping error: ' + str(match_row['url']))
        scraping_errors.append(match_row['url'])
        continue
    try:
        save_data_to_csv(match_row, match_data)
    except Exception:
        print("CSV error: " + str(match_row['id']))
        csv_conversion_errors.append(match_row['id'])

[352, 13, 1467, '1', 'Fullback', '80:00', '-', '-', '-', '-', '-', '-', '-', '-', '15', '139', '20', '56', '-', '1', '1', '-', '4', '2', '-', '-', '-', '-', '-', '1', '-', '10', '26', '0.67', '100%', '6', '-', '-', '-', '2', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1684, 13, 1467, '2', 'Winger', '80:00', '-', '-', '-', '-', '-', '-', '-', '1', '6', '55', '13', '16', '-', '-', '-', '-', '-', '-', '-', '-', '1', '6', '-', '-', '-', '4', '13', '0.67', '66.7%', '4', '1', '1', '1', '4', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1901, 13, 1467, '3', 'Centre', '80:00', '-', '-', '-', '-', '-', '-', '-', '1', '11', '88', '3', '25', '-', '-', '-', '-', '-', '1', '-', '-', '3', '24', '-', '-', '-', '11', '23', '1', '88.5%', '23', '2', '1', '-', '2', '2', '56', '-', '-', '1', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '80:00', '-']
[1988, 13, 1467, '4', 'Centre', '80:00', '-', '-', '-', '-', '-

[34 rows x 58 columns]
CSV success: 1467
[1828, 4, 1468, '1', 'Fullback', '80:00', '4', '-', '2', '4', '-', '50%', '-', '2', '21', '183', '43', '79', '1', '1', '1', '-', '5', '1', '-', '-', '5', '45', '-', '3', '-', '8', '28', '0.38', '50%', '3', '3', '-', '-', '6', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[97, 4, 1468, '2', 'Winger', '80:00', '4', '1', '-', '-', '-', '-', '-', '-', '14', '106', '-', '36', '1', '-', '-', '-', '1', '-', '-', '-', '2', '12', '-', '-', '-', '1', '12', '0.07', '83.3%', '5', '-', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1925, 4, 1468, '3', 'Centre', '80:00', '4', '1', '-', '-', '-', '-', '-', '-', '10', '65', '-', '15', '1', '1', '1', '-', '3', '3', '-', '-', '-', '-', '-', '3', '-', '2', '12', '0.2', '82.4%', '14', '2', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1829, 4, 1468, '4', 'Ce

[34 rows x 58 columns]
CSV success: 1468
[1672, 15, 1469, '1', 'Fullback', '80:00', '-', '-', '-', '-', '-', '-', '-', '2', '13', '52', '19', '22', '-', '-', '-', '-', '3', '2', '-', '-', '-', '-', '-', '-', '2', '18', '35', '1.38', '50%', '2', '2', '-', '-', '6', '-', '-', '-', '-', '-', '-', '-', '-', '-', '2', '-', '1', '-', '-', '-', '80:00', '-']
[431, 15, 1469, '2', 'Winger', '80:00', '-', '-', '-', '-', '-', '-', '-', '1', '12', '121', '4', '31', '-', '-', '-', '-', '10', '1', '-', '-', '-', '-', '-', '1', '-', '1', '14', '0.08', '77.8%', '7', '2', '-', '-', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1673, 15, 1469, '3', 'Centre', '80:00', '-', '-', '-', '-', '-', '-', '-', '2', '11', '61', '1', '19', '-', '-', '-', '-', '2', '1', '-', '-', '2', '14', '-', '-', '1', '4', '19', '0.36', '88.9%', '24', '1', '2', '-', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '2', '-', '-', '-', '-', '-', '80:00', '-']
[1674, 15, 1469, '4',

[34 rows x 58 columns]
CSV success: 1469
[2436, 3, 1470, '1', 'Fullback', '80:00', '4', '1', '-', '-', '-', '-', '-', '2', '15', '134', '27', '40', '1', '-', '-', '-', '2', '3', '-', '-', '2', '22', '-', '-', '4', '10', '25', '0.67', '100%', '9', '-', '-', '-', '2', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[2437, 3, 1470, '2', 'Winger', '80:00', '-', '-', '-', '-', '-', '-', '-', '1', '10', '82', '12', '32', '-', '-', '-', '-', '3', '3', '-', '-', '4', '35', '-', '-', '-', '-', '12', '-', '66.7%', '2', '1', '-', '-', '3', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[2438, 3, 1470, '4', 'Centre', '80:00', '-', '-', '-', '-', '-', '-', '-', '1', '11', '147', '-', '56', '1', '-', '-', '-', '4', '4', '-', '-', '3', '39', '-', '1', '-', '4', '16', '0.36', '90.9%', '10', '1', '-', '-', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '80:00', '-']
[2439, 3, 1470, '3', '

[34 rows x 58 columns]
CSV success: 1470
[1955, 6, 1471, '19', 'Fullback', '80:00', '-', '-', '-', '-', '-', '-', '-', '1', '20', '250', '113', '73', '1', '-', '-', '-', '1', '-', '-', '-', '-', '-', '-', '2', '1', '12', '32', '0.6', '75%', '3', '1', '-', '-', '7', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[145, 6, 1471, '2', 'Winger', '80:00', '4', '1', '-', '-', '-', '-', '-', '-', '16', '173', '61', '63', '1', '-', '-', '-', '2', '-', '-', '-', '-', '-', '-', '-', '1', '3', '19', '0.19', '100%', '2', '-', '-', '-', '3', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '80:00', '-']
[1709, 6, 1471, '3', 'Centre', '80:00', '8', '-', '3', '3', '1', '100%', '-', '-', '6', '66', '-', '25', '-', '1', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '19', '25', '3.17', '95.7%', '22', '1', '-', '-', '-', '2', '5', '-', '-', '2', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '80:00', '-']
[1710, 6, 1471, '4', 

[34 rows x 58 columns]
CSV success: 1471
[568, 7, 1472, '1', 'Fullback', '80:00', '-', '-', '-', '-', '-', '-', '-', '1', '13', '130', '65', '30', '-', '-', '-', '-', '1', '-', '-', '-', '3', '15', '1', '1', '-', '27', '48', '2.08', '71.4%', '5', '2', '-', '-', '10', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '1', '-', '-', '-', '80:00', '-']
[1946, 7, 1472, '2', 'Winger', '73:00', '4', '1', '-', '-', '-', '-', '-', '2', '19', '171', '24', '43', '-', '-', '-', '-', '5', '2', '-', '-', '2', '15', '-', '1', '-', '2', '24', '0.11', '25%', '1', '1', '2', '-', '4', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '73:00', '-']
[187, 7, 1472, '3', 'Centre', '80:00', '-', '-', '-', '-', '-', '-', '-', '-', '13', '119', '24', '34', '-', '-', '-', '-', '1', '-', '-', '-', '1', '20', '-', '-', '-', '2', '15', '0.15', '90.9%', '20', '1', '1', '-', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1974, 7, 1472, '4'

[1865, 8, 1472, '10', 'Prop', '26:00', '-', '-', '-', '-', '-', '-', '-', '-', '6', '51', '10', '17', '-', '-', '-', '-', '-', '5', '-', '-', '-', '-', '-', '-', '-', '-', '7', '-', '90.5%', '19', '2', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '1', '-', '-', '-', '26:00', '-']
[1871, 8, 1472, '11', '2nd Row', '80:00', '-', '-', '-', '-', '-', '-', '-', '-', '15', '122', '-', '38', '-', '-', '-', '-', '-', '15', '-', '-', '-', '-', '-', '-', '-', '3', '18', '0.2', '92.5%', '37', '3', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1866, 8, 1472, '12', '2nd Row', '80:00', '-', '-', '-', '-', '-', '-', '-', '-', '4', '36', '-', '24', '-', '1', '1', '-', '1', '4', '-', '-', '-', '-', '-', '1', '-', '5', '10', '1.25', '85%', '34', '2', '4', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1868, 8, 1472, '13', 'Lock', '53:00', '-', '-', '-', '-', '-', '-', '

[34 rows x 58 columns]
CSV success: 1472
[352, 13, 1473, '1', 'Fullback', '79:00', '-', '-', '-', '-', '-', '-', '-', '-', '13', '151', '60', '56', '-', '2', '1', '-', '4', '-', '-', '-', '-', '-', '-', '-', '2', '11', '25', '0.85', '90%', '9', '1', '-', '-', '7', '1', '16', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '-', '-', '-', '79:00', '-']
[1684, 13, 1473, '2', 'Winger', '79:00', '4', '1', '-', '-', '-', '-', '-', '1', '7', '68', '7', '23', '-', '-', '-', '-', '2', '-', '-', '-', '1', '7', '-', '-', '-', '3', '14', '0.43', '80%', '4', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '1', '-', '-', '-', '79:00', '-']
[1901, 13, 1473, '3', 'Centre', '79:00', '-', '-', '-', '-', '-', '-', '-', '1', '5', '34', '-', '14', '1', '-', '1', '-', '-', '1', '-', '-', '1', '-', '-', '1', '-', '6', '15', '1.2', '76%', '19', '3', '3', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '79:00', '-']
[1988, 13, 1473, '4', 'Centre

[34 rows x 58 columns]
CSV success: 1473
[1672, 15, 1474, '1', 'Fullback', '80:00', '-', '-', '-', '-', '-', '-', '-', '-', '14', '106', '53', '23', '1', '-', '-', '-', '2', '-', '-', '-', '1', '12', '-', '-', '-', '12', '28', '0.86', '80%', '8', '1', '1', '-', '9', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '80:00', '-']
[431, 15, 1474, '2', 'Winger', '80:00', '4', '1', '-', '-', '-', '-', '-', '2', '8', '117', '78', '17', '-', '-', '-', '-', '4', '2', '-', '-', '-', '-', '-', '-', '-', '1', '10', '0.13', '100%', '6', '-', '-', '-', '2', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1673, 15, 1474, '3', 'Centre', '80:00', '8', '2', '-', '-', '-', '-', '-', '1', '13', '152', '-', '22', '1', '-', '-', '-', '7', '-', '-', '-', '1', '7', '-', '-', '-', '4', '16', '0.31', '78.6%', '11', '3', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '80:00', '-']
[1674, 15, 1474, '4',

[34 rows x 58 columns]
CSV success: 1474
[1672, 15, 1475, '1', 'Fullback', '80:00', '-', '-', '-', '-', '-', '-', '-', '-', '13', '87', '35', '15', '-', '-', '1', '-', '1', '-', '-', '-', '1', '1', '-', '-', '3', '19', '30', '1.46', '100%', '5', '-', '-', '-', '7', '-', '-', '-', '-', '-', '-', '-', '-', '-', '1', '-', '-', '-', '-', '-', '80:00', '-']
[431, 15, 1475, '2', 'Winger', '80:00', '4', '1', '-', '-', '-', '-', '-', '-', '11', '69', '31', '26', '-', '-', '-', '-', '3', '1', '-', '-', '-', '-', '-', '1', '-', '1', '14', '0.09', '100%', '2', '-', '-', '-', '5', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1673, 15, 1475, '3', 'Centre', '80:00', '4', '1', '-', '-', '-', '-', '-', '-', '12', '85', '-', '21', '-', '-', '-', '-', '2', '-', '-', '-', '1', '10', '-', '-', '-', '-', '12', '-', '81.8%', '9', '1', '1', '-', '-', '1', '14', '1', '-', '1', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '80:00', '-']
[1674, 15, 1475, '4', 'Ce

[34 rows x 58 columns]
CSV success: 1475


# 3. Update URLs for matches which could not be scraped

In [169]:
#Get match ids of errored URLs for manual updating
if scraping_errors:
    # One %s placeholder per URL: the connector does the quoting, and a
    # single URL no longer produces the invalid "IN ('x',)" that formatting a
    # Python tuple into the statement gives.
    url_placeholders = ', '.join(['%s'] * len(scraping_errors))
    find_incorrect_url_matches = 'SELECT id, url FROM Matches WHERE url IN ({});'.format(url_placeholders)
    mycursor.execute(find_incorrect_url_matches, tuple(scraping_errors))
    results = mycursor.fetchall()
else:
    # "IN ()" is not valid SQL; with no failed URLs there is nothing to look up.
    results = []
print(results)

[(1467, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-1/rabbitohs-vs-storm/'), (1468, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-1/sharks-vs-cowboys/'), (1469, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-1/roosters-vs-sea-eagles/'), (1470, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-1/bulldogs-vs-knights/'), (1471, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-2/sea-eagles-vs-sharks/'), (1472, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-2/storm-vs-knights/'), (1473, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-3/rabbitohs-vs-sea-eagles/'), (1474, 'https://www.nrl.com/draw/nrl-premiership/2013/finals-week-3/roosters-vs-knights/'), (1475, 'https://www.nrl.com/draw/nrl-premiership/2013/grand-final/roosters-vs-sea-eagles/')]


In [170]:
#Manually update URLs
update_url_query = "UPDATE Matches SET url = %s WHERE id = %s"
# (replacement url, match id) pairs, in the parameter order of the statement above.
corrected_urls = [
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-27/rabbitohs-vs-storm/", "1467"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-27/sharks-vs-cowboys/", "1468"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-27/roosters-vs-sea-eagles/", "1469"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-27/knights-vs-bulldogs/", "1470"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-28/sea-eagles-vs-sharks/", "1471"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-28/storm-vs-knights/", "1472"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-29/rabbitohs-vs-sea-eagles/", "1473"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-29/roosters-vs-knights/", "1474"),
    ("https://www.nrl.com/draw/nrl-premiership/2013/round-30/roosters-vs-sea-eagles/", "1475"),
]
mycursor.executemany(update_url_query, corrected_urls)
# Persist the updates.
mydb.commit()

# 4. Repeat process from step 1

In [149]:
# Close the shared cursor. The connection object `mydb` is not closed by this cell.
mycursor.close()

True