Tab Scraper
---

Author: Peter Zhang

Scraping tool for Tabroom.

### Setup

#### Imports

In [15]:
# imports
import urllib.request, urllib.parse, urllib.error
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import csv
import os.path
from os import path
import sys
from string import ascii_lowercase

#### Settings

- OVERWRITE determines whether or not to update existing files.
- TOURNAMENT_CSV is the path to the CSV file listing the tournaments to scrape (its `Name` and `URL` columns are read)
- OUTPATH is where files are stored

In [16]:
# settings
# OVERWRITE: whether existing output files should be updated (see the Settings notes above).
# NOTE(review): no cell visible in this notebook reads OVERWRITE — confirm it is still needed.
OVERWRITE = True

In [17]:
# path to the CSV listing the tournaments to scrape; the execution cells read
# its "Name" and "URL" columns
TOURNAMENT_CSV = 'tools/ld_tourns.csv'

In [18]:
# outpath
# OUTPATH is prepended directly to each file name, so it must end with a path separator
OUTPATH = "tab_data/"
ENTRIES_FILE = "disclosure_entries.csv"  # written by the entries execution cell
INFO_FILE = "disclosure_info.csv"  # written by the tournament-info execution cell

In [19]:
# equivalent names
def _loadNames(names_path):
    """Return the lines of ``names_path`` with surrounding whitespace stripped.

    The file is opened in a ``with`` block so its handle is closed as soon as
    the lines have been read, instead of being left open for the rest of the
    kernel session.
    """
    with open(names_path, 'r') as names_file:
        return [name.strip() for name in names_file]


# One list per event code; getType checks a raw event name against these lists.
VLD_NAMES = _loadNames('tools/VLD_names.txt')
JVLD_NAMES = _loadNames('tools/JVLD_names.txt')
NLD_NAMES = _loadNames('tools/NLD_names.txt')
VPF_NAMES = _loadNames('tools/VPF_names.txt')
JVPF_NAMES = _loadNames('tools/JVPF_names.txt')
NPF_NAMES = _loadNames('tools/NPF_names.txt')
VCX_NAMES = _loadNames('tools/VCX_names.txt')
JVCX_NAMES = _loadNames('tools/JVCX_names.txt')
NCX_NAMES = _loadNames('tools/NCX_names.txt')

In [20]:
# set events to scrape
TARGET_EVENTS = ["VLD", "JVLD", "NLD", "VPF", "JVPF", "NPF", "VCX", "JVCX", "NCX"]
def getType(raw_name):
    """Translate a raw event name into its event code.

    The name lists are consulted in a fixed order and the first list that
    contains ``raw_name`` decides the result. The string ``"None"`` (not the
    ``None`` object) is returned when no list contains the name.
    """
    # (code, names) pairs in lookup order: LD, then PF, then CX
    lookup_order = (
        ("VLD", VLD_NAMES),
        ("JVLD", JVLD_NAMES),
        ("NLD", NLD_NAMES),
        ("VPF", VPF_NAMES),
        ("JVPF", JVPF_NAMES),
        ("NPF", NPF_NAMES),
        ("VCX", VCX_NAMES),
        ("JVCX", JVCX_NAMES),
        ("NCX", NCX_NAMES),
    )
    for code, known_names in lookup_order:
        if raw_name in known_names:
            return code
    return "None"

### Entry Scrapers

#### Events

Take a tournament ID and get links to events.

In [21]:
def getEvents(tourn_id):
    """Return ``(event name, href)`` pairs for the events of a tournament.

    Parameters
    ----------
    tourn_id : str
        Tournament ID; it is appended to the fields-page URL as-is.

    Returns
    -------
    list of (str, str)
        One tuple per anchor whose ``href`` contains ``"event_id"``: the
        stripped first child of the anchor and the href exactly as it appears
        in the page.
    """
    url = "https://www.tabroom.com/index/tourn/fields.mhtml?tourn_id=" + tourn_id
    # close the HTTP response as soon as the body has been read
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    # href=True skips anchors that have no href attribute; for those,
    # link.get('href') is None and the substring test would raise TypeError
    links = [link for link in soup.find_all('a', href=True) if "event_id" in link['href']]
    return [(link.contents[0].strip(), link.get('href')) for link in links]

In [22]:
# example: list the (name, href) pairs for the events of tournament 16856
getEvents("16856")

[('Congressional Debate',
  '/index/tourn/fields.mhtml?tourn_id=16856&event_id=141000'),
 ('JV LD', '/index/tourn/fields.mhtml?tourn_id=16856&event_id=141001'),
 ('Novice LD', '/index/tourn/fields.mhtml?tourn_id=16856&event_id=141003'),
 ('Novice Public Forum',
  '/index/tourn/fields.mhtml?tourn_id=16856&event_id=141004'),
 ('Varsity LD', '/index/tourn/fields.mhtml?tourn_id=16856&event_id=141005'),
 ('Varsity Public Forum',
  '/index/tourn/fields.mhtml?tourn_id=16856&event_id=141006')]

#### Entries

Take the URL to an event's page and return event entry info.

In [23]:
# extract table from a page
def getEntries(url, eventType, tournName):
    """Scrape the first table of an event page into a list of rows.

    Parameters
    ----------
    url : str
        URL of the event's entries page.
    eventType : str
        Event code stored as the second value of every returned row.
    tournName : str
        Tournament name stored as the first value of every returned row.

    Returns
    -------
    list of list of str
        One row per table row that has ``<td>`` cells:
        ``[tournName, eventType]`` followed by the stripped text of each cell.
        Empty when the page contains no table.
    """
    # close the HTTP response as soon as the body has been read
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    table = soup.find("table")
    if table is None:
        # no table on the page: report no entries instead of raising
        # AttributeError on None
        return []

    # find("tbody") returns None for a table written without <tbody>;
    # fall back to the rows of the table itself
    body = table.find("tbody")
    if body is None:
        body = table

    entries = []
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        if not cells:
            # rows without <td> cells (e.g. header rows) carry no entry data
            continue
        entries.append([tournName, eventType] + [field.text.strip() for field in cells])

    return entries



In [24]:
# example: scrape one event page, tagging every row with the given event code and tournament name
# NOTE(review): event_id=141005 is listed as 'Varsity LD' in the getEvents("16856") output,
# yet the label passed here is "JVLD" — confirm which is intended
getEntries("https://www.tabroom.com/index/tourn/fields.mhtml?tourn_id=16856&event_id=141005", "JVLD", "AppleValley")

[['AppleValley',
  'JVLD',
  'Acton-Boxborough Regional High Scho',
  'MA/US',
  'Bellerina Hu',
  'Acton-Boxborough BH'],
 ['AppleValley',
  'JVLD',
  'Apple Valley High School',
  'MN/US',
  'John Schwartz',
  'Apple Valley JS'],
 ['AppleValley',
  'JVLD',
  'Apple Valley High School',
  'MN/US',
  'Nora Bolsoni',
  'Apple Valley NB'],
 ['AppleValley',
  'JVLD',
  'Appleton North',
  'WI/US',
  'Mihir Uberoi',
  'Appleton North MU'],
 ['AppleValley',
  'JVLD',
  'BASIS Independent Silicon Valley In',
  'CA/US',
  'Shreyas Kapavarapu',
  'BASIS Independent Silicon Valley Independent SK'],
 ['AppleValley',
  'JVLD',
  'Bergen County Academies',
  'NJ/US',
  'Andrew Kim',
  'Bergen County Academies AK'],
 ['AppleValley',
  'JVLD',
  'Bettendorf High School',
  'IA/US',
  'Noah Rantilla',
  'Bettendorf NR'],
 ['AppleValley',
  'JVLD',
  'Brentwood School',
  'CA/US',
  'Sophie Rubin',
  'Brentwood SR'],
 ['AppleValley',
  'JVLD',
  'Byram Hills High School',
  'NY/US',
  'Eleanor Wangens

#### Info

Take a tournament ID and return the tournament's dates, year, city, and state.

In [25]:
def _parseHeader(header):
    """Split a tournament header such as ``"2020 — NSDA Campus, MN/US"``.

    Returns ``(year, city, state)``. The text before the first em dash is the
    year and the text after it is the location. A location containing a comma
    is split into city and state; otherwise the city is the string ``"None"``
    and the whole location is used as the state. A header without an em dash
    yields an empty location instead of raising IndexError.
    """
    parts = header.split('—')
    year = parts[0].strip()
    location = parts[1].strip() if len(parts) > 1 else ""

    if ',' in location:
        place = location.split(',')
        city = place[0].strip()
        state = place[1].strip()
    else:
        city = "None"
        state = location

    return year, city, state


def getInfo(tourn_id):
    """Fetch a tournament's index page and return ``[date, year, city, state]``.

    Parameters
    ----------
    tourn_id : str
        Tournament ID; it is appended to the index-page URL as-is.

    Returns
    -------
    list of str
        ``date`` is the whitespace-normalised text of the first
        ``<span class="smaller half">``; ``year``, ``city`` and ``state`` come
        from the first ``<h5>`` on the page (see ``_parseHeader``).

    Raises
    ------
    IndexError
        If the page has no ``<h5>`` or no ``<span class="smaller half">``.
    """
    url = "https://www.tabroom.com/index/tourn/index.mhtml?tourn_id=" + tourn_id

    # load page, closing the HTTP response once the body has been read
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # find header and split it into its parts
    header = soup.select('h5')[0].text.strip()
    year, city, state = _parseHeader(header)

    # get info box; split/join collapses runs of whitespace into single spaces
    info = soup.find_all('span', {'class' : 'smaller half'})[0].text
    date = ' '.join(info.split())

    return [date, year, city, state]

In [26]:
# example: fetch the [date, year, city, state] info for tournament 16856
getInfo("16856")

['11/6 to 11/8', '2020', 'NSDA Campus', 'MN/US']

#### Execution

Loop through tournaments.

In [28]:
# read tourn list
# newline='' leaves line-ending handling to the csv module, as the csv docs
# recommend for files passed to csv.reader / csv.writer
with open(TOURNAMENT_CSV, 'r', newline='') as tourn_file, \
        open(OUTPATH + ENTRIES_FILE, 'w', newline='') as out_file:

    tourn_reader = csv.DictReader(tourn_file)
    tournWriter = csv.writer(out_file,
                                    lineterminator = "\n")

    # NOTE(review): seven header columns, while the sample getEntries output
    # earlier in the notebook shows six values per row — confirm "Status"
    tournWriter.writerow(["Tournament", "Event", "School", "State", "Name", "Code", "Status"])

    for tourn in tourn_reader:
        tourn_name = tourn["Name"]
        # the "URL" column is handed to getEvents as the tournament ID
        tourn_id = tourn["URL"]

        print("Checking " + tourn_name)

        events = getEvents(tourn_id)

        for event in events:

            # event is a (raw event name, href) pair
            eventType = getType(event[0])
            print(event[0], "-->", eventType)

            if eventType in TARGET_EVENTS:

                # the hrefs are site-relative ("/index/..."); urljoin resolves
                # them against the host without producing a "//" path, which
                # plain concatenation did
                eventURL = urllib.parse.urljoin("https://www.tabroom.com/", event[1])

                tournWriter.writerows(getEntries(eventURL, eventType, tourn_name))

                print("Scraped", eventType, "for", tourn_name)

Checking alta15
LD --> VLD
Scraped VLD for alta15
PF --> VPF
Scraped VPF for alta15
Policy --> VCX
Scraped VCX for alta15
Checking alta16
LD --> VLD
Scraped VLD for alta16
PF --> VPF
Scraped VPF for alta16
Policy --> VCX
Scraped VCX for alta16
Checking alta17
LD --> VLD
Scraped VLD for alta17
PF --> VPF
Scraped VPF for alta17
Policy --> VCX
Scraped VCX for alta17
Checking alta18
LD --> VLD
Scraped VLD for alta18
PF --> VPF
Scraped VPF for alta18
Policy --> VCX
Scraped VCX for alta18
Checking alta19
LD --> VLD
Scraped VLD for alta19
PF --> VPF
Scraped VPF for alta19
Policy --> VCX
Scraped VCX for alta19
Checking alta20
LD --> VLD
Scraped VLD for alta20
PF --> VPF
Scraped VPF for alta20
Policy --> VCX
Scraped VCX for alta20
Checking applevalley15
Round Robin --> None
Varsity LD --> VLD
Scraped VLD for applevalley15
Varsity Public Forum --> VPF
Scraped VPF for applevalley15
Checking applevalley16
Varsity LD --> VLD
Scraped VLD for applevalley16
Varsity Public Forum --> VPF
Scraped VPF for

Big Questions --> None
Congressional Debate --> None
Congressional Round Robin --> None
Lincoln Douglas --> VLD
Scraped VLD for blake19
Policy Debate --> VCX
Scraped VCX for blake19
Public Forum --> VPF
Scraped VPF for blake19
Public Forum Round Robin --> None
World Schools --> None
Checking blake20
Congressional Debate --> None
Lincoln Douglas --> VLD
Scraped VLD for blake20
Middle School/Novice Policy --> NCX
Scraped NCX for blake20
Policy Debate --> VCX
Scraped VCX for blake20
Public Forum --> VPF
Scraped VPF for blake20
Public Forum Round Robin --> None
World Schools --> None
Checking bluekey19
Varsity Lincoln Douglas --> VLD
Scraped VLD for bluekey19
Varsity Public Forum --> VPF
Scraped VPF for bluekey19
Checking bluekey20
Dramatic Interpretation --> None
Extemporaneous Speaking --> None
Humorous Interpretation --> None
Informative Speaking --> None
Novice Lincoln Douglas --> NLD
Scraped NLD for bluekey20
Novice Public Forum --> NPF
Scraped NPF for bluekey20
Original Oratory --> N

Scraped VCX for churchill17
Championship L-D --> VLD
Scraped VLD for churchill17
Championship Public Forum Debate --> VPF
Scraped VPF for churchill17
Novice L-D --> None
Novice Public Forum Debate --> NPF
Scraped NPF for churchill17
Checking churchill18
Championship CX --> VCX
Scraped VCX for churchill18
Championship LD --> VLD
Scraped VLD for churchill18
Novice CX --> NCX
Scraped NCX for churchill18
Novice LD --> NLD
Scraped NLD for churchill18
Novice PF --> NPF
Scraped NPF for churchill18
Public Forum --> VPF
Scraped VPF for churchill18
Checking churchill19
Championship C-X --> VCX
Scraped VCX for churchill19
Championship L-D --> VLD
Scraped VLD for churchill19
Championship Public Forum Debate --> VPF
Scraped VPF for churchill19
Novice C-X --> NCX
Scraped NCX for churchill19
Novice L-D --> None
Novice Public Forum Debate --> NPF
Scraped NPF for churchill19
Checking churchill20
Championship C-X --> VCX
Scraped VCX for churchill20
Championship L-D --> VLD
Scraped VLD for churchill20
Ch

Congressional Debate --> None
Dramatic Interpretation --> None
Duo Interpretation --> None
Extemporaneous Speaking --> None
Humorous Interpretation --> None
Impromptu Speaking --> None
Informative Speaking --> None
LD Challenge --> JVLD
Scraped JVLD for durham20
LD Debate --> VLD
Scraped VLD for durham20
Oral Interpretation --> None
Original Oratory --> None
Program of Oral Interpretation --> None
Public Forum Challenge --> JVPF
Scraped JVPF for durham20
Public Forum Debate --> VPF
Scraped VPF for durham20
Checking emory16
Lincoln-Douglas --> VLD
Scraped VLD for emory16
Pelham Debate --> None
Public Forum --> VPF
Scraped VPF for emory16
Checking emory17
Lincoln-Douglas --> VLD
Scraped VLD for emory17
Pelham Debate --> None
Public Forum --> VPF
Scraped VPF for emory17
Checking emory18
Lincoln-Douglas --> VLD
Scraped VLD for emory18
Pelham Debate --> None
Public Forum --> VPF
Scraped VPF for emory18
Checking emory19
Congressional Debate --> None
Lincoln-Douglas --> VLD
Scraped VLD for em

Scraped NPF for harvard17
Varsity Lincoln Douglas --> VLD
Scraped VLD for harvard17
Varsity Policy --> VCX
Scraped VCX for harvard17
Varsity Public Forum --> VPF
Scraped VPF for harvard17
Checking harvard18
JVLD --> JVLD
Scraped JVLD for harvard18
JVP --> None
JVPF --> JVPF
Scraped JVPF for harvard18
NPF --> NPF
Scraped NPF for harvard18
VLD --> VLD
Scraped VLD for harvard18
VPD --> None
VPF --> VPF
Scraped VPF for harvard18
Checking harvard19
Dramatic Interp --> None
Duo Interpretation --> None
Extemp --> None
Humorous Interp --> None
Informative Speaking --> None
JVCX --> JVCX
Scraped JVCX for harvard19
JVLD --> JVLD
Scraped JVLD for harvard19
JVPF --> JVPF
Scraped JVPF for harvard19
MSPF --> None
NPF --> NPF
Scraped NPF for harvard19
Original Oratory --> None
Program of Oral Interpretation --> None
VCX --> VCX
Scraped VCX for harvard19
VLD --> VLD
Scraped VLD for harvard19
VPF --> VPF
Scraped VPF for harvard19
Checking harvard20
Dramatic Interp --> None
Duo Interpretation --> None
E

Congressional Debate --> None
Declamation Speaking --> None
Dramatic Interpretation --> None
Duo Interpretation --> None
Extemporaneous Speaking --> None
Humorous Interpretation --> None
Impromptu Speaking --> None
Informative Speaking --> None
Novice LD Debate --> NLD
Scraped NLD for isidore16
Novice Policy Debate --> NCX
Scraped NCX for isidore16
Oral Interpretation --> None
Original Oratory --> None
Program of Oral Interpretation --> None
Public Forum Debate --> VPF
Scraped VPF for isidore16
Varsity LD Debate --> VLD
Scraped VLD for isidore16
Varsity Policy Debate --> VCX
Scraped VCX for isidore16
Checking isidore17
Congressional Debate --> None
Declamation Speaking --> None
Dramatic Interpretation --> None
Duo Interpretation --> None
Extemporaneous Speaking --> None
Humorous Interpretation --> None
Impromptu Speaking --> None
Informative Speaking --> None
Novice LD Debate --> NLD
Scraped NLD for isidore17
Novice Policy Debate --> NCX
Scraped NCX for isidore17
Oral Interpretation --

Scraped NLD for longbeach17
Novice Oratorical Interpretation --> None
Novice Original Advocacy --> None
Novice Original Oratory --> None
Novice Original Prose and Poetry --> None
Novice Policy --> NCX
Scraped NCX for longbeach17
Novice Presiding Officer --> None
Novice Public Forum --> NPF
Scraped NPF for longbeach17
Novice Thematic Interpretation --> None
Open Congress --> None
Open Dramatic Interpretation --> None
Open Duo Interpretation --> None
Open Expository --> None
Open Extemporaneous --> None
Open Humorous Interpretation --> None
Open Impromptu --> None
Open Lincoln-Douglas - CA --> JVLD
Scraped JVLD for longbeach17
Open Lincoln-Douglas - TOC --> VLD
Scraped VLD for longbeach17
Open Oratorical Interpretation --> None
Open Original Advocacy --> None
Open Original Oratory --> None
Open Original Prose and Poetry --> None
Open Policy --> VCX
Scraped VCX for longbeach17
Open Presiding Officer --> None
Open Public Forum --> VPF
Scraped VPF for longbeach17
Open Thematic Interpretatio

Scraped VPF for middleton16
Sat Congress --> None
Checking middleton17
Fri Congress --> None
Lincoln Douglas --> VLD
Scraped VLD for middleton17
Novice Policy --> NCX
Scraped NCX for middleton17
Open Policy --> VCX
Scraped VCX for middleton17
Public Forum --> VPF
Scraped VPF for middleton17
Sat Congress --> None
Checking middleton18
Fri Congress --> None
Lincoln Douglas --> VLD
Scraped VLD for middleton18
Novice Policy --> NCX
Scraped NCX for middleton18
Open Policy --> VCX
Scraped VCX for middleton18
Public Forum --> VPF
Scraped VPF for middleton18
Sat Congress --> None
Checking middleton19
Lincoln Douglas --> VLD
Scraped VLD for middleton19
Novice Policy --> NCX
Scraped NCX for middleton19
Open Policy --> VCX
Scraped VCX for middleton19
Public Forum --> VPF
Scraped VPF for middleton19
Checking middleton20
Lincoln Douglas --> VLD
Scraped VLD for middleton20
Novice Lincoln Douglas --> NLD
Scraped NLD for middleton20
Novice Policy --> NCX
Scraped NCX for middleton20
Novice Public Forum 

Scraped VCX for peninsula19
Oratorical Interpretation --> None
Original Oratory --> None
Original Prose/Poetry --> None
Parliamentary --> None
Program Oral Interpretation --> None
Public Forum --> VPF
Scraped VPF for peninsula19
Checking peninsula20
Congress --> None
Dramatic Interpretation --> None
Duo Interpretation --> None
Extemporaneous --> None
Humorous Interpretation --> None
Impromptu --> None
Informative --> None
Novice Lincoln-Douglas --> NLD
Scraped NLD for peninsula20
Novice Policy --> NCX
Scraped NCX for peninsula20
Open Lincoln-Douglas --> VLD
Scraped VLD for peninsula20
Open Policy --> VCX
Scraped VCX for peninsula20
Open Public Forum --> VPF
Scraped VPF for peninsula20
Oratorical Interpretation --> None
Original Oratory --> None
Original Prose/Poetry --> None
Parliamentary --> None
Program Oral Interpretation --> None
Checking penn15
Novice LD --> NLD
Scraped NLD for penn15
Parliamentary Debate --> None
Policy Debate --> VCX
Scraped VCX for penn15
Public Forum --> VPF
S

Scraped JVPF for pugetsound18
LD JV --> JVLD
Scraped JVLD for pugetsound18
LD Novice --> NLD
Scraped NLD for pugetsound18
LD Open --> VLD
Scraped VLD for pugetsound18
Novice Public Forum --> NPF
Scraped NPF for pugetsound18
Open Public Forum --> VPF
Scraped VPF for pugetsound18
Oratory Novice --> None
Oratory Open --> None
Checking pugetsound19
CX Open --> VCX
Scraped VCX for pugetsound19
Congress Novice --> None
Congress Open --> None
DUO Novice --> None
DUO Open --> None
Dramatic Interp Novice --> None
Dramatic Interp Open --> None
Expository Novice --> None
Expository Open --> None
Extemp Novice --> None
Extemp Open --> None
Humorous Interp. Open --> None
Impromptu Novice --> None
Impromptu Open --> None
LD JV --> JVLD
Scraped JVLD for pugetsound19
LD Novice --> NLD
Scraped NLD for pugetsound19
LD Open --> VLD
Scraped VLD for pugetsound19
Novice Public Forum --> NPF
Scraped NPF for pugetsound19
Open Public Forum --> VPF
Scraped VPF for pugetsound19
Oratory Novice --> None
Oratory Op

Scraped JVPF for uk18
Middle School PF --> None
Novice Policy Debate --> NCX
Scraped NCX for uk18
Oral Interpretation --> None
Original Oratory --> None
PF Round Robin --> None
Program Oral Intrepretation --> None
Varsity LD --> VLD
Scraped VLD for uk18
Varsity PF --> VPF
Scraped VPF for uk18
Varsity Policy Debate --> VCX
Scraped VCX for uk18
Checking uk19
Congressional Debate --> None
Dramatic Interpretation --> None
Duo Interpretation --> None
Extemporaneous Speaking --> None
Humorous Interpretation --> None
Informative Speaking --> None
LD Varsity --> VLD
Scraped VLD for uk19
Oral Interpretation --> None
Original Oratory --> None
Policy - Novice --> NCX
Scraped NCX for uk19
Policy - Varsity --> VCX
Scraped VCX for uk19
Program Oral Intrepretation --> None
Public Forum - JV --> JVPF
Scraped JVPF for uk19
Public Forum - Middle School --> NPF
Scraped NPF for uk19
Public Forum - Varsity --> VPF
Scraped VPF for uk19
Checking uk20
Congressional Debate --> None
Dramatic Interpretation --> 

In [29]:
# read tourn list
# newline='' leaves line-ending handling to the csv module, as the csv docs
# recommend for files passed to csv.reader / csv.writer
with open(TOURNAMENT_CSV, 'r', newline='') as tourn_file, \
        open(OUTPATH + INFO_FILE, 'w', newline='') as out_file:

    tourn_reader = csv.DictReader(tourn_file)
    tournWriter = csv.writer(out_file,
                                    lineterminator = "\n")

    tournWriter.writerow(["Tourn Name", "Dates", "Year", "Location", "State"])

    for tourn in tourn_reader:
        tourn_name = tourn["Name"]
        # the "URL" column is handed to getInfo as the tournament ID
        tourn_id = tourn["URL"]

        print("Checking " + tourn_name)

        # getInfo returns [date, year, city, state], matching the header above
        tournWriter.writerow([tourn_name] + getInfo(tourn_id))
        

Checking alta15
Checking alta16
Checking alta17
Checking alta18
Checking alta19
Checking alta20
Checking applevalley15
Checking applevalley16
Checking applevalley17
Checking applevalley18
Checking applevalley19
Checking applevalley20
Checking arthurschool20
Checking asu16
Checking asu17
Checking asu18
Checking asu19
Checking asu20
Checking asu21
Checking bethelpark17
Checking bethelpark18
Checking bethelpark19
Checking bethelpark20
Checking beltway17
Checking beltway16
Checking beltway15
Checking beltway14
Checking blake15
Checking blake16
Checking blake17
Checking blake18
Checking blake19
Checking blake20
Checking bluekey19
Checking bluekey20
Checking bronx15
Checking bronx16
Checking bronx17
Checking bronx18
Checking bronx19
Checking bronx20
Checking cal15
Checking cal16
Checking cal17
Checking cal18
Checking cal19
Checking cal20
Checking centralvalley15
Checking centralvalley16
Checking centralvalley17
Checking saintjames17
Checking wakeforest16
Checking churchill17
Checking churchi

### Judge Scraper

#### Collect Links

In [13]:
paradigm_links = []

# iterate through all judges by first name: one search request per letter a-z
for c in ascii_lowercase:

    print(c)

    url = "https://www.tabroom.com/index/paradigm.mhtml?search_first={char}&search_last=".format(char = c)

    # load page, closing the HTTP response once the body has been read
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # get all links (None for anchors that have no href attribute)
    links = [link.get("href") for link in soup.find_all("a")]
    print(len(links))

    # append all paradigm links; None entries are skipped because a substring
    # test against None raises TypeError
    paradigm_links += [link for link in links if link is not None and "judge_person" in link]

a
3229
b
1217
c
1624
d
1386
e
1088
f
266
g
670
h
647
i
307
j
3011
k
1471
l
953
m
2278
n
993
o
188
p
747
q
83
r
1526
s
2507
t
1045
u
66
v
468
w
362
x
76
y
219
z
274


In [16]:
# save judgelinks
with open(OUTPATH + "judgeLinks.txt", 'w') as outFile:
    # de-duplicate, then sort so the file has the same line order on every run
    # (iteration order of a set of strings is not stable across runs)
    for link in sorted(set(paradigm_links)):
        outFile.write(link + "\n")

#### Collect Records

In [11]:
# reload the saved links; the with-block closes the file, and sorting the
# de-duplicated links gives a list whose order is the same on every run
with open(OUTPATH + "judgeLinks.txt", 'r') as linkFile:
    paradigm_links = sorted(set(name.strip() for name in linkFile))

In [12]:
def getRecords(url):
    """Scrape the rows of the first table on a judge's paradigm page.

    Parameters
    ----------
    url : str
        Paradigm page URL. The text between the first and second ``=`` is
        used as the judge ID.

    Returns
    -------
    list of dict
        One dict per table row after the first, with the keys ``Judge``,
        ``Judge ID``, ``Tournament``, ``Date``, ``Event``, ``Round Number``,
        ``Round Name``, ``Aff``, ``Neg``, ``Decision`` and ``Panel``.
        Empty when the page contains no table.
    """
    judgeID = url.split("=")[1]

    # load page, closing the HTTP response once the body has been read
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    # judge name: the span's text with its last whitespace-separated token dropped
    name = " ".join(soup.find("span", {"class": "twothirds"}).text.strip().split()[:-1])
    table = soup.find("table")
    if table is None:
        # no table on the page: return no records instead of raising
        # AttributeError on None
        return []

    records = []
    # the first <tr> is skipped — presumably the header row; TODO confirm
    for row in table.find_all("tr")[1:]:
        cols = row.find_all("td")
        tourn = cols[0].text.strip()
        # second whitespace-separated token of the date cell
        date = cols[1].text.split()[1]
        event = cols[2].text.strip()
        # the round cell holds a <span> and an <a>
        roundNum = cols[3].span.text
        roundName = cols[3].a.text
        aff = cols[4].text.strip()
        neg = cols[5].text.strip()
        decision = cols[6].text.strip()
        panel = cols[7].text.strip()
        records.append({"Judge" : name,
                        "Judge ID" : judgeID,
                        "Tournament": tourn,
                        "Date" : date,
                        "Event" : event,
                        "Round Number" : roundNum,
                        "Round Name" : roundName,
                        "Aff" : aff,
                        "Neg" : neg,
                        "Decision" : decision,
                        "Panel" : panel})

    return records

In [24]:

# newline='' leaves line-ending handling to the csv module, as the csv docs
# recommend for files passed to csv.writer
with open(OUTPATH + "records2.csv", 'w', newline='') as outFile:

    outWriter  = csv.DictWriter(outFile,
                                fieldnames = ["Judge",
                                              "Judge ID",
                                            "Tournament",
                                            "Date",
                                            "Event",
                                            "Round Number",
                                            "Round Name",
                                            "Aff",
                                            "Neg",
                                            "Decision",
                                            "Panel"],
                                quotechar='"',
                                quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator = "\n")

    outWriter.writeheader()

    count = 0
    for count, link in enumerate(paradigm_links, start=1):
        url = "https://www.tabroom.com/index/" + link

        # progress marker every 100 links, whether or not this fetch succeeds
        if (count % 100 == 0):
            print(count)

        try:
            records = getRecords(url)
            outWriter.writerows(records)
        except Exception as err:
            # Keep going after a failed judge page, but report why it failed.
            # KeyboardInterrupt is not an Exception subclass, so an interrupt
            # propagates and the with-block closes the file on the way out.
            print("Broke for " + url, repr(err))


NameError: name 'paradigm_links' is not defined

In [10]:
# number of links from index 23000 onward; slicing requires paradigm_links to be a list
# (the TypeError recorded for this cell shows it was run while paradigm_links was a set)
len(paradigm_links[23000:])

TypeError: 'set' object is not subscriptable