Entries Analysis
---

Author: Peter Zhang

Analysis of tournament entries.

### Setup

#### Imports

In [31]:
# imports
import urllib.request, urllib.parse, urllib.error
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import csv
import os.path
from os import path
import sys
from string import ascii_lowercase
import math

#### Settings

In [144]:
ENTRIES_CSV = 'tab_data/edebate_entries.csv'

In [145]:
with open(ENTRIES_CSV) as inFile:
    inReader = csv.DictReader(inFile)
    entries = [row for row in inReader]

In [13]:
STATES_CSV = 'tools/states.csv'

In [27]:
states = {}
with open(STATES_CSV) as inFile:
    inReader = csv.DictReader(inFile)
    for row in inReader:
        states[row['State']] = (float(row['Lattitude']), float(row['Longitude']))

In [108]:
INFO_CSV = 'tab_data/edebate_info.csv'

In [109]:
tournInfo = {}
with open(INFO_CSV) as inFile:
    inReader = csv.DictReader(inFile)
    for row in inReader:
        tournInfo[row["Tourn Name"]] = row

In [135]:
OUTFILE = 'results/edebate_analysis.csv'

### Analysis

#### Helpers

In [129]:
def getData(entries19, entries20, host):
    # count entries
    numEntries19 = len(entries19)
    numEntries20 = len(entries20)
    # get states
    states19 = set([getAbbrev(entry["State"]) for entry in entries19])
    states20 = set([getAbbrev(entry["State"]) for entry in entries20])
    # remove empty/unformatted states
    states19 = [state for state in states19 if state in states]
    states20 = [state for state in states20 if state in states]
    # find num of orig
    original = len([entry for entry in entries20 if getAbbrev(entry["State"]) in states19])
    avg19 = avgDist(entries19, host)
    avg20 = avgDist(entries20, host)
    newStates = [state for state in states20 if state not in states19]
    return [numEntries19,
            numEntries20,
           len(states19),
           len(states20),
           original, 
           avg19,
           avg20,
           newStates]

In [61]:
# computes distance between two sets of coords
def distance(coords1, coords2):
    return math.sqrt((coords1[0]-coords2[0])**2 + (coords1[1]-coords2[1])**2)

In [133]:
# computes average distance of entries from host state
def avgDist(entries, host):
    hostCoords = states[host]
    totalDist = 0
    for entry in entries:
        state = entry["State"]
        abbrev = getAbbrev(state)
        if abbrev in states:
            coords = states[abbrev]
            totalDist += distance(hostCoords, coords)
    return totalDist/len(entries)

In [131]:
def getAbbrev(state):
    return state.split("/")[0]

#### Example

In [63]:
dowling19 = [entry for entry in entries if entry["Tournament"] == "dowling19"]
dowling20 = [entry for entry in entries if entry["Tournament"] == "dowling20"]

In [64]:
len(dowling19)

46

In [65]:
len(dowling20)

58

In [66]:
set([entry["State"] for entry in dowling19])

{'IA/US', 'MN/US', 'MO/US', 'NE/US', 'SD/US'}

In [67]:
set([entry["State"] for entry in dowling20])

{'AL/US',
 'AZ/US',
 'CA/US',
 'IA/US',
 'MI/US',
 'MN/',
 'MN/US',
 'NE/US',
 'OR/US',
 'PA/US',
 'SD/US',
 'TX/',
 'TX/US',
 'WA/US',
 'WI/US',
 'WV/US'}

In [84]:
avgDist(dowling20, "IA")

8.200960034507185

In [82]:
getData(dowling19, dowling20, "IA")

[46,
 58,
 5,
 14,
 36,
 5.010976269007526,
 8.200960034507185,
 ['CA', 'WI', 'AZ', 'AL', 'OR', 'WA', 'MI', 'WV', 'TX', 'PA']]

#### Analysis

In [146]:
tourn_list = sorted(list(set([entry["Tournament"] for entry in entries])))

In [147]:
tourn_list

['UT19',
 'UT20',
 'alta19',
 'alta20',
 'applevalley19',
 'applevalley20',
 'bronx19',
 'bronx20',
 'dowling19',
 'dowling20',
 'glenbrooks19',
 'glenbrooks20',
 'grapevine19',
 'grapevine20',
 'greenhill19',
 'greenhill20',
 'holycross19',
 'holycross20',
 'isidore19',
 'isidore20',
 'jackhowe19',
 'jackhowe20',
 'loyola19',
 'loyola20',
 'meadows19',
 'meadows20',
 'presentation19',
 'presentation20',
 'princeton19',
 'princeton20',
 'strake19',
 'strake20',
 'valley19',
 'valley20',
 'yale19',
 'yale20']

In [148]:
entries_by_tourn = {}
for entry in entries:
    tourn = entry["Tournament"]
    if tourn in entries_by_tourn:
        entries_by_tourn[tourn].append(entry)
    else:
        entries_by_tourn[tourn] = [entry]

In [149]:
with open(OUTFILE, 'w') as outFile:
    outWriter = csv.writer(outFile, lineterminator = '\n')
    outWriter.writerow(["Tournament",
                        "Entries 2019",
                                           "Entries 2020",
                                           "States 2019",
                                           "States 2020",
                                           "Local 2020 Participants",
                                           "Avg Dist 2019",
                                           "Avg Dist 2020",
                                           "New States"])
    for i in range(len(tourn_list)//2):
        tourn19 = tourn_list[2*i]
        tourn20 = tourn_list[2*i+1]
        print(tourn19)
        print(tourn20)
        host = tournInfo[tourn19]["State"]
        host = getAbbrev(host)
        outWriter.writerow([tourn19[:-2]] + getData(entries_by_tourn[tourn19],
                entries_by_tourn[tourn20],
                host))

UT19
UT20
alta19
alta20
applevalley19
applevalley20
bronx19
bronx20
dowling19
dowling20
glenbrooks19
glenbrooks20
grapevine19
grapevine20
greenhill19
greenhill20
holycross19
holycross20
isidore19
isidore20
jackhowe19
jackhowe20
loyola19
loyola20
meadows19
meadows20
presentation19
presentation20
princeton19
princeton20
strake19
strake20
valley19
valley20
yale19
yale20
