Entries Analysis
---

Author: Peter Zhang

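Analysis of tournament entries: compares each tournament's 2019 and 2020 entries (entry counts, states represented, average distance from the host state, average state ACT score and income) and summarizes entries and distance by state.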

### Setup

#### Imports

In [273]:
# imports
import urllib.request, urllib.parse, urllib.error
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import csv
import os.path
from os import path
import sys
from string import ascii_lowercase
import math
import mpu

#### Settings

In [326]:
ENTRIES_CSV = 'tab_data/edebate_entries.csv'

In [327]:
# Load every row of the entries CSV into memory; each row is a dict keyed by
# the column names in the file's header line.
with open(ENTRIES_CSV) as inFile:
    inReader = csv.DictReader(inFile)
    entries = list(inReader)

In [208]:
LOCATION_CSV = 'tools/locations.csv'

In [209]:
# Map each state code to its (latitude, longitude) pair of floats.
# The 'Lattitude' spelling is a column name read from the CSV, so it has to
# stay in sync with the header of that file.
location = {}
with open(LOCATION_CSV) as inFile:
    inReader = csv.DictReader(inFile)
    location.update(
        (record['State'], (float(record['Lattitude']), float(record['Longitude'])))
        for record in inReader
    )

In [203]:
# Live view of the state codes that have coordinates; used throughout the
# notebook for `x in states` membership tests.
# The view is tied to the dict object that `location` named when this line
# ran: re-running the loading cell rebinds `location` to a new dict, so this
# cell must be re-run afterwards to pick it up.
states = location.keys()

In [318]:
INFO_CSV = 'tab_data/edebate_info.csv'

In [319]:
# Index the tournament info rows by their "Tourn Name" column; each value is
# the complete CSV row as a dict.
tournInfo = {}
with open(INFO_CSV) as inFile:
    inReader = csv.DictReader(inFile)
    tournInfo.update((record["Tourn Name"], record) for record in inReader)

In [159]:
OUTFILE = 'results/edebate_analysis.csv'

In [265]:
OUTFILE2 = 'results/edebate_summary.csv'

In [212]:
ABBREV_CSV = 'tools/abbrev.csv'

In [213]:
# Lookup table from the 'Name' column to the 'Abbrev' column of the
# abbreviations CSV.
abbrevOf = {}
with open(ABBREV_CSV) as inFile:
    inReader = csv.DictReader(inFile)
    abbrevOf.update((record['Name'], record['Abbrev']) for record in inReader)

In [224]:
ACT_CSV = 'tools/actscores.csv'

In [239]:
# ACT score per state abbreviation. Rows whose 'State' value has no entry in
# abbrevOf are skipped.
acts = {}
with open(ACT_CSV) as inFile:
    for record in csv.DictReader(inFile):
        stateName = record['State']
        if stateName not in abbrevOf:
            continue
        acts[abbrevOf[stateName]] = float(record['Score'])

In [240]:
INCOME_CSV = 'tools/income.csv'

In [241]:
# Income per state abbreviation, stored as an int. Rows whose 'State' value
# has no entry in abbrevOf are skipped.
incomes = {}
with open(INCOME_CSV) as inFile:
    for record in csv.DictReader(inFile):
        stateName = record['State']
        if stateName not in abbrevOf:
            continue
        # thousands separators are stripped before the int conversion
        incomes[abbrevOf[stateName]] = int(record['Income'].replace(',', ''))

### Analysis

#### Helpers

In [257]:
def getData(entries19, entries20, host):
    """Build the 2019-vs-2020 comparison values for one tournament.

    Args:
        entries19: entry rows (dicts with a "State" key) for the 2019 edition.
        entries20: entry rows for the 2020 edition.
        host: state code of the host, forwarded to avgDist.

    Returns:
        An 11-element list: entry counts for 2019 and 2020, number of
        recognized states for 2019 and 2020, the number of 2020 entries whose
        state code was among the recognized 2019 states, then the 2019/2020
        pairs of average distance, average ACT score and average income.
    """
    def recognized(codes):
        # keep only codes that have coordinates (drops empty/unformatted states)
        return [code for code in codes if code in states]

    count19, count20 = len(entries19), len(entries20)

    codes19 = {getAbbrev(row["State"]) for row in entries19}
    codes20 = {getAbbrev(row["State"]) for row in entries20}
    states19 = recognized(codes19)
    states20 = recognized(codes20)

    # 2020 entries coming from a state that was already represented in 2019
    original = sum(1 for row in entries20 if getAbbrev(row["State"]) in states19)

    dist19, dist20 = avgDist(entries19, host), avgDist(entries20, host)
    act19, act20 = avgACT(entries19), avgACT(entries20)
    income19, income20 = avgIncome(entries19), avgIncome(entries20)

    return [
        count19,
        count20,
        len(states19),
        len(states20),
        original,
        dist19,
        dist20,
        act19,
        act20,
        income19,
        income20,
    ]

In [248]:
def avgACT(entries):
    """Return the mean state ACT score over the given entries.

    An entry contributes only when its state code is both a recognized state
    (present in ``states``) and has a score in ``acts``. ``states`` and
    ``acts`` are loaded from different CSV files, so a recognized state is not
    guaranteed to have a score; checking both avoids a KeyError.

    Args:
        entries: iterable of entry rows (dicts with a 'State' key).

    Returns:
        The mean score as a float, or NaN when no entry contributes (instead
        of raising ZeroDivisionError).
    """
    scores = []
    for entry in entries:
        abbrev = getAbbrev(entry['State'])
        if abbrev in states and abbrev in acts:
            scores.append(acts[abbrev])
    if not scores:
        return float('nan')
    return sum(scores)/len(scores)

In [251]:
def avgIncome(entries):
    """Return the mean state income over the given entries.

    An entry contributes only when its state code is both a recognized state
    (present in ``states``) and has a value in ``incomes``. ``states`` and
    ``incomes`` are loaded from different CSV files, so a recognized state is
    not guaranteed to have an income; checking both avoids a KeyError.

    Args:
        entries: iterable of entry rows (dicts with a 'State' key).

    Returns:
        The mean income as a float, or NaN when no entry contributes (instead
        of raising ZeroDivisionError).
    """
    households = []
    for entry in entries:
        abbrev = getAbbrev(entry['State'])
        if abbrev in states and abbrev in incomes:
            households.append(incomes[abbrev])
    if not households:
        return float('nan')
    return sum(households)/len(households)

In [274]:
# computes distance between two sets of coords
def distance(coords1, coords2):
    """Return the haversine distance between two coordinate pairs.

    Thin wrapper around ``mpu.haversine_distance``; both arguments are passed
    through unchanged. Callers in this notebook pass the (latitude, longitude)
    tuples stored in ``location``.

    NOTE(review): the unit of the result is whatever mpu returns, presumably
    kilometres -- confirm against the mpu documentation.
    """
    return mpu.haversine_distance(coords1, coords2)

In [262]:
# computes average distance of entries from host state
def avgDist(entries, host):
    """Return the mean distance between the host state and the entries' states.

    Only entries whose state code is recognized (present in ``states``) have a
    distance, so the mean is taken over those entries only. Dividing by the
    total number of entries would count every unrecognized entry as distance
    0 and bias the average downwards; it would also disagree with avgACT and
    avgIncome, which average over recognized entries.

    Args:
        entries: iterable of entry rows (dicts with a "State" key).
        host: state code of the host; must be a key of ``location``.

    Returns:
        The mean distance as returned by ``distance``, or NaN when no entry
        has a recognized state (including empty input).

    Raises:
        KeyError: if ``host`` has no coordinates in ``location``.
    """
    hostCoords = location[host]
    totalDist = 0
    counted = 0
    for entry in entries:
        abbrev = getAbbrev(entry["State"])
        if abbrev in states:
            totalDist += distance(hostCoords, location[abbrev])
            counted += 1
    if counted == 0:
        return float('nan')
    return totalDist/counted

In [131]:
def getAbbrev(state):
    """Return the part of ``state`` before its first "/" (e.g. "IA/US" -> "IA").

    A string without "/" is returned unchanged.
    """
    prefix, _sep, _rest = state.partition("/")
    return prefix

#### Example

In [63]:
# Pull out the entries of the two Dowling editions as a worked example.
dowling19, dowling20 = (
    [row for row in entries if row["Tournament"] == tournName]
    for tournName in ("dowling19", "dowling20")
)

In [64]:
len(dowling19)

46

In [65]:
len(dowling20)

58

In [66]:
set([entry["State"] for entry in dowling19])

{'IA/US', 'MN/US', 'MO/US', 'NE/US', 'SD/US'}

In [67]:
set([entry["State"] for entry in dowling20])

{'AL/US',
 'AZ/US',
 'CA/US',
 'IA/US',
 'MI/US',
 'MN/',
 'MN/US',
 'NE/US',
 'OR/US',
 'PA/US',
 'SD/US',
 'TX/',
 'TX/US',
 'WA/US',
 'WI/US',
 'WV/US'}

In [84]:
avgDist(dowling20, "IA")

8.200960034507185

In [82]:
getData(dowling19, dowling20, "IA")

[46,
 58,
 5,
 14,
 36,
 5.010976269007526,
 8.200960034507185,
 ['CA', 'WI', 'AZ', 'AL', 'OR', 'WA', 'MI', 'WV', 'TX', 'PA']]

#### Tournament Analysis

In [328]:
# Distinct tournament identifiers, in sorted order.
tourn_list = sorted({row["Tournament"] for row in entries})

In [329]:
tourn_list

['UT19',
 'UT20',
 'alta19',
 'alta20',
 'applevalley19',
 'applevalley20',
 'blake19',
 'blake20',
 'bronx19',
 'bronx20',
 'collegeprep19',
 'collegeprep20',
 'cypress19',
 'cypress20',
 'dowling19',
 'dowling20',
 'duke19',
 'duke20',
 'glenbrooks19',
 'glenbrooks20',
 'grapevine19',
 'grapevine20',
 'greenhill19',
 'greenhill20',
 'heritage19',
 'heritage20',
 'holycross19',
 'holycross20',
 'isidore19',
 'isidore20',
 'jackhowe19',
 'jackhowe20',
 'loyola19',
 'loyola20',
 'meadows19',
 'meadows20',
 'presentation19',
 'presentation20',
 'princeton19',
 'princeton20',
 'ridge19',
 'ridge20',
 'seattle19',
 'seattle20',
 'strake19',
 'strake20',
 'uk19',
 'uk20',
 'valley19',
 'valley20',
 'yale19',
 'yale20']

In [330]:
# Group the entry rows by tournament identifier, keeping file order within
# each group.
entries_by_tourn = {}
for entry in entries:
    entries_by_tourn.setdefault(entry["Tournament"], []).append(entry)

In [332]:
# Write one comparison row per tournament that has both a 2019 and a 2020
# edition.
with open(OUTFILE, 'w') as outFile:
    outWriter = csv.writer(outFile, lineterminator = '\n')
    outWriter.writerow(["Tournament",
                        "Entries 2019",
                        "Entries 2020",
                        "States 2019",
                        "States 2020",
                        "Local 2020 Participants",
                        "Avg Dist 2019",
                        "Avg Dist 2020",
                        "Avg ACT 2019",
                        "Avg ACT 2020",
                        "Avg Income 2019",
                        "Avg Income 2020"])
    for tourn19 in tourn_list:
        # Pair the editions by name rather than by position in the sorted
        # list: positional pairing (2*i, 2*i+1) silently misaligns every
        # following pair as soon as one tournament lacks one of its editions.
        if not tourn19.endswith("19"):
            continue
        tourn20 = tourn19[:-2] + "20"
        if tourn20 not in entries_by_tourn:
            print("skipping " + tourn19 + ": no 2020 entries found")
            continue
        print(tourn19)
        print(tourn20)
        # The host state is read from the 2019 info row; it is translated
        # through abbrevOf when it is one of abbrevOf's keys.
        host = tournInfo[tourn19]["State"]
        host = getAbbrev(host)
        if host in abbrevOf:
            host = abbrevOf[host]
        outWriter.writerow([tourn19[:-2]] + getData(entries_by_tourn[tourn19],
                entries_by_tourn[tourn20],
                host))

UT19
UT20
alta19
alta20
applevalley19
applevalley20
blake19
blake20
bronx19
bronx20
collegeprep19
collegeprep20
cypress19
cypress20
dowling19
dowling20
duke19
duke20
glenbrooks19
glenbrooks20
grapevine19
grapevine20
greenhill19
greenhill20
heritage19
heritage20
holycross19
holycross20
isidore19
isidore20
jackhowe19
jackhowe20
loyola19
loyola20
meadows19
meadows20
presentation19
presentation20
princeton19
princeton20
ridge19
ridge20
seattle19
seattle20
strake19
strake20
uk19
uk20
valley19
valley20
yale19
yale20


#### Summary Analysis

In [333]:
# Count entries per recognized state, separately for the 2019 and 2020
# editions.
state_entries19 = {state: 0 for state in states}
state_entries20 = {state: 0 for state in states}
for entry in entries:
    state = getAbbrev(entry['State'])
    if state in states:
        # The year is the two-character suffix of the tournament id (the rest
        # of the notebook strips it with [:-2]). A substring test would
        # misfile any 2020 tournament whose name happens to contain "19".
        if entry['Tournament'].endswith("19"):
            state_entries19[state] += 1
        else:
            state_entries20[state] += 1

In [334]:
state_entries19

{'AK': 1,
 'AL': 28,
 'AR': 19,
 'AZ': 12,
 'CA': 528,
 'CO': 10,
 'CT': 3,
 'DC': 5,
 'DE': 0,
 'FL': 162,
 'GA': 20,
 'HI': 0,
 'IA': 68,
 'ID': 11,
 'IL': 48,
 'IN': 4,
 'KS': 0,
 'KY': 9,
 'LA': 32,
 'MA': 69,
 'MD': 31,
 'ME': 0,
 'MI': 0,
 'MN': 80,
 'MO': 3,
 'MS': 4,
 'MT': 0,
 'NC': 73,
 'ND': 1,
 'NE': 38,
 'NH': 2,
 'NJ': 128,
 'NM': 9,
 'NV': 5,
 'NY': 210,
 'OH': 25,
 'OK': 13,
 'OR': 13,
 'PA': 52,
 'PR': 0,
 'RI': 2,
 'SC': 0,
 'SD': 10,
 'TN': 0,
 'TX': 537,
 'UT': 34,
 'VA': 34,
 'VT': 0,
 'WA': 52,
 'WI': 7,
 'WV': 0,
 'WY': 1}

In [335]:
state_entries20

{'AK': 4,
 'AL': 18,
 'AR': 15,
 'AZ': 42,
 'CA': 840,
 'CO': 12,
 'CT': 2,
 'DC': 16,
 'DE': 0,
 'FL': 183,
 'GA': 30,
 'HI': 6,
 'IA': 80,
 'ID': 14,
 'IL': 14,
 'IN': 1,
 'KS': 0,
 'KY': 2,
 'LA': 23,
 'MA': 104,
 'MD': 27,
 'ME': 22,
 'MI': 7,
 'MN': 99,
 'MO': 3,
 'MS': 5,
 'MT': 0,
 'NC': 77,
 'ND': 2,
 'NE': 26,
 'NH': 11,
 'NJ': 128,
 'NM': 21,
 'NV': 12,
 'NY': 204,
 'OH': 26,
 'OK': 16,
 'OR': 27,
 'PA': 72,
 'PR': 0,
 'RI': 0,
 'SC': 5,
 'SD': 12,
 'TN': 0,
 'TX': 539,
 'UT': 41,
 'VA': 18,
 'VT': 0,
 'WA': 65,
 'WI': 18,
 'WV': 6,
 'WY': 2}

In [339]:
# Sum, per recognized state, the distance from that state to the host state of
# every tournament its entries attended (2019 and 2020 kept separately).
state_distance19 = {state: 0 for state in states}
state_distance20 = {state: 0 for state in states}
for entry in entries:
    state = getAbbrev(entry['State'])
    # The host is always looked up under the 2019 tournament id, for both
    # editions.
    host = getAbbrev(tournInfo[entry['Tournament'][:-2] + "19"]['State'])
    if host in abbrevOf:
        host = abbrevOf[host]
    if state in states:
        travelled = distance(location[state], location[host])
        # The year is the two-character suffix of the tournament id; a
        # substring test would misfile a 2020 tournament whose name contains
        # "19".
        if entry['Tournament'].endswith("19"):
            state_distance19[state] += travelled
        else:
            state_distance20[state] += travelled

In [340]:
# Turn the per-state distance totals into per-entry averages, in place.
# States with no entries keep their initial 0.
# NOTE: this cell is not idempotent -- running it a second time divides the
# already-averaged values again.
for state in states:
    for totals, counts in ((state_distance19, state_entries19),
                           (state_distance20, state_entries20)):
        if counts[state] > 0:
            totals[state] /= counts[state]

In [341]:
state_distance19

{'AK': 3794.66588684453,
 'AL': 843.4358902131935,
 'AR': 713.0352579788396,
 'AZ': 1215.7616307813705,
 'CA': 585.3362851459615,
 'CO': 861.4520827641987,
 'CT': 204.45602935828614,
 'DC': 391.4223285519889,
 'DE': 0,
 'FL': 1451.9750719403758,
 'GA': 1138.8366664385399,
 'HI': 0,
 'IA': 521.3765306264916,
 'ID': 609.370961010137,
 'IL': 603.3391424257138,
 'IN': 320.641179270537,
 'KS': 0,
 'KY': 145.09943175566664,
 'LA': 384.50631218909774,
 'MA': 671.2707434492746,
 'MD': 633.3209276233388,
 'ME': 0,
 'MI': 0,
 'MN': 173.079424944642,
 'MO': 448.3624287699663,
 'MS': 779.4151745256167,
 'MT': 0,
 'NC': 522.9160713372879,
 'ND': 486.23802564677806,
 'NE': 613.5624434209376,
 'NH': 318.4600583818212,
 'NJ': 355.270752193006,
 'NM': 1483.0708035044254,
 'NV': 924.3811076219893,
 'NY': 606.2835625799936,
 'OH': 648.2205561513574,
 'OK': 464.6001924151335,
 'OR': 1511.4538808859738,
 'PA': 642.794869277339,
 'PR': 0,
 'RI': 1950.6009578532394,
 'SC': 0,
 'SD': 564.3798201170046,
 'TN':

In [342]:
state_distance20

{'AK': 4377.352947270115,
 'AL': 1047.8775747879213,
 'AR': 821.384337824888,
 'AZ': 2300.939499810653,
 'CA': 1139.1677768968075,
 'CO': 1609.3892172152616,
 'CT': 204.45602935828612,
 'DC': 919.3526356807411,
 'DE': 0,
 'FL': 1574.1438264961225,
 'GA': 1480.1135747441028,
 'HI': 4720.6410879226705,
 'IA': 975.4480649279542,
 'ID': 1191.6141440189108,
 'IL': 850.0650798627232,
 'IN': 1054.5507512471568,
 'KS': 0,
 'KY': 0.0,
 'LA': 832.4195845808132,
 'MA': 1544.7058995245295,
 'MD': 1146.8170321042749,
 'ME': 3457.8945502025035,
 'MI': 1286.4790124097524,
 'MN': 585.2660725042496,
 'MO': 719.6220490673442,
 'MS': 548.1909412643837,
 'MT': 0,
 'NC': 669.1243038257071,
 'ND': 486.23802564677806,
 'NE': 643.9419575715933,
 'NH': 2988.0368941969264,
 'NJ': 1002.0903664901723,
 'NM': 1398.4550325011646,
 'NV': 1322.6609359616502,
 'NY': 1155.9806663847526,
 'OH': 582.5671899791068,
 'OK': 653.5821674521171,
 'OR': 1831.6221019427544,
 'PA': 1006.4433708414201,
 'PR': 0,
 'RI': 0,
 'SC': 7

In [343]:
# Write the per-state summary: one row per state with its entry counts and
# distance values for both years.
with open(OUTFILE2, 'w') as outFile:
    outWriter = csv.writer(outFile, lineterminator = '\n')
    header = ["State", "Entries19", "Entries20", "Distance19", "Distance20"]
    outWriter.writerow(header)
    outWriter.writerows(
        [code,
         state_entries19[code],
         state_entries20[code],
         state_distance19[code],
         state_distance20[code]]
        for code in states
    )

In [344]:
set([entry["School"] for entry in entries])

{'++--+-Mission San Jose High School',
 'AHS Independent',
 'Academy of Classical Christian Stud',
 'Academy of Higher Learning',
 'Achievement First Brooklyn HS',
 'Acton-Boxborough RHS Independent',
 'Acton-Boxborough Regional HS',
 'Acton-Boxborough Regional High Scho',
 'Advanced Technologies',
 'Advanced Technologies Acad',
 'Agoura High School',
 'Airline HS',
 'Airline High School',
 'Albuquerque Academy',
 'Alief Taylor',
 'All Saints Episcopal School',
 "Alphan's Academy",
 'American Heritage Boca/Delray HS',
 'American Heritage Broward HS',
 'American Heritage Plantation HS',
 'American High',
 'American Independent',
 'Anderson High School',
 'Apex Friendship High School',
 'Apple Valley HS',
 'Apple Valley High School',
 'Appleton East HS',
 'Appleton North',
 'Appleton North High School',
 'Aragon High School',
 'Aragon Independent',
 'Archbishop Mitty',
 'Archbishop Mitty High School',
 'Ardrey Kell High School',
 'Ardsley Debate',
 'Ardsley High School',
 'Asheville High

In [345]:
# For every tournament with both editions, count each school's 2020 entries
# (schoolAll) and, of those, the ones at tournaments where the school had no
# 2019 entry (schoolNew).
schoolNew = {}
schoolAll = {}
for tourn19 in tourn_list:
    # Pair the editions by name rather than by position in the sorted list:
    # positional pairing (2*i, 2*i+1) silently misaligns every following pair
    # as soon as one tournament lacks one of its editions.
    if not tourn19.endswith("19"):
        continue
    tourn20 = tourn19[:-2] + "20"
    if tourn20 not in entries_by_tourn:
        continue
    entries19 = entries_by_tourn[tourn19]
    entries20 = entries_by_tourn[tourn20]
    # a set makes the membership test below constant-time instead of a scan
    oldSchools = {entry['School'] for entry in entries19}
    for entry in entries20:
        school = entry['School']
        schoolAll[school] = schoolAll.get(school, 0) + 1
        if school not in oldSchools:
            schoolNew[school] = schoolNew.get(school, 0) + 1

In [346]:
# Print, for each school with at least one "new" 2020 entry, its name, its
# count of new entries and the share of its 2020 entries that are new; tally
# the schools for which that share is exactly 1.
new = 0
for school, newCount in schoolNew.items():
    print(school)
    ratio = newCount / schoolAll[school]
    print(newCount)
    print(ratio)
    new += 1 if ratio == 1 else 0

Centennial HS
3
1.0
Claudia Taylor Johnson High School
9
1.0
Fremont High School
9
1.0
Harlingen HS South
1
1.0
Heights High School
6
1.0
J. Frank Dobie High School
1
1.0
Jack C. Hays
2
1.0
L C Anderson High School
20
1.0
Lake Travis High School
1
1.0
Legacy Christian Academy
3
0.375
Little Rock Central
4
0.4444444444444444
Los Alamos High School
1
1.0
Louis D. Brandeis
1
1.0
McMillen High School
3
1.0
McNeil High School
13
1.0
Memorial High School
6
1.0
Norman North High School
7
1.0
Northland Christian School
3
0.375
Northview High School
15
0.7894736842105263
Plano East Sr. High
4
1.0
Plano West Sr High School
17
1.0
Reagan HS
3
1.0
Texas City High School
2
1.0
The Quarry Lane School
7
0.7
The Village High School
3
1.0
The Woodlands High School
7
1.0
Unionville
13
0.65
Westwood
27
1.0
Wylie Sr High School
1
1.0
Albuquerque Academy
13
0.7647058823529411
BASIS Independent Silicon Valley In
9
1.0
Bishop's
12
1.0
Blackfoot High School
1
1.0
Brentwood School
7
0.28
Cardinal Gibbons HS In

In [347]:
schoolNew

{'Centennial HS': 3,
 'Claudia Taylor Johnson High School': 9,
 'Fremont High School': 9,
 'Harlingen HS South': 1,
 'Heights High School': 6,
 'J. Frank Dobie High School': 1,
 'Jack C. Hays': 2,
 'L C Anderson High School': 20,
 'Lake Travis High School': 1,
 'Legacy Christian Academy': 3,
 'Little Rock Central': 4,
 'Los Alamos High School': 1,
 'Louis D. Brandeis': 1,
 'McMillen High School': 3,
 'McNeil High School': 13,
 'Memorial High School': 6,
 'Norman North High School': 7,
 'Northland Christian School': 3,
 'Northview High School': 15,
 'Plano East Sr. High': 4,
 'Plano West Sr High School': 17,
 'Reagan HS': 3,
 'Texas City High School': 2,
 'The Quarry Lane School': 7,
 'The Village High School': 3,
 'The Woodlands High School': 7,
 'Unionville': 13,
 'Westwood': 27,
 'Wylie Sr High School': 1,
 'Albuquerque Academy': 13,
 'BASIS Independent Silicon Valley In': 9,
 "Bishop's": 12,
 'Blackfoot High School': 1,
 'Brentwood School': 7,
 'Cardinal Gibbons HS Independent': 2,


In [348]:
new

304

In [349]:
len(schoolAll)

423