In [1]:
import numpy as np
import pandas as pd
import glob
import os
import fuzzywuzzy as fuzz
from fuzzywuzzy import process



In [2]:
### Column headers applied positionally to the combined scrape
# Standard 29-column layout.
columns_21 = [
    'Event', 'School', 'TEA', 'City', 'Directors', 'Conference', 'Classification', 'Year', 'ID',
    'Stage Judge 1', 'Stage Judge 2', 'Stage Judge 3', 'Stage Final',
    'SR Judge 1', 'SR Judge 2', 'SR Judge 3', 'SR Final', 'Award',
    'Selection 1', 'Selection 2', 'Selection 3', 'Date', 'Region',
    'cj1', 'cj2', 'cj3', 'srj1', 'srj2', 'srj3',
]
# Same layout plus one trailing overflow column ('oops'); `+` builds a new,
# independent list, so the two lists never share state.
columns_22 = columns_21 + ['oops']

## Group the CSVs together, and fix rows with Accompanist error

In [3]:
# Collect every per-run CSV. glob returns paths in arbitrary, filesystem-dependent
# order, so sort them to make the row order of the combined frame reproducible.
files = sorted(glob.glob("full_run/*.csv"))
if not files:
    # Fail with a clear message instead of pandas' generic "No objects to concatenate".
    raise FileNotFoundError("No CSV files found matching 'full_run/*.csv'")

# Read each file and stack the results into one frame with a fresh 0..n-1 index.
# pd.concat already returns a DataFrame, so no extra wrapping is needed.
df = pd.concat(map(pd.read_csv, files), ignore_index=True)

# Drop the 'Unnamed: 0' column carried in from the input files.
df = df.drop(columns=['Unnamed: 0'])

# Label the remaining columns positionally; pandas raises ValueError if the
# column count does not match len(columns_22).
df.columns = columns_22

In [4]:
# Rows that spilled into the overflow column are the ones needing repair.
overflow_mask = df['oops'].notnull()
oops_df = df[overflow_mask]

# On those rows remove 'Conference' and relabel the remaining columns
# positionally with the standard layout.
oops_fix_df = oops_df.drop(columns=['Conference'])
oops_fix_df.columns = columns_21

# Stitch the untouched rows and the repaired rows back together, then discard
# the overflow column, which is no longer needed.
df = pd.concat([df.loc[~overflow_mask], oops_fix_df], ignore_index=True)
df = df.drop(columns=['oops'])

# Discard rows whose Event contains '9'. na=True means rows with a missing
# Event are discarded as well, which is what the `== False` comparison did.
event_has_nine = df['Event'].str.contains('9', na=True)
df = df[~event_has_nine]

df.describe()

Unnamed: 0,Event,School,TEA,City,Directors,Conference,Classification,Year,ID,Stage Judge 1,...,Selection 2,Selection 3,Date,Region,cj1,cj2,cj3,srj1,srj2,srj3
count,60803,60803,60803,60803,60803,60803,60803,60803,60803,60803,...,60803,60803,60803,60803,60803,60803,60803,60803,60803,60625
unique,1,6236,2373,1498,24456,28,39,210,60631,13,...,8975,11894,499,155,1116,1219,1196,1230,1344,1203
top,100-Concert Band,Allen High School,TEA:,San Antonio,Joe Martinez,CC,Varsity,2019,1,1,...,Colliding Visions (Balmages/ ),"Moscow, 1941 (Balmages)",DATE of EVENT 04/17/2018,Region: 8,1. Keith Bearden,2. Cindy Lansford,3. Randy Vaughn,1. Phil Anthony,2. Tye Ann Payne,3. Rick Yancey
freq,60803,112,56877,2882,29,14895,31202,3274,78,25874,...,422,446,714,3135,709,463,843,448,341,736


In [5]:
# Keep only the rows that HAVE a third selection (non-null 'Selection 3').
# .copy() gives df_clean its own data: later cells assign columns on df_clean,
# and doing that on a filtered view of df triggers SettingWithCopyWarning.
df_clean = df[df['Selection 3'].notnull()].copy()
len(df_clean)

60803

In [6]:
# Normalise the selection titles: trim the ends, replace each double space
# with a single one, and delete commas and periods (all literal matches).
selection_columns = ['Selection 1', 'Selection 2', 'Selection 3']

for i in selection_columns:
    df_clean[i] = (
        df_clean[i]
        .str.strip()
        .str.replace('  ', ' ', regex=False)
        .str.replace(',', '', regex=False)
        .str.replace('.', '', regex=False)
    )

# Trim whitespace from classification column
df_clean['Classification'] = df_clean['Classification'].str.strip()

# Split composer/arranger information out of the selection titles
for column in selection_columns:
    # Text inside the first (...) pair goes to "<column> Comp/arr" (NaN when
    # the title has no parentheses).
    df_clean[f"{column} Comp/arr"] = df_clean[column].str.extract(r'\((.*?)\)', expand=False)
    # Then delete everything from the first '(' through the last ')'.
    # Raw string: '\(' in a normal string literal is an invalid escape sequence.
    df_clean[column] = df_clean[column].str.replace(r'\(.*\)', '', regex=True)
    # trim whitespace left behind by the removal
    df_clean[column] = df_clean[column].str.strip()

In [7]:
# Discard rows whose Conference contains 'Acc'. na=True flags rows with a
# missing Conference too, so they are discarded as well (same outcome as
# comparing the match result to False).
conference_is_acc = df_clean['Conference'].str.contains('Acc', na=True)
df_clean = df_clean[~conference_is_acc]


In [8]:
# Map variant conference labels onto their canonical spelling. Only whole-cell
# matches are replaced, and no target value is itself a key, so a single
# dictionary lookup gives the same result as four consecutive replacements.
conference_aliases = {
    '2C': 'CC',
    '4A': 'AAAA',
    'cc': 'CC',
    '1C': 'C',
}
df_clean['Conference'] = df_clean['Conference'].replace(conference_aliases)

In [9]:
# Cast the Year column to integers (raises if any value is not convertible).
df_clean = df_clean.astype({'Year': int})

In [10]:
# The eight rating columns: three judges plus a final rating, for 'Stage' and 'SR'.
judging_columns = [
    f'{prefix} {slot}'
    for prefix in ('Stage', 'SR')
    for slot in ('Judge 1', 'Judge 2', 'Judge 3', 'Final')
]
# Rating values that appear as text and should become real integers.
numbers = [str(value) for value in range(1, 6)]

# Swap each textual rating for its integer; any other cell value is left alone.
for j in judging_columns:
    for n in numbers:
        is_text_rating = df_clean[j] == n
        df_clean.loc[is_text_rating, j] = int(n)




In [11]:
# Second normalisation pass over the selection titles (same steps as the
# earlier selection clean-up cell): trim, replace double spaces, drop commas
# and periods.
selection_columns = ['Selection 1', 'Selection 2', 'Selection 3']

for i in selection_columns:
    df_clean[i] = (
        df_clean[i]
        .str.strip()
        .str.replace('  ', ' ', regex=False)
        .str.replace(',', '', regex=False)
        .str.replace('.', '', regex=False)
    )

# Trim whitespace from classification column
df_clean['Classification'] = df_clean['Classification'].str.strip()

# Remove any remaining parenthesised composer/arranger text from the titles
for column in selection_columns:
    # Delete everything from the first '(' through the last ')'.
    # Raw string: '\(' in a normal string literal is an invalid escape sequence.
    df_clean[column] = df_clean[column].str.replace(r'\(.*\)', '', regex=True)
    # trim whitespace left behind by the removal
    df_clean[column] = df_clean[column].str.strip()


In [12]:
# Trim leading/trailing whitespace from every column label.
df_clean = df_clean.rename(columns=str.strip)

In [13]:
# Drop the Event and TEA columns from the raw frame `df` (this cell does not
# touch df_clean). errors='ignore' keeps the cell re-runnable: without it a
# second execution raises KeyError because the columns are already gone.
df = df.drop(columns=['Event', 'TEA'], errors='ignore')
print(len(df))

60803


In [14]:
# Ordered (spelling, abbreviation) pairs used to shorten school-type suffixes.
# Order matters: longer spellings must come before the shorter ones they
# contain (e.g. "Junior High School" before "High School").
school_abbreviations = [
    ("Junior High School", 'JH'),
    ("Junior High", 'JH'),
    ("JuniorHigh", 'JH'),
    ("JH School", 'JH'),
    ("High School", 'HS'),
    ("high School", 'HS'),
    ("High school", 'HS'),
    ("high school", 'HS'),
    ("Middle School", 'MS'),
    ("HighSchool", 'HS'),
    ("MiddleSchool", 'MS'),
    ("Intermediate School", 'IS'),
    ("Intermediat School", 'IS'),
    ("Midle School", 'MS'),
    ("Midddle School", 'MS'),
    ("MS School", 'MS'),
    ("M.S. School", 'MS'),
    ("HIgh School", 'HS'),
    ("MIddle School", 'MS'),
    ("Middle 7 School", 'MS'),
    ("Higjh School", 'HS'),
    ("Niddle School", 'MS'),
    ("middles School", 'MS'),
    ("H.S.", 'HS'),
    ("M.S.", 'MS'),
    ("Middle SChool", 'MS'),
    ("HS Band", 'HS'),
]

# Apply the pairs in order as LITERAL replacements. With regex=True the dots
# in "H.S." / "M.S." are wildcards, so e.g. "H.S." would also rewrite any
# four-character run shaped like "H?S?" (such as "HISD").
school_names = df_clean['School']
for spelling, abbreviation in school_abbreviations:
    school_names = school_names.str.replace(spelling, abbreviation, regex=False)
df_clean['School'] = school_names

In [15]:
# Summarise the School column after the abbreviation pass (count / unique / top / freq).
df_clean['School'].describe()

count         60801
unique         4712
top        Lamar MS
freq            144
Name: School, dtype: object

In [16]:
# List the school names that carry none of the HS / MS / JH abbreviations.
# na=True treats a missing School as "has an abbreviation", so such rows are
# excluded here exactly as they are by three successive `== False` filters.
has_abbreviation = df_clean['School'].str.contains('HS|MS|JH', na=True)
school_df = df_clean[~has_abbreviation]
print(school_df['School'].unique())

[' 0' ' Little Cypress-Mauriceville' ' West Hardin CCISD' ...
 ' G.W. Carver Academy' ' Tioga' ' Townview Center']


In [17]:
# Discard every row that has 'DNA' or 'DQ' in any of the rating columns.
excluded = df_clean[judging_columns].isin(['DNA', 'DQ']).any(axis=1)
df_clean = df_clean[~excluded]

# Turn empty / single-space ratings into NaN, one column at a time.
for i in judging_columns:
    df_clean[i] = df_clean[i].replace(['', ' '], np.nan)

print(len(df_clean))

60269


In [18]:
# 'Stage Average' is the mean of the three stage judges' ratings; a missing
# rating from any judge makes the average missing as well.
stage_total = (
    df_clean['Stage Judge 1']
    + df_clean['Stage Judge 2']
    + df_clean['Stage Judge 3']
)
df_clean['Stage Average'] = stage_total / 3

In [19]:
# Order the rows by Year, then remove the Event, TEA and ID columns.
df_clean = (
    df_clean
    .sort_values(by=['Year'])
    .drop(columns=['Event'])
    .drop(columns=['TEA'])
    .drop(columns=['ID'])
)




In [20]:
# Gather every pml CSV. glob's order is filesystem-dependent, so sort the paths
# to make the row order of the combined frame (and the CSV written below)
# reproducible.
files = sorted(glob.glob("csv_files/pml/*.csv"))
if not files:
    # Fail with a clear message instead of pandas' generic "No objects to concatenate".
    raise FileNotFoundError("No CSV files found matching 'csv_files/pml/*.csv'")

pml_df = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)

# Write the combined frame out (the index is written as the first column).
pml_df.to_csv("csv_files/pml_output.csv")

In [21]:
# Strip the 'DATE of EVENT ' label from the Date values.
df_clean['Date'] = df_clean['Date'].str.replace('DATE of EVENT ', '')

# Judge-name columns: delete the '1. ' / '2. ' / '3. ' markers (literal match,
# wherever they occur in the value) and lowercase what is left.
judge_name_columns = ['cj1', 'cj2', 'cj3', 'srj1', 'srj2', 'srj3']
for i in judge_name_columns:
    names = df_clean[i]
    for marker in ('1. ', '2. ', '3. '):
        names = names.str.replace(marker, '', regex=False)
    df_clean[i] = names.str.lower()

In [22]:
# Trim surrounding whitespace from every judge-name column.
# (Truncating at the first ',' or '-' is intentionally not done here.)
for i in judge_name_columns:
    trimmed = df_clean[i].str.strip()
    df_clean[i] = trimmed

In [23]:
all_columns = df_clean.columns
# Trim whitespace from the text values of every column.
# Only cells that actually hold a str are touched:
#   * numeric columns are skipped outright (no exception needed to detect them);
#   * in mixed object columns the non-string cells keep their value, whereas
#     calling .str.strip() on such a column would turn them into NaN.
for i in all_columns:
    column = df_clean[i]
    if pd.api.types.is_numeric_dtype(column):
        continue
    is_text = column.map(lambda value: isinstance(value, str)).astype(bool)
    if not is_text.any():
        continue
    # to_numpy() assigns by position, so index alignment cannot interfere.
    df_clean.loc[is_text, i] = column[is_text].str.strip().to_numpy()

In [24]:
# Keep every row whose School is anything other than the literal string '0'.
school_is_real = df_clean['School'].ne('0')
df_clean = df_clean.loc[school_is_real]

In [25]:
# Rows with a Year above 3000 are treated as shifted rows that need repair.
year_is_bogus = df_clean['Year'] > 3000

# Take an independent copy of those rows so the column assignments below do
# not write into a view of df_clean (SettingWithCopyWarning).
df_clean_yr_error = df_clean[year_is_bogus].copy()

# Keep everything else. Using the complement of the same mask means a row with
# Year == 3000 stays in df_clean instead of silently falling between the
# `> 3000` and `< 3000` tests.
df_clean = df_clean[~year_is_bogus]

# Shift the values back: Year takes Classification's value and Classification
# takes Conference's value.
df_clean_yr_error['Year'] = df_clean_yr_error['Classification']
df_clean_yr_error['Classification'] = df_clean_yr_error['Conference']

# fill conference column with blanks
df_clean_yr_error['Conference'] = ''


In [26]:
# Append the repaired year-error rows to the main frame and renumber the index.
frames_to_stack = (df_clean, df_clean_yr_error)
df_clean = pd.concat(frames_to_stack, axis=0, ignore_index=True)

In [27]:
# In each judge-name column keep only the text before the first ' - '
# separator, then trim whatever whitespace remains around it.
for i in judge_name_columns:
    leading_part = df_clean[i].str.split(' - ').str[0]
    df_clean[i] = leading_part.str.strip()

df_clean


Unnamed: 0,School,City,Directors,Conference,Classification,Year,Stage Judge 1,Stage Judge 2,Stage Judge 3,Stage Final,...,cj1,cj2,cj3,srj1,srj2,srj3,Selection 1 Comp/arr,Selection 2 Comp/arr,Selection 3 Comp/arr,Stage Average
0,James Bowie HS,Arlington,Larry Brown,AAAAA,Non-Varsity,2005,1.0,1.0,1.0,1.0,...,richard bass,"joe frank, jr.",rodney klett,george jones,tom neugent,marion west,Chambers,Grainger/Rogers/SMC,Gregson,1.000000
1,Munday HS,Munday,Rodney D. Bennett,A,Varsity,2005,1.0,1.0,2.0,1.0,...,mike glaze,richard herrera,will burks,harold bufe,mack bibb,june bearden,King/Glover,Stuart,Swearingen,1.333333
2,Holliday HS,Holliday,Melanie Hadderton,AA,Varsity,2005,2.0,2.0,3.0,2.0,...,mike glaze,richard herrera,will burks,harold bufe,mack bibb,june bearden,Carl King,Sheldon,Meyer,2.333333
3,Decatur HS,Decatur,Doug Fulwood,AAA,Varsity,2005,1.0,2.0,2.0,2.0,...,mike glaze,richard herrera,will burks,harold bufe,mack bibb,june bearden,King/Swearingen,David Black,Robert Sheldon,1.666667
4,City View HS,Wichita Falls,Terah Kay Shawver,AA,Varsity,2005,1.0,1.0,1.0,1.0,...,mike glaze,richard herrera,will burks,harold bufe,mack bibb,june bearden,LaPlante,Sheldon,Strommen,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60225,Alvarado MS,Alvarado,Kelli Bahner / Joe Gunn,,Non-Varsity,2014,1.0,3.0,1.0,2.0,...,christine cumberledge,james marioneaux,julie amos,corey ash,harold bufe,,Owens/,Murtha,,1.666667
60226,Alvarado MS,Alvarado,Kelli Bahner / Joe Gunn,,Varsity,2014,1.0,1.0,1.0,1.0,...,christine cumberledge,james marioneaux,julie amos,corey ash,harold bufe,,Himes/,Concert March,,1.000000
60227,Del Rio HS,Del Rio,Daniel White,,Sub Non-Varsity,2014,1.0,1.0,1.0,1.0,...,bob whipkey,rogerio olivarez,charles cabrera,kyle friesenhahn,juan sosa,,Standridge/,King/Swearingen,,1.000000
60228,Stinson MS,San Antonio,Kevin Leman / Alex Melendez,,Non-Varsity,2014,1.0,1.0,1.0,1.0,...,kim rosenberg,javier vera,james snider,cathy teltschik,larry wolf,,Gazlay/,Smith/,,1.000000


In [114]:
# Collect the distinct City values, in order of first appearance (nothing is printed here).
city_list = pd.unique(df_clean['City'])

In [115]:
# Load the reference table of city names; its 'City' column supplies the
# candidate spellings for the fuzzy matching done in the next cell.


cities_df = pd.read_csv('csv_files/cities/cities.csv' )

In [116]:
# Candidate spellings to match against: computed once (not once per city),
# with missing values removed and duplicates collapsed.
city_choices = cities_df['City'].dropna().unique().tolist()

# Build the raw -> matched lookup up front with fuzzywuzzy.
# Non-text entries (e.g. NaN) are skipped rather than passed to the matcher.
city_matches = {}
for i in city_list:
    if not isinstance(i, str):
        continue
    match = process.extractOne(i, city_choices)
    if match is not None:  # None only when there are no candidates at all
        city_matches[i] = match[0]

# Work on a copy so df_clean is not modified through an alias, and apply the
# lookup in ONE pass. Replacing city by city over the whole column can chain
# (A -> B, then a later iteration rewrites B -> C); a single map cannot.
df_cities = df_clean.copy()
df_cities['City'] = df_cities['City'].map(lambda city: city_matches.get(city, city))


    



In [None]:
# Adopt the city-normalised frame as the working frame for the remaining cells.
df_clean = df_cities

In [None]:
# Display the City column to inspect the result of the fuzzy matching.
df_clean['City']

In [28]:
# Ordered clean-up rules for the judge-name columns.
# Each rule is (text_to_find, replacement) and is applied as a LITERAL
# substring replacement; a third element True marks a regular expression.
# Rules run top to bottom, so ordering matters:
#   * a longer suffix must come before the shorter one it contains
#     (", mission cisd" before ", mission", ", el paso isd" before ", el paso",
#     ", weslaco isd" before ", weslaco"), otherwise the longer rule never fires;
#   * rules whose text contains the digits 1-6 must come before the rules that
#     strip those digits, otherwise they can never match.
judge_name_rules = [
    # --- trailing city / school / role annotations ---
    (', lubbock', ''),
    ('-harlingen', ''),
    (', san antonio', ''),
    (', sharyland', ''),
    (', austin', ''),
    (', moore', ''),
    (', mission cisd', ''),
    (', mission', ''),
    (', edinburg north hs, edinburg cisd', ''),
    (', edinburg north hs', ''),
    (', edinburg cisd', ''),
    (', lake travis', ''),
    (', robstown hs', ''),
    (', robstown', ''),
    (', h.e.b.', ''),
    (', duncanville', ''),
    (', concert', ''),
    (', sr', ''),
    (', roma isd', ''),
    (', roma', ''),
    (', retired', ''),
    (', corpus christi', ''),
    (', la feria hs', ''),
    (', la feria', ''),
    (', iii', ''),
    (', orange grove', ''),
    (', weslaco isd', ''),
    (', weslaco', ''),
    (', chair', ''),
    ('(chair)', ''),
    (', director fine arts', ''),
    (', director of fine arts', ''),
    (', tomball isd', ''),
    (', baytown', ''),
    (', ret.', ''),
    (', edinburg hs', ''),
    (', edinburg', ''),
    (', juarez-lincoln hs', ''),
    (', la joya isd', ''),
    (', la joya', ''),
    (', alvarado', ''),
    (', mcallen isd', ''),
    (', mcallen', ''),
    (', united isd', ''),
    (', laredo', ''),
    (', mcqueeny', ''),
    (', seguin', ''),
    (', connally hs', ''),
    (', el paso isd', ''),
    (', el paso', ''),
    (', brownsville isd', ''),
    (', brownsville', ''),
    (', inst.musicadv.', ''),
    (', grulla hs', ''),
    (', rio grande city isd', ''),
    (', rio grande city', ''),
    (', midway hs', ''),
    (', woodway', ''),
    (', tuloso-midway hs', ''),
    (', ac blunt ms', ''),
    (', aransas pass', ''),
    (', leander', ''),
    (', falfurrias hs', ''),
    (', falfurrias', ''),
    (', kingsville', ''),
    ('-retired', ''),
    (', fine arts administrator', ''),
    (',la joya isd', ''),
    (', aledo', ''),
    (', canyon', ''),
    (', p.s.j.a.', ''),
    (', odem hs', ''),
    (', c.o. wilson ms', ''),
    (', garland', ''),
    (', houston', ''),
    ('-banda', ''),
    ('*', ''),
    ('--', ''),
    # --- digit-bearing placeholders: must run BEFORE the digit stripping below ---
    ('(10th)', ''),
    ('(11th)', ''),
    ('sr #1', ''),
    ('sr #2', ''),
    ('sr #3', ''),
    ('4/11/17', ''),
    ('4/13/17', ''),
    ('see region 21 web site', ''),
    # --- stray digits 1-6 ---
    ('1', ''),
    ('2', ''),
    ('3', ''),
    ('4', ''),
    ('5', ''),
    ('6', ''),
    # --- spelling corrections and remaining annotations ---
    ('xxx', ''),
    ('virginia osolvsky', 'virginia olsovsky'),
    ('virginia osovsky', 'virginia olsovsky'),
    ('virginian olsovsky', 'virginia olsovsky'),
    ('villareal', 'villarreal'),
    ('-flour bluff', ''),
    (' d. ', ' '),
    ('wallace diefolf', 'wallace dierolf'),
    ('walace dierolf', 'wallace dierolf'),
    ('unknown', ''),
    # two-step on purpose: fix the typo first, then shorten the name
    ('ty ann payne', 'tye ann payne'),
    ('tye ann payne', 'tye payne'),
    ('-bastrop', ''),
    ('allmany', 'almany'),
    ('mcelory', 'mcelroy'),
    ('knolficek', 'knoflicek'),
    ('knloficek', 'knoflicek'),
    ('knofllicek', 'knoflicek'),
    ('tom herrington', 'tom harrington'),
    (' (chrmn.)', ''),
    (' (chmn)', ''),
    (' (chmn.)', ''),
    ('clearwarter', 'clearwater'),
    (' .j ', ' '),
    ('tim edens', 'tim edins'),
    ('tim andersen', 'tim anderson'),
    ('terri brockway', 'teri brockway'),
    ('tbd/', ''),
    ('tammy fedenych', 'tammy fedynich'),
    ('tammy fedinich', 'tammy fedynich'),
    ('tammy fednmich', 'tammy fedynich'),
    ('susan meyer-patterson', 'susan patterson'),
    ('susan meyer patterson', 'susan patterson'),
    ('-houston', ''),
    ('-pearsall', ''),
    ('-psja', ''),
    ('-corpus christi', ''),
    ('-keller', ''),
    ('steel, jason', 'steele, jason'),
    ('maudlin', 'mauldin'),
    # two-step on purpose: fix the typo first, then normalise the first name
    ('stacy claek', 'stacy clark'),
    ('stacy clark', 'stacey clark'),
    ('see original event', ''),
    ('see original contest', ''),
    ('see original evenrt', ''),
    ('sheppard', 'shepherd'),
    ('-beaumont', ''),
    ('sandy bow brunskill', 'sandra brunskill'),
    ('sandy brunskill', 'sandra brunskill'),
    ('sandra bow brunskill', 'sandra brunskill'),
    ('sandra bow-brunskill', 'sandra brunskill'),
    ('rylon guidory', 'rylon guidry'),
    ('-austin', ''),
    ('ryan straten', 'ryan stratten'),
    ('-mabank', ''),
    ('rusty honeycutt', 'rustin honeycutt'),
    ('barerra', 'barrera'),
    ('costellano', 'castellano'),
    (' m. ', ' '),
    (',san antonio', ''),
    (', frisco', ''),
    ('/poteet', ''),
    (' (8th)', ''),
    (' (9th)', ''),
    ('-spring', ''),
    ('margarit', 'margaret'),
    ('nv-', ''),
    ('/v-', '/'),
    ('//', '/'),
    # Leading "tba" / "tbd" placeholder. The previous patterns "^tba?" and
    # "^tbd?" both matched a bare "tb", so "tbd" was reduced to "d".
    (r'^tb[ad]\b', '', True),
    ('- -', ''),
    ('-plano', ''),
    (' .k ', ' '),
    # no trailing space in the search text, so a value that ends right after
    # the name is corrected too (the final strip removes any leftover space)
    ('bene davis', 'ben davis'),
    ('benny davis', 'ben davis'),
    ('see region website', ''),
    ('see region web site', ''),
    ('rick yancy', 'rick yancey'),
    ('phillip alvarado', 'phil alvarado'),
]

# Run every rule, in order, over each judge-name column, then trim the result.
for each in judge_name_columns:
    names = df_clean[each]
    for pattern, replacement, *flags in judge_name_rules:
        names = names.str.replace(pattern, replacement, regex=bool(flags and flags[0]))
    df_clean[each] = names.str.strip()

In [29]:
# Write the cleaned frame to disk. to_csv is called with its default
# index=True, so the DataFrame index becomes the first (unnamed) CSV column.
df_clean.to_csv("csv_files/full_output.csv")