In [1]:
import geopandas as gp
import pandas as pd
import os
import re

# Lift pandas' row, column and width display limits. These settings are
# essential for the fields table to show up correctly in the readme.
for _display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)

In [2]:
# Lookup from a district-level column name to a short code.
# NOTE(review): not referenced by any of the cells visible in this notebook.
level_race_name_dict = dict(
    CONG_DIST="CON",
    SLDL_DIST="SL",
    SLDU_DIST="SU",
    SCONG_DIST="SCON",
)

In [3]:
'''Following Docstring Convention: https://www.python.org/dev/peps/pep-0257/'''

import pandas as pd
import os
# Lift pandas' row, column and width display limits. These settings are
# essential for the fields table to show up correctly in the readme.
for _display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)


def create_fields_table(race_field_header_0, fields_dict_0, 
                        add_race_field_header_1 = '', fields_dict_1 = None, 
                        add_race_field_header_2 = '', fields_dict_2 = None, 
                        add_race_field_header_3 = '', fields_dict_3 = None):
    '''Purpose: Create fields table used in readme based on field dictionary created separately
    Arguments:
        race_field_header_0: include asterisks "***text***" and label first set of fields
        fields_dict_0: the default dictionary for the primary file (statewide)
        add_race_field_header_1: include asterisks to draw attention to section - ex: "***additional_race_file_name_fields***"
        fields_dict_1: additional fields to go under add_race_field_header_1 header
        add_race_field_header_2 and _3: same use as add_race_field_header_1 - include as needed
        fields_dict_2 and _3: same use as fields_dict_1 - include as needed associated with corresponding add_race_field_header section
    Returns:
        pandas DataFrame with two columns, 'Field Name' and 'Description'. Each of the
        four sections contributes a blank row, a header row (header text in 'Field Name',
        empty 'Description') and then one row per dictionary entry. Unused optional
        sections still contribute their blank row and (empty) header row.
    Note:
        A fields dictionary given as None is treated as an empty dictionary. None replaces
        the former `{}` defaults so that no mutable object is shared between calls.
    '''
    sections = (
        (race_field_header_0, fields_dict_0),
        (add_race_field_header_1, fields_dict_1),
        (add_race_field_header_2, fields_dict_2),
        (add_race_field_header_3, fields_dict_3),
    )

    field_names = []
    descriptions = []
    for section_header, section_fields in sections:
        if section_fields is None:
            section_fields = {}
        # Blank spacer row, then the header row, then the section's fields.
        field_names.extend(['', section_header])
        field_names.extend(section_fields.keys())
        descriptions.extend(['', ''])
        descriptions.extend(section_fields.values())

    return pd.DataFrame({'Field Name': field_names, 'Description': descriptions})

def erj_readme_template(stateabrv, state, year, election_type, additional_races, retrieval_date, vest_file_link, 
                        raw_data_source, state_erj_repo, office_codes, 
                        race_field_header_0, fields_dict_0, add_race_field_header_1 = '', fields_dict_1 = None, add_race_field_header_2 = '', fields_dict_2 = None, 
                        add_race_field_header_3 = '', fields_dict_3 = None,
                       additional_notes=' '):
    '''Purpose: standardize language in ERJ file README.txt
    Arguments:
        stateabrv = two character state abbreviation capitalized, 
        state = state name, first letter capitalized, 
        year = election year (XXXX), 
        election_type = general, primary, special or runoff, 
        additional_races = the races that RDH added to the original vest file (not in VEST's og file), 
        retrieval_date = date RDH retrieved VEST file, 
        vest_file_link = link to dataverse page for VEST's precinct boundary and election results file, 
        raw_data_source = site description and link, 
        state_erj_repo = link to erj github repository for given state
        office_codes = codes used broken off of field names for easy viewing. 
            For SU/SL/CON, include ##, so SU## for office code.
            The value is interpolated with str.format, so a non-string (e.g. a dict)
            appears in the README in its str() form.
        race_field_header_0: include asterisks "***text***" and label first set of fields
        fields_dict_0: the default dictionary for the primary file (statewide).
            Key is the field/value is the field description
        add_race_field_header_1: include asterisks to draw attention to section - ex: "***additional_race_file_name_fields***"
        fields_dict_1: additional fields to go under add_race_field_header_1 header
        add_race_field_header_2 and _3: same use as add_race_field_header_1 - include as needed
        fields_dict_2 and _3: same use as fields_dict_1 - include as needed associated with corresponding add_race_field_header section
        additional_notes = default set to a single space, but fill in with string where applicable.
    Returns:
        The full README text as one string: the header/sources/field-name notes, then the
        fields table, then the processing steps and additional notes.
    Note:
        fields_dict_1/_2/_3 default to None (treated as "no fields") instead of `{}` so
        that no mutable default object is shared between calls.
    '''
    # Normalise the optional dictionaries here so create_fields_table always gets a dict.
    fields_dict_1 = {} if fields_dict_1 is None else fields_dict_1
    fields_dict_2 = {} if fields_dict_2 is None else fields_dict_2
    fields_dict_3 = {} if fields_dict_3 is None else fields_dict_3

    # First section of README
    readme_p1 = '''{year} {stateabrv} {election_type} Precinct Boundary and Election Results
## RDH Date Retrieval
{retrieval_date}
## Sources
The RDH retrieved the VEST {year} {election_type} precinct boundary and election results shapefile from [VEST's Harvard Dataverse]({vest_file_link})
The RDH retrieved raw {year} {election_type} election results from {raw_data_source}
## Notes on Field Names (adapted from VEST):
Columns reporting votes generally follow the pattern: 
The first character is G for a general election, P for a primary, S for a special, and R for a runoff.
Characters 2 and 3 are the year of the election.*
Characters 4-6 represent the office type (see list below).
Character 7 represents the party of the candidate.
Characters 8-10 are the first three letters of the candidate's last name.
One example is:
G16PREDCLI
To fit within the GIS 10 character limit for field names, the naming convention is slightly different for the State Legislature and 
US House of Representatives. All fields are listed below with definitions.
Office Codes Used:
{office_codes}
## Fields:
'''.format(stateabrv=stateabrv, year=year, election_type=election_type, retrieval_date=retrieval_date,
           vest_file_link=vest_file_link, raw_data_source=raw_data_source, office_codes=office_codes)

    # Second section of README
    fields_table = create_fields_table(race_field_header_0, fields_dict_0, add_race_field_header_1, fields_dict_1, add_race_field_header_2, fields_dict_2, 
                        add_race_field_header_3, fields_dict_3)

    def _left_justify(column):
        # Formatter that pads every cell on the right to the length of the
        # column's longest string, so the text table is left-aligned.
        widest = fields_table[column].str.len().max()
        return '{{:<{}s}}'.format(widest).format

    readme_p2 = fields_table.to_string(
        formatters={'Description': _left_justify('Description'),
                    'Field Name': _left_justify('Field Name')},
        index=False)

    # Third section of README
    readme_p3 = '''
## Processing Steps
    
The RDH joined additional election results to VEST's existing precinct shapefile, including {additional_races} using Python.
For more information on the processing completed, visit our [Github repository]({state_erj_repo}) for Election Result Joins (ERJ) for {state}.
Where possible, the RDH validated the election results we processed against VEST's election results. For additional races the RDH manually checked state totals. For more information on this comparison, please see our processing on Github ({state_erj_repo}).
## Additional Notes
{additional_notes}
Please contact info@redistrictingdatahub.org for more information.
'''.format(state=state, additional_races=additional_races, state_erj_repo=state_erj_repo, additional_notes=additional_notes)

    full_readme = readme_p1 + readme_p2 + readme_p3
    return full_readme

def export_readme(readme_name, state, election_type, full_readme_text, overwrite=False):
    '''Purpose: Turn README string into a txt file in the ERJ folder
    Arguments:
        readme_name: must include file path to readme within erj folder
            ex: 
            readme_name = './az_gen_20_prec/README.txt'
        state, election_type: only used in the confirmation message that is printed
        full_readme_text: the README contents to write
        overwrite: False (default) keeps the original behaviour of refusing to replace an
            existing file; pass True to replace it, e.g. when re-running the notebook.
    Raises:
        FileExistsError: if readme_name already exists and overwrite is False.
    '''
    # 'x' = exclusive creation (fails if the file exists); 'w' = create or truncate.
    mode = 'w' if overwrite else 'x'
    # Explicit UTF-8 so non-ASCII text (e.g. accented candidate names) is written the
    # same way regardless of the platform's default encoding.
    with open(readme_name, mode, encoding='utf-8') as tf:
        tf.write(full_readme_text)
    print(state, election_type, " readme moved to folder")

In [23]:
# Which election this README describes.
stateabrv, state = "VA", "Virginia"
year, election_type = "2020", "Primary"
additional_races = "President, U.S. Senator, U.S. Congressional"

# Dates and source locations that get written into the README.
retrieval_date, upload_date = "08/22/2022", "12/06/2022"
vest_file_link = "https://dataverse.harvard.edu/file.xhtml?fileId=6174181&version=40.0"
raw_data_source = (
    "Election results from Virginia's Department of Elections "
    "(https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2020/) "
    "and precinct shapefiles from VEST's VA 2020 file.\n"
)
state_erj_repo = "https://github.com/nonpartisan-redistricting-datahub/erj-va"

# Free-text notes: a leading newline, the "~all files~" marker line, then a blank line.
additional_notes = "\n~all files~\n\n"


In [24]:
# Maps a "<CANDIDATE NAME> for <office>" label to the 10-character field code used for
# that candidate's vote column.
# presumably the labels match the raw state results text exactly - verify against the source data.
va_cand_dict = {'WRITE IN VOTES for President':'P20PREDWRI', 
                'AMY J. KLOBUCHAR for President':'P20PREDKLO',
                'ANDREW YANG for President':'P20PREDYAN', 
                'BERNARD SANDERS for President':'P20PREDSAN',
                'CORY A BOOKER for President':'P20PREDBOO', 
                'DEVAL PATRICK for President':'P20PREDPAT',
                'ELIZABETH WARREN for President':'P20PREDWAR',
                'JOSEPH ROBINETTE BIDEN for President':'P20PREDBID',
                'JULIÁN CASTRO for President':'P20PREDCAS',
                'MARIANNE D WILLIAMSON for President':'P20PREDWIL',
                'MICHAEL F. BENNET for President':'P20PREDBEN',
                'MICHAEL R BLOOMBERG for President':'P20PREDBLO',
                'PETER P. M. BUTTIGIEG for President':'P20PREDBUT',
                'THOMAS STEYER for President':'P20PREDSTE', 
                'TULSI GABBARD for President':'P20PREDGAB',
                'WRITE IN VOTES for Member House of Representatives2':'PCON02RWRI',
                'WRITE IN VOTES for Member United States Senate':'P20USSRWRI',
                'ALISSA ANNE BALDWIN for Member United States Senate':'P20USSRBAL',
                'BENITO LOYOLA for Member House of Representatives2':'PCON02RLOY',
                'DANIEL MAC ARTHUR GADE for Member United States Senate':'P20USSRGAD',
                'JAROME BELL for Member House of Representatives2':'PCON02RBEL',
                'SCOTT WILLIAM TAYLOR for Member House of Representatives2':'PCON02RTAY',
                'THOMAS A SPECIALE for Member United States Senate':'P20USSRSPE',
                'WRITE IN VOTES for Member House of Representatives3':'PCON03RWRI',
                'GEORGE MICHAEL YACUS for Member House of Representatives3':'PCON03RYAC',
                'JOHN HARRISON MADISON DOWNS for Member House of Representatives3':'PCON03RDOW',
                'JOHN WILLIAM COLLICK for Member House of Representatives3':'PCON03RCOL',
                'WRITE IN VOTES for Member House of Representatives5':'PCON05DWRI',
                'BRYANT CAMERON WEBB for Member House of Representatives5':'PCON05DWEB',
                'CLAIRE CARMEN RUSSO for Member House of Representatives5':'PCON05DRUS',
                'JOHN DANIEL LESINSKI for Member House of Representatives5':'PCON05DLES',
                'ROGER DEAN HUFFSTETLER for Member House of Representatives5':'PCON05DHUF',
                'WRITE IN VOTES for Member House of Representatives1':'PCON01DWRI',
                'LAVANGELENE AEREKA WILLIAMS for Member House of Representatives1':'PCON01DWIL',
                'QASIM RASHID for Member House of Representatives1':'PCON01DRAS',
                'WRITE IN VOTES for Member House of Representatives4':'PCON04DWRI',
                'ASTON DONALD MCEACHIN for Member House of Representatives4':'PCON04DMCE',
                'RONGERLIS CAZEL LEVINE for Member House of Representatives4':'PCON04DLEV',
                'WRITE IN VOTES for Member House of Representatives11':'PCON11DWRI',
                'GERALD EDWARD CONNOLLY for Member House of Representatives11':'PCON11DCON',
                'ZAINAB MASOOMA MOHSINI for Member House of Representatives11':'PCON11DMOH'}

In [25]:
# All field codes from va_cand_dict, in ascending string order.
holder = sorted(va_cand_dict.values())

In [26]:
# General-election ("G...") field codes, one row per contest.
# NOTE(review): not referenced by any of the cells visible in this notebook.
races_list = [
    # President
    'G20PREDBID', 'G20PRELJOR', 'G20PREOWRI', 'G20PRERTRU',
    # U.S. Senate
    'G20USSDWAR', 'G20USSOWRI', 'G20USSRGAD',
    # U.S. House, districts 1-11
    'GCON01DRAS', 'GCON01OWRI', 'GCON01RWIT',
    'GCON02DLUR', 'GCON02IFOS', 'GCON02OWRI', 'GCON02RTAY',
    'GCON03DSCO', 'GCON03OWRI', 'GCON03RCOL',
    'GCON04DMCE', 'GCON04OWRI', 'GCON04RBEN',
    'GCON05DWEB', 'GCON05OWRI', 'GCON05RGOO',
    'GCON06DBET', 'GCON06OWRI', 'GCON06RCLI',
    'GCON07DSPA', 'GCON07OWRI', 'GCON07RFRE',
    'GCON08DBEY', 'GCON08OWRI', 'GCON08RJOR',
    'GCON09OWRI', 'GCON09RGRI',
    'GCON10DWEX', 'GCON10OWRI', 'GCON10RAND',
    'GCON11DCON', 'GCON11OWRI', 'GCON11RANA',
]

In [27]:
# Invert va_cand_dict: field code -> "<CANDIDATE NAME> for <office>" label.
cleaner_cand_dict = {field_code: label for label, field_code in va_cand_dict.items()}

In [28]:
# Field code -> title-cased candidate name, inserted in the sorted order of `holder`.
# The name is whatever precedes the first " for" in the label.
sorted_cand_dict = {
    field_code: cleaner_cand_dict[field_code].partition(" for")[0].title()
    for field_code in holder
}

In [29]:
# Bare expression: the notebook displays the code -> candidate-name mapping for a visual check.
sorted_cand_dict

{'P20PREDBEN': 'Michael F. Bennet',
 'P20PREDBID': 'Joseph Robinette Biden',
 'P20PREDBLO': 'Michael R Bloomberg',
 'P20PREDBOO': 'Cory A Booker',
 'P20PREDBUT': 'Peter P. M. Buttigieg',
 'P20PREDCAS': 'Julián Castro',
 'P20PREDGAB': 'Tulsi Gabbard',
 'P20PREDKLO': 'Amy J. Klobuchar',
 'P20PREDPAT': 'Deval Patrick',
 'P20PREDSAN': 'Bernard Sanders',
 'P20PREDSTE': 'Thomas Steyer',
 'P20PREDWAR': 'Elizabeth Warren',
 'P20PREDWIL': 'Marianne D Williamson',
 'P20PREDWRI': 'Write In Votes',
 'P20PREDYAN': 'Andrew Yang',
 'P20USSRBAL': 'Alissa Anne Baldwin',
 'P20USSRGAD': 'Daniel Mac Arthur Gade',
 'P20USSRSPE': 'Thomas A Speciale',
 'P20USSRWRI': 'Write In Votes',
 'PCON01DRAS': 'Qasim Rashid',
 'PCON01DWIL': 'Lavangelene Aereka Williams',
 'PCON01DWRI': 'Write In Votes',
 'PCON02RBEL': 'Jarome Bell',
 'PCON02RLOY': 'Benito Loyola',
 'PCON02RTAY': 'Scott William Taylor',
 'PCON02RWRI': 'Write In Votes',
 'PCON03RCOL': 'John William Collick',
 'PCON03RDOW': 'John Harrison Madison Downs',
 

In [30]:
import os

# Start the README field list from the candidate fields. Take a copy rather than an
# alias so that adding the non-candidate fields to fields_dict_0 in the next cell
# does not also modify sorted_cand_dict.
fields_dict_0 = dict(sorted_cand_dict)
# Header row shown above this set of fields in the README fields table.
race_field_header_0 = "***va_prim_20_st_cong_prec.zip***"

In [31]:
# Non-candidate fields, appended after the candidate fields in the README table.
fields_dict_0["UNIQUE_ID"] = "Unique Identifier"
fields_dict_0['COUNTYFP'] = "County FIP"
# LOCALITY was described as "State FIP", which looks copied from a STATEFP entry.
# NOTE(review): "Locality Name" is inferred from the field name - confirm against the shapefile.
fields_dict_0['LOCALITY'] = "Locality Name"
fields_dict_0["VTDST"] = "VTD Code"
fields_dict_0['PRECINCT'] = "Precinct Name"
fields_dict_0['CONG_DIST'] = "Congressional District"

In [32]:
# Field name -> description for the non-candidate columns.
fields_dict = {
    "UNIQUE_ID": "Unique Identifier",
    "COUNTYFP": "County FIP",
    "STATEFP": "State FIP",
    "VTDST": "VTD Code",
    "NAME": "VTD Name",
    "COUNTY": "County Name",
    "PREC_NAME": "Precinct Name",
    "CONG_DIST": "U.S. Congressional District",
}

In [33]:
# Office code (as it appears inside a field name) -> office title.
office_dict = {
    'PRE': 'President',
    "USS": 'U.S. Senator',
    "CONXX": 'U.S. Congress',
}




In [34]:
# Alias handed to erj_readme_template as `office_codes`. The template inserts it with
# str.format, so this dict is written to the README in its str() form.
# NOTE(review): confirm a raw dict repr is the intended look under "Office Codes Used:".
office_codes = office_dict

In [35]:
# The earlier positional call no longer matched erj_readme_template's signature:
# fields_dict landed in `stateabrv`, upload_date in `raw_data_source`, and
# additional_notes in `add_race_field_header_1`. Keyword arguments bind every value to
# the parameter it is meant for.
full_readme = erj_readme_template(
    stateabrv=stateabrv,
    state=state,
    year=year,
    election_type=election_type,
    additional_races=additional_races,
    retrieval_date=retrieval_date,
    vest_file_link=vest_file_link,
    raw_data_source=raw_data_source,
    state_erj_repo=state_erj_repo,
    office_codes=office_codes,
    race_field_header_0=race_field_header_0,
    fields_dict_0=fields_dict_0,
    additional_notes=additional_notes,
)

In [36]:
# Build the README text that gets exported. The notes section is deliberately passed
# as a single space here rather than the `additional_notes` variable.
full_readme = erj_readme_template(
    stateabrv=stateabrv,
    state=state,
    year=year,
    election_type=election_type,
    additional_races=additional_races,
    retrieval_date=retrieval_date,
    vest_file_link=vest_file_link,
    raw_data_source=raw_data_source,
    state_erj_repo=state_erj_repo,
    office_codes=office_codes,
    race_field_header_0=race_field_header_0,
    fields_dict_0=fields_dict_0,
    additional_notes=' ',
)

In [39]:
# Write the README into the ERJ folder. export_readme creates the file exclusively, so
# this raises FileExistsError if the README is already there (e.g. on a re-run).
export_readme(
    readme_name="./va_prim_20_prec/README.txt",
    state="Virginia",
    election_type="primary",
    full_readme_text=full_readme,
)

FileExistsError: [Errno 17] File exists: './va_prim_20_prec/README.txt'