# Raw from source

[MEDSL:](https://github.com/MEDSL/2022-elections-official/blob/main/individual_states/2022-ms-local-precinct-general.zip) Precinct-level results, derived from OpenElections

[Mississippi Sec. of State:](https://www.sos.ms.gov/elections-voting/2022-general-election-results) Precinct-level results, in PDF format

# Setup

Import libraries, load in original precinct-level data, set directories

In [1]:
import pandas as pd
import os
import numpy as np
import re

#Stop warnings about future changes to libraries from crowding output
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
#Set the working directory
project_folder = '/Users/grantschwab/Desktop/RDH/Projects/mississippi_general2022/update'
os.chdir(project_folder)
wd = os.getcwd()

#Make folder for output files
if not os.path.exists("output"):
  os.mkdir("output")
  print("The 'output' folder has been created.")
else:
  print("The folder already exists.")

The 'output' folder has been created.


# Process original results

## Read in results, clean, pivot prep

In [3]:
#Read in file
frame = pd.read_csv('./raw-from-source/ms_cleaned22.csv')

In [4]:
#Set up unique precinct identifier
frame['county_code'] = frame.county_name.str[:3]
frame['pct_std'] = frame.precinct + ':::' + frame.county_code

#How many precincts?
frame.pct_std.nunique()

1758

Looking at which contests to include

In [5]:
#Examine contests included
frame['office'].unique()

#Helpful for making informed assessment of which contests to include in file
unique_precincts = list(frame['pct_std'].unique())
all_contests = sorted(list(frame['office'].unique()))
len_all_contests = len(all_contests)
listofzeros = [0] * len_all_contests
num_precs = len(unique_precincts)
contests_dict = dict(zip(all_contests,listofzeros))
in_all_precincts = []
for i in unique_precincts:
    sub_df = frame[frame['pct_std']==i]
    contests = list(sub_df['office'].unique())
    for contest in contests:
        contests_dict[contest] = int(contests_dict.get(contest))+1
contests_keep_guess =[]
for k,v in contests_dict.items():
    if v>100:
        print(k,'\t',v)
        contests_keep_guess.append(k)
print(contests_keep_guess)

BALLOTS CAST 	 108
BALLOTS CAST - BLANK 	 108
COURT OF APPEALS 	 128
REGISTERED VOTERS 	 108
US HOUSE 	 1758
['BALLOTS CAST', 'BALLOTS CAST - BLANK', 'COURT OF APPEALS', 'REGISTERED VOTERS', 'US HOUSE']


Starting off w/ US House and Court of Appeals <br>
Unclear what BALLOTS CAST or REGISTERED VOTERS signals... but they only happen in HINDS county

In [6]:
#Choosing which contests to keep
contests_keep = ['US HOUSE']

#Filter to only include results for contest types in list above
filtered_frame = frame[frame['office'].isin(contests_keep)]

Cleaning and creating pivot columns

In [7]:
#Cast the contest name and code columns to string to make the pivot col
filtered_frame["office"] = filtered_frame["office"].astype(str)

#Add party abbreviation column
filtered_frame['party_abbrev'] = filtered_frame.party_detailed.str[:3]

# Create the pivot column with the contest name and the candidate
filtered_frame["pivot_col"] = filtered_frame["office"] + "-:-" + filtered_frame['candidate'] + "-:-" +"PARTY:" +filtered_frame['party_abbrev']

# Where it exists, add in the district to the pivot column
filtered_frame["pivot_col"] = np.where(filtered_frame["district"]!=" ",filtered_frame["pivot_col"] + "-:-" + filtered_frame["district"], filtered_frame["pivot_col"])

#filtered_frame.drop(['Election Number', 'Election Date', 'Total Registered Republicans','Total Registered Democrats', 'Total Registered All Other Parties','Candidate Florida Voter Registration System ID Number','DOE Assigned Candidate Number or Retention/Issue Number'], axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_frame["office"] = filtered_frame["office"].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_frame['party_abbrev'] = filtered_frame.party_detailed.str[:3]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_frame["pivot_col"] = filtered_frame["office"] + "-:-" + filte

## Pivot and clean

In [8]:
#Pivot the data so that each row has all the results from that precinct
pivoted_2022 = pd.pivot_table(filtered_frame, values=['votes'], index=["pct_std",'county_code','county_name',"precinct", 'county_fips'],columns=['pivot_col'],aggfunc=sum)

#Clean up the indices
pivoted_2022.reset_index(inplace=True,drop=False)
pivoted_2022.columns = [' '.join(col).strip() for col in pivoted_2022.columns.values]
pivoted_2022.columns = pivoted_2022.columns.str.replace("votes ","")
pivoted_2022 = pivoted_2022.fillna(0)

#Exclude state identifier from county_fips column
pivoted_2022['county_fips'] = pivoted_2022['county_fips'].astype(str).str[2:]

#Remove single quotes from all column names, part of cleaning candidate surnames like O'Brien
pivoted_2022.columns = [col.replace("'", "") for col in pivoted_2022.columns]

#Format Court of Appeals district/position
pivoted_2022.columns = pivoted_2022.columns.str.replace(", POSITION ",".").str.replace(", POSITON ",".")

Make sure to drop precincts that are aggregates of others

In [9]:
#Drop precinct entries that total full county results
pivoted_2022 = pivoted_2022[~pivoted_2022['precinct'].str.contains('TOTAL')]

In [10]:
#View data
pivoted_2022.head()

Unnamed: 0,pct_std,county_code,county_name,precinct,county_fips,US HOUSE-:-ALDEN PATRICK JOHNSON-:-PARTY:LIB-:-004,US HOUSE-:-BENNIE G THOMPSON-:-PARTY:DEM-:-002,US HOUSE-:-BRIAN FLOWERS-:-PARTY:REP-:-002,US HOUSE-:-DIANNE BLACK-:-PARTY:DEM-:-001,US HOUSE-:-JOHNNY L DUPREE-:-PARTY:DEM-:-004,US HOUSE-:-MICHAEL GUEST-:-PARTY:REP-:-003,US HOUSE-:-MIKE EZELL-:-PARTY:REP-:-004,US HOUSE-:-SHUWASKI A YOUNG-:-PARTY:DEM-:-003,US HOUSE-:-TRENT KELLY-:-PARTY:REP-:-001
0,(01) NEW HOPE BAPTIST CHURCH:::PIK,PIK,PIKE,(01) NEW HOPE BAPTIST CHURCH,113,0.0,0.0,0.0,0.0,0.0,31.0,0.0,244.0,0.0
1,(02) S MCCOMB BAPTIST CHURCH:::PIK,PIK,PIKE,(02) S MCCOMB BAPTIST CHURCH,113,0.0,0.0,0.0,0.0,0.0,32.0,0.0,76.0,0.0
2,(03) SUMMIT UNITED METHODIST CHURCH:::PIK,PIK,PIKE,(03) SUMMIT UNITED METHODIST CHURCH,113,0.0,0.0,0.0,0.0,0.0,43.0,0.0,125.0,0.0
3,(04) MLK CENTER:::PIK,PIK,PIKE,(04) MLK CENTER,113,0.0,0.0,0.0,0.0,0.0,25.0,0.0,262.0,0.0
4,(05) NORTH MCCOMB BAPTIST CHURCH:::PIK,PIK,PIKE,(05) NORTH MCCOMB BAPTIST CHURCH,113,0.0,0.0,0.0,0.0,0.0,199.0,0.0,107.0,0.0


## Loading helper functions

These are good column renaming and dictionary creation

In [11]:
#Setting up a pre-fab function for assigning legislative abbreviations
def get_level_dist(column_name):
    zfill_level = 2
    if "US HOUSE" in column_name:
        level = "CON"
    elif "COURT OF APPEALS" in column_name:
        level = "CRJ"
    else:
        raise ValueError
    return_val = re.findall("*",column_name)
    if (len(return_val)!=0):
        dist = return_val[0]
        dist = dist.zfill(zfill_level)
    else:
        raise ValueError
    return level,dist

In [12]:
#Set up contest level
def get_race(contest):
    if "President" in contest:
        level = "PRE"
    elif ("US HOUSE" in contest or "COURT OF APPEALS" in contest):
        contest_info = get_level_dist(contest)
        level = contest_info[0]+contest_info[1]
    else:
        print(contest)
        raise ValueError
    return level

In [13]:
#Set up election type (general or primary)
def get_election_type(contest):
    return "G"

In [14]:
#Set up contest three-letter descriptor
def get_race(contest):
    mod_level = ""
    level = contest.split("-:-")[0]
    level_change_dict = {
        'COURT OF APPEALS':'CAJ',
        'US HOUSE':'CON'}
    for val in level_change_dict.keys():
        if val in level:
            mod_level = level_change_dict[val]
            break
    if mod_level == "":
        print("NO CONTEST", contest)
    if mod_level == 'A':
        mod_level += level.split(":")[0].split(" ")[-1]

    return mod_level



In [15]:
#Set up party single-letter identifier
def get_party(contest):
    if "Amendment" in contest:
        return ""
    elif "Retention of" in contest:
        return contest.split("-:-")[1][0].upper()
    elif "PARTY:DEM" in contest:
        return "D"
    elif "PARTY:REP" in contest:
        return "R"
    elif "PARTY:LIB" in contest:
        return "L"
    ## Reform -> F
    elif "PARTY:REF" in contest:
        return "O"
    elif "PARTY:IND" in contest:
        return "I"
    elif "PARTY:NON" in contest:
        return "N"
    elif "PARTY:CPF" in contest:
        return "C"
    elif "PARTY:WRI" in contest:
        return "O"
    elif "PARTY:NPA" or "PARTY:NOP" in contest:
        return "N"
    else:
        print(contest)
        return ValueError


In [16]:
#Set up function for first three letters of candidate's surname
def get_name(contest):
  candidate = contest.split("-:-")[1]
  candidate = candidate.upper()
  likely_last = candidate.split(" ")[-1]
  if likely_last in ["JR","III","II","SR"] or (len(candidate.split(" "))==3 and "." not in candidate and '\"' not in candidate):
      return candidate.split(" ")[2][0:3]
  else:
      return likely_last[0:3]

In [17]:
#Set up function for getting district string
def get_district(contest):
    district_string = contest.split("-:-")[3]
    likely = district_string
    if "US HOUSE" in contest:
        level = 1
    elif 'COURT OF APPEALS' in contest:
        level = 1
    return re.sub(r'^0', '', likely, count=1)

## Apply functions

In [18]:
#Columns to leave out of helper function operations
keep_names = ['pct_std', 'county_code', 'county_name','precinct', 'county_fips']

In [19]:
#Applying functions to all columns except for ones in keep_names list
contest_name_change_dict = {}

duplicate_value_list = []
for contest in pivoted_2022.columns:
    if contest not in keep_names:
        # Add in a condition about the 20
#         print("E-type", get_election_type(contest))
#         print("Race", get_race(contest))
#         print("Party", get_party(contest))
#         print("Name", get_name(contest))
        if "US HOUSE" in contest or 'COURT OF APPEALS' in contest:
            value = get_election_type(contest) + get_race(contest) + get_district(contest) + get_party(contest) + get_name(contest)
        else:
            value = get_election_type(contest) + "22" + get_race(contest) + get_party(contest) + get_name(contest)
        if value in contest_name_change_dict.values():
            duplicate_value_list.append(value)
        print(len(value))
        contest_name_change_dict[contest] = value

10
10
10
10
10
10
10
10
10


In [20]:
#Checking for duplicate columns
if len(duplicate_value_list) > 0:
    print("DUPLICATE VALUES")
    for val in [(k,v) for k, v in contest_name_change_dict.items() if v in duplicate_value_list]:
        print(val)

In [21]:
# Create a sorted dictionary to sort the columns of the final dataframe
sorted_contest_name_change_dict = dict(sorted(contest_name_change_dict.items(), key=lambda x:x[1]))

# Export the candidate name change dictionary for the README
holder = pd.DataFrame(sorted_contest_name_change_dict.values(),sorted_contest_name_change_dict.keys())
holder.to_csv("./output/field_names.csv", index = True)

In [22]:
#Rename the dataframe using this dictionary
pivoted_2022.rename(columns = contest_name_change_dict, inplace = True)

#Define a reversed version of the dictionary
contest_name_change_dict_rev = {v:k for k,v in contest_name_change_dict.items()}
display(contest_name_change_dict_rev)

{'GCON04LJOH': 'US HOUSE-:-ALDEN PATRICK JOHNSON-:-PARTY:LIB-:-004',
 'GCON02DTHO': 'US HOUSE-:-BENNIE G THOMPSON-:-PARTY:DEM-:-002',
 'GCON02RFLO': 'US HOUSE-:-BRIAN FLOWERS-:-PARTY:REP-:-002',
 'GCON01DBLA': 'US HOUSE-:-DIANNE BLACK-:-PARTY:DEM-:-001',
 'GCON04DDUP': 'US HOUSE-:-JOHNNY L DUPREE-:-PARTY:DEM-:-004',
 'GCON03RGUE': 'US HOUSE-:-MICHAEL GUEST-:-PARTY:REP-:-003',
 'GCON04REZE': 'US HOUSE-:-MIKE EZELL-:-PARTY:REP-:-004',
 'GCON03DYOU': 'US HOUSE-:-SHUWASKI A YOUNG-:-PARTY:DEM-:-003',
 'GCON01RKEL': 'US HOUSE-:-TRENT KELLY-:-PARTY:REP-:-001'}

In [23]:
#Additional cleaning
for col in list(contest_name_change_dict.values()):
    pivoted_2022[col] = pivoted_2022[col].astype(int)

# Check against state county-level results

County results file made by Redistricting Data Hub staff
<br><br>
Created by hand using MS Secretary of State PDF files

In [24]:
#Read in file
county_check = pd.read_csv('./raw-from-source/RDH_county_totals.csv')

In [25]:
#Cleaning
county_check.fillna(0, inplace=True)
county_check.iloc[:, 1:] = county_check.iloc[:, 1:].astype(int)
county_check.reset_index(inplace = True, drop = True)

county_check.head()

  county_check.iloc[:, 1:] = county_check.iloc[:, 1:].astype(int)


Unnamed: 0,county,GCON02DTHO,GCON02RFLO,GCON03DYOU,GCON03RGUE,GCON04DDUP,GCON04REZE,GCON04LJOH,GCON01DBLA,GCON01RKEL
0,ADAMS,4469,3817,0,0,0,0,0,0,0
1,AMITE,1538,3127,0,0,0,0,0,0,0
2,ATTALA,2008,3194,0,0,0,0,0,0,0
3,BOLIVAR,4912,2753,0,0,0,0,0,0,0
4,CARROLL,1112,2626,0,0,0,0,0,0,0


In [26]:
#Create dictionary for alt data source
contest_name_change_dict_counties = {}

#Checking for duplicate column names
duplicate_value_list = []
for contest in county_check.columns:
    if contest != "county":
        if value in contest_name_change_dict_counties.values():
            duplicate_value_list.append(value)
        print(len(value))
        contest_name_change_dict_counties[contest] = value

if len(duplicate_value_list) > 0:
    print("DUPLICATE VALUES")
    for val in [(k,v) for k, v in contest_name_change_dict_counties.items() if v in duplicate_value_list]:
        print(val)

10
10
10
10
10
10
10
10
10
DUPLICATE VALUES
('GCON02DTHO', 'GCON01RKEL')
('GCON02RFLO', 'GCON01RKEL')
('GCON03DYOU', 'GCON01RKEL')
('GCON03RGUE', 'GCON01RKEL')
('GCON04DDUP', 'GCON01RKEL')
('GCON04REZE', 'GCON01RKEL')
('GCON04LJOH', 'GCON01RKEL')
('GCON01DBLA', 'GCON01RKEL')
('GCON01RKEL', 'GCON01RKEL')


In [27]:
#Creating reverse dictionary for use in checking functions
contest_name_change_dict_counties_rev = {v:k for k,v in contest_name_change_dict_counties.items()}

In [28]:
#Checking out a list of contests that appear in county-level source but not precinct-level
{i:contest_name_change_dict_counties_rev[i] for i in list(county_check.columns) if i not in list(pivoted_2022.columns) and i !="county"}

{}

In [29]:
#Checking out a list of contests that appear in county-level source but not precinct-level
ignore_list = ['pct_std', 'county_code', 'precinct', 'county_fips', 'county','county_name']

{i:contest_name_change_dict_rev[i] for i in list(pivoted_2022.columns) if i not in list(county_check.columns) and i not in ignore_list}

{}

In [30]:
#Define a list of columns that appear in both dataframes
shared_cols = [i for i in list(pivoted_2022.columns) if i in list(county_check.columns)]
print(shared_cols)

['GCON04LJOH', 'GCON02DTHO', 'GCON02RFLO', 'GCON01DBLA', 'GCON04DDUP', 'GCON03RGUE', 'GCON04REZE', 'GCON03DYOU', 'GCON01RKEL']


In [31]:
#Same for unshared columns
unshared_cols = [i for i in list(pivoted_2022.columns) if i not in list(county_check.columns)]
print(unshared_cols)

['pct_std', 'county_code', 'county_name', 'precinct', 'county_fips']


In [32]:
#Defining statewide_totals_check & county_totals_check function

def statewide_totals_check(partner_df,source_df,column_list):
    """Compares the totals of two election result dataframes at the statewide total level

    Args:
      partner_df: DataFrame of election results we are comparing against
      source_df: DataFrame of election results we are comparing to
      column_list: List of races that there are votes for

    Returns:
      Nothing, only prints out an analysis
    """
    print("***Statewide Totals Check***")
    for race in column_list:
        if (partner_df[race].sum()- source_df[race].sum() != 0):
            print(race+" has a difference of "+str(partner_df[race].sum()-source_df[race].sum())+" votes")
            print("\PRECINCT-LEVEL: "+str(partner_df[race].sum())+" votes")
            print("\COUNTY-LEVEL: "+str(source_df[race].sum())+" votes")
        else:
            print(race + " is equal", "\tPRECINCT / COUNTY: " + str(partner_df[race].sum()))

def county_totals_check(partner_df,source_df,column_list,county_col,full_print=False):
    """Compares the totals of two election result dataframes at the county level

    Args:
      partner_df: DataFrame of election results we are comparing against
      source_df: DataFrame of election results we are comparing to
      column_list: List of races that there are votes for
      county_col: String of the column name that contains county information
      full_print: Boolean specifying whether to print out everything, including counties w/ similarities

    Returns:
      Nothing, only prints out an analysis
    """

    print("***Countywide Totals Check***")
    print("")
    diff_counties=[]
    for race in column_list:
        diff = partner_df.groupby([county_col]).sum()[race]-source_df.groupby([county_col]).sum()[race]
        for val in diff[diff != 0].index.values.tolist():
            if val not in diff_counties:
                diff_counties.append(val)
        if len(diff[diff != 0]!=0):
            print(race + " contains differences in these counties:")
            for val in diff[diff != 0].index.values.tolist():
                county_differences = diff[diff != 0]
                print("\t"+val+" has a difference of "+str(county_differences[val])+" votes")
                print("\t\PRECINCT-LEVEL: "+str(partner_df.groupby([county_col]).sum().loc[val,race])+" votes")
                print("\t\COUNTY-LEVEL: "+str(source_df.groupby([county_col]).sum().loc[val,race])+" votes")
            if (full_print):
                for val in diff[diff == 0].index.values.tolist():
                    county_similarities = diff[diff == 0]
                    print("\t"+val + ": "+ str(partner_df.groupby([county_col]).sum().loc[val,race])+" votes")
        else:
            print(race + " is equal across all counties")
            if (full_print):
                for val in diff[diff == 0].index.values.tolist():
                    county_similarities = diff[diff == 0]
                    print("\t"+val + ": "+ str(partner_df.groupby([county_col]).sum().loc[val,race])+" votes")
    if (len(diff_counties)>0):
        print()
        print(diff_counties)

No discrepancies w/ statewide totals

In [33]:
#Calling statewide function
statewide_totals_check(pivoted_2022, county_check, shared_cols)

***Statewide Totals Check***
GCON04LJOH is equal 	PRECINCT / COUNTY: 3569
GCON02DTHO is equal 	PRECINCT / COUNTY: 108285
GCON02RFLO is equal 	PRECINCT / COUNTY: 71884
GCON01DBLA is equal 	PRECINCT / COUNTY: 45238
GCON04DDUP is equal 	PRECINCT / COUNTY: 42876
GCON03RGUE is equal 	PRECINCT / COUNTY: 132481
GCON04REZE is equal 	PRECINCT / COUNTY: 127813
GCON03DYOU is equal 	PRECINCT / COUNTY: 54803
GCON01RKEL is equal 	PRECINCT / COUNTY: 122152


In [34]:
#Minor renaming for the county-level check
county_check.rename(columns = {"county":"county_name"}, inplace = True)

No discrepancies w/ county-wide totals

In [35]:
#Calling county function
county_totals_check(pivoted_2022, county_check, shared_cols, "county_name")

***Countywide Totals Check***

GCON04LJOH is equal across all counties
GCON02DTHO is equal across all counties
GCON02RFLO is equal across all counties
GCON01DBLA is equal across all counties
GCON04DDUP is equal across all counties
GCON03RGUE is equal across all counties
GCON04REZE is equal across all counties
GCON03DYOU is equal across all counties
GCON01RKEL is equal across all counties


# Minor additional cleaning

In [36]:
pivoted_2022.rename(columns = {'pct_std':'UNIQUE_ID',
                              'county_fips':'COUNTYFP',
                              'county_name':'CNTY_NAME',
                              'county_code':'CNTY_CODE',
                              'precinct':'POLL_LOC'}, inplace = True)

In [37]:
pivoted_2022 = pivoted_2022[['UNIQUE_ID', 'COUNTYFP', 'CNTY_CODE', 'CNTY_NAME', 'POLL_LOC']+list(contest_name_change_dict.values())]

# Export

In [38]:
#Re-order columns to place larger, statewide contests in front

#Set the order
base  = []
cong = []
for i in pivoted_2022:
    if i.startswith('GCON'):
        cong.append(i)
    else:
        base.append(i)
new_order = base + cong
dict_order = cong

print(base)

#Apply order to dataframe
pivoted_ordered_2022 = pivoted_2022.reindex(columns=new_order)
pivoted_ordered_2022.head()

['UNIQUE_ID', 'COUNTYFP', 'CNTY_CODE', 'CNTY_NAME', 'POLL_LOC']


Unnamed: 0,UNIQUE_ID,COUNTYFP,CNTY_CODE,CNTY_NAME,POLL_LOC,GCON04LJOH,GCON02DTHO,GCON02RFLO,GCON01DBLA,GCON04DDUP,GCON03RGUE,GCON04REZE,GCON03DYOU,GCON01RKEL
0,(01) NEW HOPE BAPTIST CHURCH:::PIK,113,PIK,PIKE,(01) NEW HOPE BAPTIST CHURCH,0,0,0,0,0,31,0,244,0
1,(02) S MCCOMB BAPTIST CHURCH:::PIK,113,PIK,PIKE,(02) S MCCOMB BAPTIST CHURCH,0,0,0,0,0,32,0,76,0
2,(03) SUMMIT UNITED METHODIST CHURCH:::PIK,113,PIK,PIKE,(03) SUMMIT UNITED METHODIST CHURCH,0,0,0,0,0,43,0,125,0
3,(04) MLK CENTER:::PIK,113,PIK,PIKE,(04) MLK CENTER,0,0,0,0,0,25,0,262,0
4,(05) NORTH MCCOMB BAPTIST CHURCH:::PIK,113,PIK,PIKE,(05) NORTH MCCOMB BAPTIST CHURCH,0,0,0,0,0,199,0,107,0


In [39]:
#Create a sorted dictionary for use in README file
sorted_contest_name_change_dict = {col: contest_name_change_dict_rev[col] for col in dict_order}
print(sorted_contest_name_change_dict)

#Export the dictionary for the README
holder = pd.DataFrame(sorted_contest_name_change_dict.values(),sorted_contest_name_change_dict.keys())
holder.to_csv("./field_names.csv", index = True)

{'GCON04LJOH': 'US HOUSE-:-ALDEN PATRICK JOHNSON-:-PARTY:LIB-:-004', 'GCON02DTHO': 'US HOUSE-:-BENNIE G THOMPSON-:-PARTY:DEM-:-002', 'GCON02RFLO': 'US HOUSE-:-BRIAN FLOWERS-:-PARTY:REP-:-002', 'GCON01DBLA': 'US HOUSE-:-DIANNE BLACK-:-PARTY:DEM-:-001', 'GCON04DDUP': 'US HOUSE-:-JOHNNY L DUPREE-:-PARTY:DEM-:-004', 'GCON03RGUE': 'US HOUSE-:-MICHAEL GUEST-:-PARTY:REP-:-003', 'GCON04REZE': 'US HOUSE-:-MIKE EZELL-:-PARTY:REP-:-004', 'GCON03DYOU': 'US HOUSE-:-SHUWASKI A YOUNG-:-PARTY:DEM-:-003', 'GCON01RKEL': 'US HOUSE-:-TRENT KELLY-:-PARTY:REP-:-001'}


In [40]:
#Export final CSV
output = os.path.join(wd,'output')
ms_2022_gen_prec = os.path.join(output,'ms_2022_gen_prec')
if not os.path.exists(ms_2022_gen_prec):
    os.mkdir(ms_2022_gen_prec)

pivoted_ordered_2022.to_csv("./output/ms_2022_gen_prec/ms_2022_gen_prec.csv", index = False)