## Cobb County Georgia 2022 Municipal Election Returns Joined to Precinct Boundaries

### Sections
- <a href="#ETL">Cleaning Precinct-Level Election Results</a><br>
- <a href="#check">Vote Totals Checks</a><br>
- <a href="#maup">Join with Election Returns</a><br>
- <a href="#exp">Export Cleaned Precinct-Level Datasets</a><br>
- <a href="#readme">Creating README</a><br>

#### Sources
Precinct-level data from [Georgia Secretary of State Certified Results Cobb County - XML format](https://results.enr.clarityelections.com//GA/Cobb/115499/313474/reports/detailxls.zip).

RDH statewide general election results and precinct boundaries from [Georgia 2022 General Election Precinct-Level Results and Boundaries](https://redistrictingdatahub.org/dataset/georgia-2022-general-election-precinct-level-results-and-boundaries/).
    
County-level data to run checks from [Georgia Secretary of State County Summary Report for Cobb County - CSV format](https://results.enr.clarityelections.com//GA/Cobb/115499/313474/reports/summary.zip)

In [1]:
import geopandas as gp
import pandas as pd
import os
import xml.etree.ElementTree as et
import numpy as np
import re
import GA22_helper as hlp
pd.set_option("display.max_rows", None)

<p><a name="ETL"></a></p>

### Cleaning Precinct-Level Election Results

Read in precinct-level election results

In [2]:
# Adapted ph's xml election parser
def ph_clarityelec_xml(file_path, str_electype):
    """Flatten a Clarity Elections detail XML report into a long DataFrame.

    The tree is walked Contest -> Choice -> VoteType -> Precinct and one row is
    produced for every Precinct element found at the bottom of that chain.

    Parameters
    ----------
    file_path : str or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.
    str_electype : str
        Label written to every row of the ``election`` column.

    Returns
    -------
    pandas.DataFrame
        Columns: county, contest, choice, voting_method, precinct, num_votes,
        election. ``num_votes`` holds the raw ``votes`` attribute (a string, or
        None when the attribute is absent); casting is left to the caller.
        ``precinct`` has surrounding whitespace stripped.
    """
    xroot = et.parse(file_path).getroot()

    # Default to None so a report without a <Region> element yields a null county
    # rather than an UnboundLocalError when the first row is built. When several
    # <Region> elements exist the last one wins, as before.
    county = None
    for region in xroot.findall(".//Region"):
        county = region.text

    rows = [
        [
            county,
            contest.attrib.get('text'),
            choice.attrib.get('text'),
            vote_type.attrib.get('name'),
            precinct.attrib.get('name'),
            precinct.attrib.get('votes'),
        ]
        for contest in xroot.findall(".//Contest")
        for choice in contest.findall("./Choice")
        for vote_type in choice.findall("./VoteType")
        for precinct in vote_type.findall("./Precinct")
    ]

    dfcols = ['county', 'contest', 'choice', 'voting_method', 'precinct', 'num_votes']
    df_ = pd.DataFrame(rows, columns=dfcols)
    df_["election"] = str_electype
    df_["precinct"] = df_["precinct"].str.strip()
    return df_

In [3]:
# Read in Cobb County election returns
file_path = "./raw-from-source/Cobb/detail.xml"
er = ph_clarityelec_xml(file_path, "County Contest")
er.head()

Unnamed: 0,county,contest,choice,voting_method,precinct,num_votes,election
0,Cobb,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Acworth 1A,87,County Contest
1,Cobb,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Acworth 1B,70,County Contest
2,Cobb,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Acworth 1C,28,County Contest
3,Cobb,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Addison 01,48,County Contest
4,Cobb,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Austell 1A,11,County Contest


In [4]:
er.num_votes = er.num_votes.astype(int)
er.num_votes.dtype

dtype('int32')

In [5]:
# Look at contest types and subset for those pertaining to county contests
er.contest.unique()
keep_contests = ['Solicitor General',
                 'County Commission - District 1',
                 'County Commission - District 3',
                 'County BOE - District 2',
                 'County BOE - District 4',
                 'County BOE - District 6',
                 'Soil and Water Conservation District Supervisor - Cobb County',
                 'Special Soil and Water Conservation District Supervisor - Cobb County',
                 'Special City Council Post 1 - Kennesaw',
                 'Cityhood Question - Mableton']

In [6]:
#subset dataframe by contests of interest only
er_ = er[er['contest'].isin(keep_contests)].copy()

In [7]:
er_.head()

Unnamed: 0,county,contest,choice,voting_method,precinct,num_votes,election
18380,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1A,107,County Contest
18381,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1B,96,County Contest
18382,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1C,31,County Contest
18383,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Addison 01,60,County Contest
18384,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Austell 1A,15,County Contest


In [8]:
#Create UNIQUE_ID col
er_['UNIQUE_ID'] = er_['county'] + '-:-' +er_['precinct']

In [9]:
#Function cleans candidate and contest strings, and combines into a pivot column
def create_pivot_col(df, name_string, contest_string, pivot_string):
    """Clean the candidate and contest columns in place and add a combined key.

    Both columns are converted to stripped strings with the tokens ``.``, ``'``,
    ``"``, ``,`` and ``(I)`` removed. Runs of whitespace in the candidate column
    are collapsed to single spaces *before* the tokens are removed, so deleting
    ``(I)`` from the middle of a name leaves two adjacent spaces. The contest
    column is never whitespace-collapsed.

    Parameters
    ----------
    df : pandas.DataFrame
        Modified in place; the same object is returned.
    name_string : str
        Name of the candidate/choice column.
    contest_string : str
        Name of the contest column.
    pivot_string : str
        Name of the column to create as ``"<candidate> -:- <contest>"``.
    """
    unwanted_tokens = ('.', "'", '"', ',', '(I)')

    def _scrub(text):
        # Remove each token in the listed order, then trim the ends
        for token in unwanted_tokens:
            text = text.replace(token, '')
        return text.strip()

    def _clean_name(value):
        # split()/join collapses interior whitespace and trims in one pass
        return _scrub(' '.join(str(value).strip().split()))

    def _clean_contest(value):
        return _scrub(str(value).strip())

    df[name_string] = df[name_string].apply(_clean_name)
    df[contest_string] = df[contest_string].apply(_clean_contest)
    df[pivot_string] = df[name_string] + ' -:- ' + df[contest_string]
    return df

In [10]:
#create pivot col for precinct df
er_cobb = create_pivot_col(er_, 'choice', 'contest', 'pivot')
er_cobb.head()

Unnamed: 0,county,contest,choice,voting_method,precinct,num_votes,election,UNIQUE_ID,pivot
18380,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1A,107,County Contest,Cobb-:-Acworth 1A,Courtney Martin Brubaker (Rep) -:- Solicitor G...
18381,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1B,96,County Contest,Cobb-:-Acworth 1B,Courtney Martin Brubaker (Rep) -:- Solicitor G...
18382,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1C,31,County Contest,Cobb-:-Acworth 1C,Courtney Martin Brubaker (Rep) -:- Solicitor G...
18383,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Addison 01,60,County Contest,Cobb-:-Addison 01,Courtney Martin Brubaker (Rep) -:- Solicitor G...
18384,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Austell 1A,15,County Contest,Cobb-:-Austell 1A,Courtney Martin Brubaker (Rep) -:- Solicitor G...


In [11]:
rename_df = pd.read_csv('./raw-from-source/rename_df.csv')

In [12]:
rename_df

Unnamed: 0,UNIQUE_ID,PIVOT_COL
0,G22SLGRBRU,Courtney Martin Brubaker (Rep) -:- Solicitor G...
1,G22SLGDMET,Makia Metzger (Dem) -:- Solicitor General
2,GCNC1RGAM,Keli Gambrill (Rep) -:- County Commission - D...
3,GCNC3RBIR,JoAnn K Birrell (Rep) -:- County Commission -...
4,GCNC3DTRI,Christine Triebsch (Dem) -:- County Commission...
5,GBOE2RGEO,Stephen M George Jr (Rep) -:- County BOE - Dis...
6,GBOE2DSAY,Becky Sayler (Dem) -:- County BOE - District 2
7,GBOE4RCHA,David Chastain (Rep) -:- County BOE - District 4
8,GBOE4DPOZ,Catherine Pozniak (Dem) -:- County BOE - Distr...
9,GBOE6DDAV,Nichelle A Davis (Dem) -:- County BOE - Distri...


In [13]:
# Build a lookup from each cleaned pivot string (rename_df's PIVOT_COL) to the short
# column code stored in rename_df's UNIQUE_ID column (e.g. G22SLGRBRU)
uid_dict = dict(zip(rename_df['PIVOT_COL'], rename_df['UNIQUE_ID']))

In [14]:
# apply rename dict
er_cobb['pivot'] = er_cobb['pivot'].replace(uid_dict)

In [15]:
er_cobb.head()

Unnamed: 0,county,contest,choice,voting_method,precinct,num_votes,election,UNIQUE_ID,pivot
18380,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1A,107,County Contest,Cobb-:-Acworth 1A,G22SLGRBRU
18381,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1B,96,County Contest,Cobb-:-Acworth 1B,G22SLGRBRU
18382,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Acworth 1C,31,County Contest,Cobb-:-Acworth 1C,G22SLGRBRU
18383,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Addison 01,60,County Contest,Cobb-:-Addison 01,G22SLGRBRU
18384,Cobb,Solicitor General,Courtney Martin Brubaker (Rep),Absentee by Mail Votes,Austell 1A,15,County Contest,Cobb-:-Austell 1A,G22SLGRBRU


In [16]:
# One row per precinct (UNIQUE_ID), one column per candidate/choice code, summing
# num_votes across voting methods; combinations with no rows become 0.
# The string alias "sum" replaces the builtin `sum`, which pandas has deprecated
# as an aggregation argument.
er_pvt = pd.pivot_table(
    er_cobb,
    index=["UNIQUE_ID"],
    columns=["pivot"],
    values=['num_votes'],
    aggfunc="sum",
).fillna(0)

In [17]:
er_pvt.head()

Unnamed: 0_level_0,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes,num_votes
pivot,G22CQMNO,G22CQMYES,G22SLGDMET,G22SLGRBRU,G22SWSNLOY,G22SWSNSNE,GBOE2DSAY,GBOE2RGEO,GBOE4DPOZ,GBOE4RCHA,...,GCNC1RGAM,GCNC3DTRI,GCNC3RBIR,GCTC1NACR,GCTC1NBLI,GCTC1NBOT,GCTC1NBOW,GCTC1NBUR,GCTC1NGUT,GCTC1NORO
UNIQUE_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Cobb-:-Acworth 1A,0.0,0.0,1482.0,1671.0,2567.0,2528.0,0.0,0.0,0.0,0.0,...,2450.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cobb-:-Acworth 1B,0.0,0.0,1236.0,1567.0,2305.0,2291.0,0.0,0.0,0.0,0.0,...,2226.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cobb-:-Acworth 1C,0.0,0.0,1470.0,848.0,1866.0,1813.0,0.0,0.0,0.0,0.0,...,1670.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cobb-:-Addison 01,0.0,0.0,731.0,827.0,1296.0,1308.0,0.0,0.0,777.0,792.0,...,0.0,723.0,834.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cobb-:-Austell 1A,0.0,0.0,867.0,290.0,929.0,922.0,148.0,16.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
#Clean up indexing
er_pvt.reset_index(inplace = True)
er_pvt.columns = er_pvt.columns.droplevel(0)

In [19]:
er_pvt.columns = ['UNIQUE_ID'] + er_pvt.columns[1:].to_list()

In [20]:
sos_totals = pd.read_csv('./raw-from-source/summary/summary.csv')

In [21]:
sos_totals.head(1)

Unnamed: 0,line number,contest name,choice name,party name,total votes,percent of votes,registered voters,ballots cast,num Precinct total,num Precinct rptg,over votes,under votes
0,1,US Senate (Vote For 1),Herschel Junior Walker (Rep),,125795,40.49,507459,313116,147,147,0,1949


In [22]:
keep_sos = sos_totals['contest name'].unique()[-14:]

In [23]:
sos = sos_totals[sos_totals['contest name'].isin(keep_sos)].copy()

In [24]:
sos[['choice name', 'contest name', 'total votes']]

Unnamed: 0,choice name,contest name,total votes
69,Courtney Martin Brubaker (Rep),Solicitor General (Vote For 1),145881
70,Makia Metzger (Dem),Solicitor General (Vote For 1),160095
71,Keli Gambrill (I) (Rep),County Commission - District 1 (Vote For 1),72731
72,JoAnn K. Birrell (I) (Rep),County Commission - District 3 (Vote For 1),53862
73,Christine Triebsch (Dem),County Commission - District 3 (Vote For 1),39908
74,"Stephen M. George, Jr. (Rep)",County BOE - District 2 (Vote For 1),10763
75,Becky Sayler (Dem),County BOE - District 2 (Vote For 1),23468
76,David Chastain (I) (Rep),County BOE - District 4 (Vote For 1),25548
77,Catherine Pozniak (Dem),County BOE - District 4 (Vote For 1),21862
78,Nichelle A. Davis (Dem),County BOE - District 6 (Vote For 1),27615


In [25]:
er_pvt.sum()

UNIQUE_ID     Cobb-:-Acworth 1ACobb-:-Acworth 1BCobb-:-Acwor...
G22CQMNO                                                11694.0
G22CQMYES                                               13191.0
G22SLGDMET                                             160095.0
G22SLGRBRU                                             145881.0
G22SWSNLOY                                             253361.0
G22SWSNSNE                                             252980.0
GBOE2DSAY                                               23468.0
GBOE2RGEO                                               10763.0
GBOE4DPOZ                                               21862.0
GBOE4RCHA                                               25548.0
GBOE6DDAV                                               27615.0
GCNC1RGAM                                               72731.0
GCNC3DTRI                                               39908.0
GCNC3RBIR                                               53862.0
GCTC1NACR                               

In [26]:
# Statewide 2022 general-election precinct shapefile (no-splits version)
ga_gen_shp = gp.read_file('./raw-from-source/ga_2022_gen_prec/ga_2022_gen_prec_no_splits/ga_2022_gen_prec_no_splits.shp')
# Build UNIQUE_ID the same way as the election returns ("<county>-:-<precinct>") so the two can be joined
ga_gen_shp['UNIQUE_ID'] = ga_gen_shp['county'] +'-:-'+ ga_gen_shp['precinct']
# limit to Cobb County
cobb_shp = ga_gen_shp[ga_gen_shp['county'] == 'Cobb']

In [27]:
#Check for precinct names, looking for 147
cobb_shp.precinct.nunique()

147

In [28]:
set(cobb_shp['UNIQUE_ID'].unique()) == set(er_pvt['UNIQUE_ID'].unique())

True

In [29]:
# merge
cobb_pber = cobb_shp[['UNIQUE_ID', 'geometry']].merge(er_pvt, on='UNIQUE_ID', how='outer', indicator=True)

In [30]:
# check
cobb_pber._merge.value_counts()

both          147
left_only       0
right_only      0
Name: _merge, dtype: int64

In [31]:
#Drop Merge Column
cobb_pber = cobb_pber.drop(columns=['_merge'])

In [32]:
cobb_pber.head()

Unnamed: 0,UNIQUE_ID,geometry,G22CQMNO,G22CQMYES,G22SLGDMET,G22SLGRBRU,G22SWSNLOY,G22SWSNSNE,GBOE2DSAY,GBOE2RGEO,...,GCNC1RGAM,GCNC3DTRI,GCNC3RBIR,GCTC1NACR,GCTC1NBLI,GCTC1NBOT,GCTC1NBOW,GCTC1NBUR,GCTC1NGUT,GCTC1NORO
0,Cobb-:-Acworth 1A,"MULTIPOLYGON (((-84.70906 34.05580, -84.70894 ...",0.0,0.0,1482.0,1671.0,2567.0,2528.0,0.0,0.0,...,2450.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Cobb-:-Acworth 1B,"POLYGON ((-84.69258 34.07828, -84.69240 34.078...",0.0,0.0,1236.0,1567.0,2305.0,2291.0,0.0,0.0,...,2226.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cobb-:-Acworth 1C,"MULTIPOLYGON (((-84.63610 34.07493, -84.63747 ...",0.0,0.0,1470.0,848.0,1866.0,1813.0,0.0,0.0,...,1670.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cobb-:-Addison 01,"POLYGON ((-84.49564 34.01312, -84.49631 34.013...",0.0,0.0,731.0,827.0,1296.0,1308.0,0.0,0.0,...,0.0,723.0,834.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Cobb-:-Austell 1A,"POLYGON ((-84.63531 33.83736, -84.63440 33.837...",0.0,0.0,867.0,290.0,929.0,922.0,148.0,16.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
#make new columns
cobb_pber['COUNTY'] = 'Cobb'
cobb_pber['PRECINCT'] = cobb_pber['UNIQUE_ID'].apply(lambda x: x.split('-:-')[1])

In [34]:
# Rearrange columns: identifiers first, vote columns in their existing order, geometry last.
# Columns are chosen by name rather than by position so that re-running this cell
# leaves the frame unchanged (a positional slice would duplicate PRECINCT and drop
# the last vote column on a second run).
id_cols = ['UNIQUE_ID', 'COUNTY', 'PRECINCT']
vote_cols = [col for col in cobb_pber.columns if col not in id_cols + ['geometry']]
cobb_pber = cobb_pber[id_cols + vote_cols + ['geometry']]

In [35]:
cobb_pber.head()

Unnamed: 0,UNIQUE_ID,COUNTY,PRECINCT,G22CQMNO,G22CQMYES,G22SLGDMET,G22SLGRBRU,G22SWSNLOY,G22SWSNSNE,GBOE2DSAY,...,GCNC3DTRI,GCNC3RBIR,GCTC1NACR,GCTC1NBLI,GCTC1NBOT,GCTC1NBOW,GCTC1NBUR,GCTC1NGUT,GCTC1NORO,geometry
0,Cobb-:-Acworth 1A,Cobb,Acworth 1A,0.0,0.0,1482.0,1671.0,2567.0,2528.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((-84.70906 34.05580, -84.70894 ..."
1,Cobb-:-Acworth 1B,Cobb,Acworth 1B,0.0,0.0,1236.0,1567.0,2305.0,2291.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((-84.69258 34.07828, -84.69240 34.078..."
2,Cobb-:-Acworth 1C,Cobb,Acworth 1C,0.0,0.0,1470.0,848.0,1866.0,1813.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((-84.63610 34.07493, -84.63747 ..."
3,Cobb-:-Addison 01,Cobb,Addison 01,0.0,0.0,731.0,827.0,1296.0,1308.0,0.0,...,723.0,834.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((-84.49564 34.01312, -84.49631 34.013..."
4,Cobb-:-Austell 1A,Cobb,Austell 1A,0.0,0.0,867.0,290.0,929.0,922.0,148.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"POLYGON ((-84.63531 33.83736, -84.63440 33.837..."


In [36]:
# Clean up
cobb_pber['geometry'] = cobb_pber.geometry.buffer(0)
#Fill NA
cobb_pber = cobb_pber.fillna(0)

In [37]:
# Create the output folder if it is missing; exist_ok avoids the separate
# exists-check and makes the cell safe to re-run.
os.makedirs("./GA22_COBB_PBER/", exist_ok=True)

cobb_pber.to_file("./GA22_COBB_PBER/GA22_COBB_PBER.shp")

In [38]:
def check_file(path):
    """Load a .csv or .shp file and write its quality report via ``run_checks``.

    The file type is decided by whether ".csv" (checked first) or ".shp" occurs
    anywhere in ``path``. For shapefiles, the .cpg, .dbf, .prj and .shx sidecar
    files must exist next to it (same path with the last four characters
    replaced); a missing one raises AssertionError after the file is loaded.

    Raises
    ------
    ValueError
        If ``path`` contains neither ".csv" nor ".shp".
    """
    if ".csv" in path:
        file_type, load = ".csv", pd.read_csv(path)
    elif ".shp" in path:
        file_type = ".shp"
        load = gp.read_file(path)
        stem = path[0:-4]
        for sidecar_ext in (".cpg", ".dbf", ".prj", ".shx"):
            assert(os.path.exists(stem + sidecar_ext))
    else:
        raise ValueError ("Other function needed to load non .csv or .shp")
    run_checks(load, path, file_type)
    
def run_checks(file, path, file_type):
    """Write a plain-text quality report for an already-loaded table.

    The report is saved in the working directory as ``FILE_CHECK<name>.txt``,
    where ``<name>`` is ``path`` with any "/Users" text removed and every "/"
    replaced by "-". Sections, in order: N/A values, file size, invalid
    geometries (only when ``file_type`` is ".shp"), column names, column names
    longer than 10 characters, ``describe()`` summary, per-column max, per-column
    min, numeric column sums, and whether each non-geometry column repeats values.

    Parameters
    ----------
    file : pandas.DataFrame or geopandas.GeoDataFrame
        The loaded data to inspect.
    path : str
        Path the data was loaded from; used for the report name and header.
    file_type : str
        ".shp" enables the geometry-validity section.
    """
    path = path.replace("/Users", "")
    report_name = "./FILE_CHECK" + path.replace("/", "-") + ".txt"
    gap = "\n\n"

    with open(report_name, "w") as report:
        emit = report.write

        emit("***RDH FILE CHECKER***\n")
        emit("Checking file located at " + path + "\n")
        emit(gap)

        emit("***N/A VALUES***\n")
        if file.isna().any().sum() == 0:
            emit("Do not appear to be N/A values")
        else:
            emit("N/A values in the following columns\n")
            for col in file.columns:
                column = file[col]
                num_nas = column.isna().sum()
                # Rows holding the literal text "nan" are only looked for once
                # at least one real N/A exists somewhere in the file.
                string_nans = file[column == "nan"].shape[0]
                if column.isna().any().sum():
                    emit("\t" + col + " - " + str(num_nas) + " row(s)\n")
                elif string_nans > 0:
                    emit("\t" + col + " - " + str(string_nans) + " row(s)\n")
        emit(gap)

        emit("***File Size***\n")
        emit("File contains " + str(file.shape[0]) + " row(s)\n")
        emit("File contains " + str(file.shape[1]) + " column(s)\n")
        emit(gap)

        if file_type == ".shp":
            emit("***Invalid Geometries***\n")
            emit("File contains " + str(file[~file["geometry"].is_valid].shape[0]) + " invalid GEOM row(s)\n")
            emit("File contains " + str(file[file["geometry"].is_valid].shape[0]) + " valid GEOM row(s)\n")
            emit(gap)

        emit("***COLUMN NAMES***\n")
        emit(str(list(file.columns)))
        emit(gap)

        emit("***COLUMN NAME LENGTHS***\n")
        too_long = [name for name in file.columns if len(name) > 10]
        if too_long:
            emit("The following columns are longer than 10 characters:" + str(too_long) + "\n")
        else:
            emit("No column names are longer than 10 characters\n")
        emit(gap)

        emit("***FULL FILE SUMMARY***\n")
        emit(str(file.describe()) + "\n")
        emit(gap)

        emit("***MAX VALUE FOR EVERY DATA COLUMN***\n")
        emit(str(file.max()) + "\n")
        emit(gap)

        emit("***MIN VALUE FOR EVERY DATA COLUMN***\n")
        emit(str(file.min()) + "\n")
        # Only one blank line follows this section in the report
        emit("\n")

        emit("***NUMERIC COLUMN SUMS***\n")
        emit(str(file.sum(numeric_only=True)) + "\n")
        emit(gap)

        emit("***COLUMNS AND DUPLICATE VALUES***\n")
        for col in list(file.columns):
            if col == "geometry":
                continue
            # Largest frequency of any single value, counting N/A as a value
            top_count = max(file[col].value_counts(dropna = False))
            if top_count == 1:
                emit("NO: Column " + col + " DOES NOT HAVE duplicate values\n")
            elif top_count > 1:
                emit("YES: Column " + col + " HAS duplicate values\n")
        emit("\n")


In [39]:
check_file("./GA22_COBB_PBER/GA22_COBB_PBER.shp")