#### Check before running code:
1. URL: confirm that the `url` variable set in code cell 3 (`In [3]`) points to the results page to be scraped

### Importing libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
import datetime as dt
import os

In [2]:
### switches off the pandas chained-assignment (copy) warning for the whole session
pd.set_option("mode.chained_assignment", None)

### Scraping results at sub-precinct level

In [3]:
### creates directory to store final dataframe after exporting
os.makedirs("governor_hillsborough/scraper_files/", exist_ok = True)

### sets URL
url = "https://enr.electionsfl.org/HIL/3311/Precincts/46493/0/447/"

### retrieves data; the timeout keeps the cell from hanging forever on a stalled connection
page = requests.get(url, timeout = 30)
### stops with an HTTP error instead of silently parsing an error page
page.raise_for_status()
### parses the page text; the parser is named explicitly so the result does not depend on
### which optional parsers happen to be installed
soup = BeautifulSoup(page.text, "html.parser")

### stores all div-tags with class name "Race row"
divs_race = soup.find_all("div", {"class": "Race row"})

### without any such div there is nothing to build the dataframe from,
### so the cell stops here with a clear message
if not divs_race:
    raise ValueError("No 'Race row' blocks found at " + url)

### creates list to write data into later (one element per precinct-candidate pair)
all_rows = []

### loop runs through all the stored divs
for race_div in divs_race:

    ### stores precinct name
    precinct = race_div.find("span", {"class": "PrecinctName"}).text
    ### stores precinct number (last five characters of the precinct name)
    precinct_no = precinct[-5:]
    ### stores base precinct number (first three characters of the precinct number, as integer)
    base_precinct_no = int(precinct_no[:3])

    ### stores all the td-tags with the candidate names
    names_tag = race_div.find_all("td", {"class": "ChoiceColumn"})
    ### stores number of candidates (read again by later cells; keeps the value from the last div)
    candidate_len = len(names_tag)

    ### stores all the td-tags with the votes
    results_tag = race_div.find_all("td", {"class": "DetailResultsColumn notranslate TotalVotes"})

    ### loop runs through all the tags with the candidate names
    for j in range(0, len(names_tag)):

        ### candidate name with runs of whitespace collapsed into single spaces
        name = " ".join(names_tag[j].text.split())
        ### votes obtained, with thousands separators removed (still text at this point)
        votes = results_tag[j].text.replace(',', "")

        ### adds a row in format: [precinct details, name, votes]
        all_rows.append([precinct, precinct_no, base_precinct_no, name, votes])

### converts the master list into dataframe, once, after every div has been read
df_votes = pd.DataFrame(all_rows, columns = ["PRECINCT", "PRECINCT_NUMBER", "BASE_PRECINCT_NUMBER", "CANDIDATE", "VOTES"])

### displays dataframe
df_votes

Unnamed: 0,PRECINCT,PRECINCT_NUMBER,BASE_PRECINCT_NUMBER,CANDIDATE,VOTES
0,101,101,101,Ron DeSantis (REP),1273
1,101,101,101,Charlie Crist (DEM),998
2,101,101,101,Hector Roos (LPF),7
3,101,101,101,Carmen Jackie Gimenez (NPA),11
4,103,103,103,Ron DeSantis (REP),1764
...,...,...,...,...,...
1787,993,993,993,Carmen Jackie Gimenez (NPA),0
1788,999,999,999,Ron DeSantis (REP),156
1789,999,999,999,Charlie Crist (DEM),191
1790,999,999,999,Hector Roos (LPF),0


### Compiling results at the precinct level

In [4]:
### converts number of votes into integers
df_votes["VOTES"] = df_votes["VOTES"].astype(int)
### groups sub-precincts into base precincts, summing the votes of each candidate
df_voteprecinct = df_votes.groupby(["BASE_PRECINCT_NUMBER", "CANDIDATE"]).agg({"VOTES": "sum"}).reset_index()

### converts base precinct numbers into strings
### the whole column is converted in one step: writing cell by cell through
### df[column][row] is a chained assignment, which pandas does not guarantee to apply
df_voteprecinct["BASE_PRECINCT_NUMBER"] = df_voteprecinct["BASE_PRECINCT_NUMBER"].astype(str)

### displays dataframe
df_voteprecinct

Unnamed: 0,BASE_PRECINCT_NUMBER,CANDIDATE,VOTES
0,101,Carmen Jackie Gimenez (NPA),11
1,101,Charlie Crist (DEM),998
2,101,Hector Roos (LPF),7
3,101,Ron DeSantis (REP),1273
4,103,Carmen Jackie Gimenez (NPA),9
...,...,...,...
1787,993,Ron DeSantis (REP),17
1788,999,Carmen Jackie Gimenez (NPA),0
1789,999,Charlie Crist (DEM),191
1790,999,Hector Roos (LPF),0


### Converting dataset into horizontal format

In [5]:
### stores time of processing/update
### taken once, before the loop, so that every row carries the same stamp
### note: "tnow" is read again when the export filename is built
tnow = dt.datetime.now()
dt_string = tnow.strftime("%m-%d-%Y %H:%M")
update = "Last updated at " + str(dt_string)

### initializes new list (referred to as "final list" later) to write data into
final_rows = []

### loop runs through the base precincts, in the order in which they first appear in the dataframe
### each group holds all the candidate rows of one base precinct
for base_precinct, precinct_rows in df_voteprecinct.groupby("BASE_PRECINCT_NUMBER", sort = False):

    ### creates new list with processing time and base precinct number as first two elements
    pre_row = [update, base_precinct]

    ### nested loop runs through the candidates of this precinct
    for candidate, votes in zip(precinct_rows["CANDIDATE"], precinct_rows["VOTES"]):

        ### adds candidate name to list
        pre_row.append(candidate.replace("WRITE-IN", "Write-In"))
        ### adds votes won to list
        pre_row.append(votes)
        ### extracts party name and adds to list
        ### the party is the three characters before the last one, e.g. "(REP)" gives "REP"
        ### a name ending in "WRITE-IN" gives "E-I", which is recorded as "OTHER"
        c_party = candidate[-4:][:-1]
        pre_row.append(c_party.replace("E-I", "OTHER"))

    ### details of that precinct have completely been recorded: adds list to final list
    final_rows.append(pre_row)

### converts final list to dataframe, once, after all precincts have been processed
df = pd.DataFrame(final_rows)

### displays dataframe
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,Last updated at 11-14-2022 20:55,101,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),998,DEM,Hector Roos (LPF),7,LPF,Ron DeSantis (REP),1273,REP
1,Last updated at 11-14-2022 20:55,103,Carmen Jackie Gimenez (NPA),9,NPA,Charlie Crist (DEM),1118,DEM,Hector Roos (LPF),9,LPF,Ron DeSantis (REP),1764,REP
2,Last updated at 11-14-2022 20:55,105,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),1089,DEM,Hector Roos (LPF),10,LPF,Ron DeSantis (REP),1129,REP
3,Last updated at 11-14-2022 20:55,107,Carmen Jackie Gimenez (NPA),19,NPA,Charlie Crist (DEM),1242,DEM,Hector Roos (LPF),10,LPF,Ron DeSantis (REP),1681,REP
4,Last updated at 11-14-2022 20:55,108,Carmen Jackie Gimenez (NPA),6,NPA,Charlie Crist (DEM),660,DEM,Hector Roos (LPF),5,LPF,Ron DeSantis (REP),923,REP


### Computing total votes tallied

In [6]:
### storing number of columns
col_no = len(df.columns)

### labels of the columns that hold votes: column 3 is the first one, and each candidate
### takes three columns (name, votes, party), so the next ones are 6, 9, ...
vote_cols = list(range(3, (candidate_len*3 + 1), 3))

### creates new column at the end with the total votes tallied in each row
### since the label is the column count, it lands in the right place irrespective of number of candidates
### the sum is computed for all rows at once and assigned as a whole column; writing cell by cell
### through df[column][row] is a chained assignment, which pandas does not guarantee to apply
df[col_no] = df[vote_cols].sum(axis = 1)

### stores tallied votes as a list (one total per row), used by the next cell
votes_tallied = df[col_no].tolist()

### displays dataframe
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,Last updated at 11-14-2022 20:55,101,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),998,DEM,Hector Roos (LPF),7,LPF,Ron DeSantis (REP),1273,REP,2289
1,Last updated at 11-14-2022 20:55,103,Carmen Jackie Gimenez (NPA),9,NPA,Charlie Crist (DEM),1118,DEM,Hector Roos (LPF),9,LPF,Ron DeSantis (REP),1764,REP,2900
2,Last updated at 11-14-2022 20:55,105,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),1089,DEM,Hector Roos (LPF),10,LPF,Ron DeSantis (REP),1129,REP,2239
3,Last updated at 11-14-2022 20:55,107,Carmen Jackie Gimenez (NPA),19,NPA,Charlie Crist (DEM),1242,DEM,Hector Roos (LPF),10,LPF,Ron DeSantis (REP),1681,REP,2952
4,Last updated at 11-14-2022 20:55,108,Carmen Jackie Gimenez (NPA),6,NPA,Charlie Crist (DEM),660,DEM,Hector Roos (LPF),5,LPF,Ron DeSantis (REP),923,REP,1594
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443,Last updated at 11-14-2022 20:55,990,Carmen Jackie Gimenez (NPA),3,NPA,Charlie Crist (DEM),491,DEM,Hector Roos (LPF),5,LPF,Ron DeSantis (REP),336,REP,835
444,Last updated at 11-14-2022 20:55,991,Carmen Jackie Gimenez (NPA),2,NPA,Charlie Crist (DEM),70,DEM,Hector Roos (LPF),1,LPF,Ron DeSantis (REP),148,REP,221
445,Last updated at 11-14-2022 20:55,992,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),3,DEM,Hector Roos (LPF),0,LPF,Ron DeSantis (REP),5,REP,8
446,Last updated at 11-14-2022 20:55,993,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),24,DEM,Hector Roos (LPF),1,LPF,Ron DeSantis (REP),17,REP,42


### Computing precinct-level vote shares, leads and margins

In [7]:
### labels of the (n+4) new columns for "n" candidates:
### one vote-share column per candidate, then lead party, lead candidate,
### raw margin and margin in percent points
new_col_labels = [col_no + offset + 1 for offset in range(0, (candidate_len + 4))]

### creates the new columns, filled with a placeholder
for label in new_col_labels:
    df[label] = " "

### stores the same labels as strings
new_cols = [str(label) for label in new_col_labels]

### vote-share columns: first candidate goes into the first new column and so on
share_cols = new_col_labels[:candidate_len]
### fourth-last new column holds the lead party; the lead candidate goes into the column after it
lead_col = new_col_labels[-4]
### penultimate new column holds the raw margin
raw_margin_col = new_col_labels[-2]
### last new column holds the margin in percent points
margin_pp_col = new_col_labels[-1]


def join_tied_names(names):
    """Joins candidate names into one string: "A and B", or "A, B and C" for more than two."""
    return " and ".join([", ".join(names[:-1]), names[-1]] if len(names) > 2 else names)


### loop runs through each row of dataframe
for i in range(0, len(df)):

    ### stores the total votes tallied for this row
    total = votes_tallied[i]

    ### reads votes, party and candidate name of every candidate in this row
    ### column 3 is the first column with a vote number; each candidate has three
    ### corresponding elements (name, votes, party), hence the step of three
    ### note: the lists are filled for every row, including rows without votes, because
    ### "party" and "can_names" are read again after this loop by the county-level cell
    votes_obtained = []
    party = []
    can_names = []
    for j in range(3, (candidate_len*3 + 1), 3):
        votes_obtained.append(df.at[i, j])
        party.append(df.at[i, j+1])
        can_names.append(df.at[i, j-1])

    ### checks whether votes have been tallied
    if total != 0:

        ## COMPUTING VOTE-SHARES

        ### percent of the row total won by each candidate
        shares = [(votes/total*100).round(2) for votes in votes_obtained]

        ## COMPUTING LEADING CANDIDATES AND WIN MARGINS

        ### highest vote-tally and the positions of all candidates who reached it
        max_value = max(votes_obtained)
        leaders = [idx for idx, votes in enumerate(votes_obtained) if votes == max_value]

        ### more than one candidate with the highest tally means a tie
        if len(leaders) > 1:
            lead_party = "TIED"
            ### names of all candidates with tied votes
            lead_can = join_tied_names([can_names[idx] for idx in leaders])
            ### margins are 0 in a tie
            margin = 0
            margin_pp = 0

        else:
            ### party and candidate at the position of the highest vote-tally
            max_index = leaders[0]
            lead_party = party[max_index]
            lead_can = can_names[max_index]

            ### second highest vote-tally: the largest tally below the highest one
            ### (falls back to the highest tally itself when there is no other candidate)
            second_max_value = max((votes for votes in votes_obtained if votes != max_value), default = max_value)
            ### raw number of votes between first and second placed candidates
            margin = max_value - second_max_value
            ### difference in percent points between first and second placed candidates
            margin_pp = ((max_value/total*100)-(second_max_value/total*100)).round(2)

    ### if no votes have been tallied
    else:
        shares = [0] * candidate_len
        lead_party = "No votes tallied"
        lead_can = "No votes tallied"
        margin = 0
        margin_pp = 0

    ### writes the results of this row into the new columns
    ### df.at addresses one cell by (row label, column label) and writes into the dataframe itself
    for share_col, share in zip(share_cols, shares):
        df.at[i, share_col] = share
    df.at[i, lead_col] = lead_party
    df.at[i, lead_col + 1] = lead_can
    df.at[i, raw_margin_col] = margin
    df.at[i, margin_pp_col] = margin_pp

### replaces null values with "0"
df.fillna(0, inplace = True)

### displays dataframe
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,Last updated at 11-14-2022 20:55,101,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),998,DEM,Hector Roos (LPF),7,...,REP,2289,0.48,43.60,0.31,55.61,REP,Ron DeSantis (REP),275,12.01
1,Last updated at 11-14-2022 20:55,103,Carmen Jackie Gimenez (NPA),9,NPA,Charlie Crist (DEM),1118,DEM,Hector Roos (LPF),9,...,REP,2900,0.31,38.55,0.31,60.83,REP,Ron DeSantis (REP),646,22.28
2,Last updated at 11-14-2022 20:55,105,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),1089,DEM,Hector Roos (LPF),10,...,REP,2239,0.49,48.64,0.45,50.42,REP,Ron DeSantis (REP),40,1.79
3,Last updated at 11-14-2022 20:55,107,Carmen Jackie Gimenez (NPA),19,NPA,Charlie Crist (DEM),1242,DEM,Hector Roos (LPF),10,...,REP,2952,0.64,42.07,0.34,56.94,REP,Ron DeSantis (REP),439,14.87
4,Last updated at 11-14-2022 20:55,108,Carmen Jackie Gimenez (NPA),6,NPA,Charlie Crist (DEM),660,DEM,Hector Roos (LPF),5,...,REP,1594,0.38,41.41,0.31,57.90,REP,Ron DeSantis (REP),263,16.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443,Last updated at 11-14-2022 20:55,990,Carmen Jackie Gimenez (NPA),3,NPA,Charlie Crist (DEM),491,DEM,Hector Roos (LPF),5,...,REP,835,0.36,58.80,0.60,40.24,DEM,Charlie Crist (DEM),155,18.56
444,Last updated at 11-14-2022 20:55,991,Carmen Jackie Gimenez (NPA),2,NPA,Charlie Crist (DEM),70,DEM,Hector Roos (LPF),1,...,REP,221,0.90,31.67,0.45,66.97,REP,Ron DeSantis (REP),78,35.29
445,Last updated at 11-14-2022 20:55,992,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),3,DEM,Hector Roos (LPF),0,...,REP,8,0.00,37.50,0.00,62.50,REP,Ron DeSantis (REP),2,25.00
446,Last updated at 11-14-2022 20:55,993,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),24,DEM,Hector Roos (LPF),1,...,REP,42,0.00,57.14,2.38,40.48,DEM,Charlie Crist (DEM),7,16.67


### Computing overall county vote shares, leads and margins

In [8]:
## COMPUTING COUNTY-LEVEL TOTALS

### storing number of columns
new_col_no = len(df.columns)

### label of the column holding the total votes tallied in each precinct
total_col = candidate_len*3 + 2
### county-wide total, written into a new column (same value on every row)
ct_total = df[total_col].sum()
df[new_col_no] = ct_total

### stores new length of dataframe (label of the first county-level vote-share column)
ct_new_col_no = len(df.columns)

### labels of the columns that hold votes: 3, 6, 9, ... (three columns per candidate)
vote_cols = list(range(3, (candidate_len*3 + 1), 3))

if ct_total != 0:

    ### candidate names and parties, read from the first row of the dataframe
    ### NOTE: assumes every row lists the candidates in the same column order,
    ### which the column-wise vote sums below rely on as well
    ct_can_names = [df[j-1].iloc[0] for j in vote_cols]
    ct_parties = [df[j+1].iloc[0] for j in vote_cols]

    ## COMPUTING COUNTY-LEVEL VOTE SHARES

    ### computes sum of all votes per candidate
    ct_can_votes = [df[j].sum() for j in vote_cols]
    ### computes vote-shares of candidates
    ct_vote_shares = [(votes/ct_total*100).round(2) for votes in ct_can_votes]

    ## COMPUTING LEADING CANDIDATES AND WIN MARGINS

    ### stores overall maximum votes and the positions of all candidates who reached it
    ct_max_value = max(ct_can_votes)
    ct_leaders = [idx for idx, votes in enumerate(ct_can_votes) if votes == ct_max_value]

    ### stores vote share for candidate with highest vote-tally
    ct_lead_voteshare = (ct_max_value/ct_total*100).round(2)

    ### if there are more than one instances of highest votes (in case of a tie)
    if len(ct_leaders) > 1:

        ### names of all candidates with the highest votes tallied
        string = [ct_can_names[idx] for idx in ct_leaders]

        ### stores leading party as tied
        ct_lead_party = "TIED"
        ### stores names of tied candidates
        ct_lead_can = " and ".join([", ".join(string[:-1]),string[-1]] if len(string) > 2 else string)

        ### stores margins as "0"
        ct_margin = 0
        ct_margin_pp = 0

        ### a tie has no separate second place: the second-place fields repeat the tie details
        ### (they must be set here, as they are written into the dataframe below)
        second_ct_party = "TIED"
        second_ct_can = ct_lead_can
        second_ct_voteshare = ct_lead_voteshare

    else:

        ### computes index of the highest tallied vote
        ct_max_index = ct_leaders[0]

        ### stores name of corresponding party and candidate
        ct_lead_party = ct_parties[ct_max_index]
        ct_lead_can = ct_can_names[ct_max_index]

        ### computes second highest tallied vote: the largest tally below the highest one
        ### (falls back to the highest tally itself when there is no other candidate)
        second_ct_max_value = max((votes for votes in ct_can_votes if votes != ct_max_value), default = ct_max_value)
        ### computes index of the second-highest tallied vote
        second_ct_max_index = ct_can_votes.index(second_ct_max_value)

        ### stores name of corresponding party and candidate
        second_ct_party = ct_parties[second_ct_max_index]
        second_ct_can = ct_can_names[second_ct_max_index]

        ### stores margin of difference and the margin as percent points
        ct_margin = ct_max_value - second_ct_max_value
        ct_margin_pp = ((ct_max_value/ct_total*100)-(second_ct_max_value/ct_total*100)).round(2)

        ### stores the vote share of the second highest tallied vote
        second_ct_voteshare = (second_ct_max_value/ct_total*100).round(2)

### if no votes have been tallied in the whole county
else:

    ### sets vote-shares of candidates as 0
    ct_vote_shares = [0] * candidate_len

    ct_lead_party = "No votes tallied"
    ct_lead_can = "No votes tallied"
    ct_margin = 0
    ct_margin_pp = 0
    ct_lead_voteshare = 0
    second_ct_party = "No votes tallied"
    second_ct_can = "No votes tallied"
    second_ct_voteshare = 0

### writes vote-shares of candidates into new columns (same value on every row)
for i in range(0, candidate_len):
    df[ct_new_col_no + i] = ct_vote_shares[i]

### computes new length of dataframe
final_col_no = len(df.columns)

### writes into cells
### note: the labels start at (final_col_no + 1), so the label "final_col_no" itself stays unused;
### the number and order of the columns is what matters here and is left unchanged
df[final_col_no + 1] = ct_lead_party
df[final_col_no + 2] = ct_lead_can
df[final_col_no + 3] = ct_margin
df[final_col_no + 4] = ct_margin_pp
df[final_col_no + 5] = ct_lead_voteshare
df[final_col_no + 6] = second_ct_party
df[final_col_no + 7] = second_ct_can
df[final_col_no + 8] = second_ct_voteshare

### replaces null values with "0"
df.fillna(0, inplace = True)

### displays dataframe
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26,27,29,30,31,32,33,34,35,36
0,Last updated at 11-14-2022 20:55,101,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),998,DEM,Hector Roos (LPF),7,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
1,Last updated at 11-14-2022 20:55,103,Carmen Jackie Gimenez (NPA),9,NPA,Charlie Crist (DEM),1118,DEM,Hector Roos (LPF),9,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
2,Last updated at 11-14-2022 20:55,105,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),1089,DEM,Hector Roos (LPF),10,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
3,Last updated at 11-14-2022 20:55,107,Carmen Jackie Gimenez (NPA),19,NPA,Charlie Crist (DEM),1242,DEM,Hector Roos (LPF),10,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
4,Last updated at 11-14-2022 20:55,108,Carmen Jackie Gimenez (NPA),6,NPA,Charlie Crist (DEM),660,DEM,Hector Roos (LPF),5,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443,Last updated at 11-14-2022 20:55,990,Carmen Jackie Gimenez (NPA),3,NPA,Charlie Crist (DEM),491,DEM,Hector Roos (LPF),5,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
444,Last updated at 11-14-2022 20:55,991,Carmen Jackie Gimenez (NPA),2,NPA,Charlie Crist (DEM),70,DEM,Hector Roos (LPF),1,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
445,Last updated at 11-14-2022 20:55,992,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),3,DEM,Hector Roos (LPF),0,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
446,Last updated at 11-14-2022 20:55,993,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),24,DEM,Hector Roos (LPF),1,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95


### Computing and inserting column names

In [9]:
### candidate numbers "1", "2", ... used as suffixes of the per-candidate column names
candidate_numbers = [str(number) for number in range(1, candidate_len + 1)]

### column names start with the update time and the base precinct
cols = ["LAST_UPDATE_2022", "BASE_PRECINCT"]

### three columns per candidate: name, votes won and party
for number in candidate_numbers:
    cols += ["CANDIDATE_NAME_" + number, "VOTES_WON_" + number, "CANDIDATE_PARTY_" + number]

### precinct-level total, then one vote-share column per candidate
cols += ["TOTAL_VOTES_TALLIED"]
cols += ["VOTE_SHARE_" + number for number in candidate_numbers]

### precinct-level lead and margins, followed by the county-level total
cols += [
    "LEAD_PARTY",
    "LEAD_CANDIDATE",
    "RAW_MARGIN",
    "MARGIN_PERCENTPOINTS",
    "CT_TOTAL_VOTES_TALLIED",
]

### one county-level vote-share column per candidate
cols += ["CT_VOTE_SHARE_" + number for number in candidate_numbers]

### county-level lead, margins and second-place details
cols += [
    "CT_LEAD_PARTY",
    "CT_LEAD_CANDIDATE",
    "CT_RAW_MARGIN",
    "CT_MARGIN_PERCENTPOINTS",
    "CT_LEAD_VOTESHARE",
    "CT_SECOND_PARTY",
    "CT_SECOND_CANDIDATE",
    "CT_SECOND_VOTESHARE",
]

### replaces column indices with generated column names
df.columns = cols

### displays dataframe
df

Unnamed: 0,LAST_UPDATE_2022,BASE_PRECINCT,CANDIDATE_NAME_1,VOTES_WON_1,CANDIDATE_PARTY_1,CANDIDATE_NAME_2,VOTES_WON_2,CANDIDATE_PARTY_2,CANDIDATE_NAME_3,VOTES_WON_3,...,CT_VOTE_SHARE_3,CT_VOTE_SHARE_4,CT_LEAD_PARTY,CT_LEAD_CANDIDATE,CT_RAW_MARGIN,CT_MARGIN_PERCENTPOINTS,CT_LEAD_VOTESHARE,CT_SECOND_PARTY,CT_SECOND_CANDIDATE,CT_SECOND_VOTESHARE
0,Last updated at 11-14-2022 20:55,101,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),998,DEM,Hector Roos (LPF),7,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
1,Last updated at 11-14-2022 20:55,103,Carmen Jackie Gimenez (NPA),9,NPA,Charlie Crist (DEM),1118,DEM,Hector Roos (LPF),9,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
2,Last updated at 11-14-2022 20:55,105,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),1089,DEM,Hector Roos (LPF),10,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
3,Last updated at 11-14-2022 20:55,107,Carmen Jackie Gimenez (NPA),19,NPA,Charlie Crist (DEM),1242,DEM,Hector Roos (LPF),10,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
4,Last updated at 11-14-2022 20:55,108,Carmen Jackie Gimenez (NPA),6,NPA,Charlie Crist (DEM),660,DEM,Hector Roos (LPF),5,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443,Last updated at 11-14-2022 20:55,990,Carmen Jackie Gimenez (NPA),3,NPA,Charlie Crist (DEM),491,DEM,Hector Roos (LPF),5,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
444,Last updated at 11-14-2022 20:55,991,Carmen Jackie Gimenez (NPA),2,NPA,Charlie Crist (DEM),70,DEM,Hector Roos (LPF),1,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
445,Last updated at 11-14-2022 20:55,992,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),3,DEM,Hector Roos (LPF),0,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95
446,Last updated at 11-14-2022 20:55,993,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),24,DEM,Hector Roos (LPF),1,...,0.34,54.18,REP,Ron DeSantis (REP),44622,9.23,54.18,DEM,Charlie Crist (DEM),44.95


### Merging final dataframe with dataset of precinct demographics and prior results

In [10]:
### importing dataset with demographics and prior results
df_demos = pd.read_csv("hillsborough_gov_demos.csv",  keep_default_na = False)

### converting precincts to integer type in both dataframes so that the merge keys are comparable
### each column is converted as a whole: the two dataframes need not have the same number of rows,
### so one shared row counter cannot be used for both
### note: a precinct value that is not a whole number raises an error here
df["BASE_PRECINCT"] = df["BASE_PRECINCT"].astype(int)
df_demos["BASE_PRECINCT"] = df_demos["BASE_PRECINCT"].astype(int)

### merging (pandas default is an inner join: only precincts present in both dataframes are kept)
df_master = pd.merge(df, df_demos, on = "BASE_PRECINCT")

### converts the precinct numbers to strings, left-padded with zeros to three characters
df_master["BASE_PRECINCT"] = df_master["BASE_PRECINCT"].apply(lambda x: "{0:0>3}".format(x))

### stores time
### note: "tnow" is the processing time recorded in the horizontal-format cell,
### so the filename matches the "Last updated at" value inside the file
dt_string = tnow.strftime("%m-%d-%Y-%H%M")
### constructs filename
report = "gov_hillsborough_scraper_report_" + str(dt_string) + ".csv"
path = "governor_hillsborough/scraper_files/" + report

### exports dataframe
df_master.to_csv(path, index = False)

### displays dataframe
df_master

Unnamed: 0,LAST_UPDATE_2022,BASE_PRECINCT,CANDIDATE_NAME_1,VOTES_WON_1,CANDIDATE_PARTY_1,CANDIDATE_NAME_2,VOTES_WON_2,CANDIDATE_PARTY_2,CANDIDATE_NAME_3,VOTES_WON_3,...,TOTAL_REGISTERED_VOTERS,RACE,PARTISANSHIP,DEMOCRAT_SHARE,REPUBLICAN_SHARE,NPA_SHARE,WHITE_SHARE,BLACK_SHARE,HISPANIC_SHARE,OTHER_SHARE
0,Last updated at 11-14-2022 20:55,101,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),998,DEM,Hector Roos (LPF),7,...,4849,Majority White,Other,30.9,33.8,33.1,65.1,13.9,9.8,11.1
1,Last updated at 11-14-2022 20:55,103,Carmen Jackie Gimenez (NPA),9,NPA,Charlie Crist (DEM),1118,DEM,Hector Roos (LPF),9,...,5380,Majority White,Other,28.9,36.7,32.0,72.0,7.6,11.3,9.1
2,Last updated at 11-14-2022 20:55,105,Carmen Jackie Gimenez (NPA),11,NPA,Charlie Crist (DEM),1089,DEM,Hector Roos (LPF),10,...,4836,Majority White,Other,38.6,27.8,31.9,56.0,14.7,16.4,12.9
3,Last updated at 11-14-2022 20:55,107,Carmen Jackie Gimenez (NPA),19,NPA,Charlie Crist (DEM),1242,DEM,Hector Roos (LPF),10,...,5187,Majority White,Other,32.6,34.9,30.1,71.4,8.3,9.6,10.7
4,Last updated at 11-14-2022 20:55,108,Carmen Jackie Gimenez (NPA),6,NPA,Charlie Crist (DEM),660,DEM,Hector Roos (LPF),5,...,2478,Majority White,Other,31.1,39.1,27.1,77.6,5.9,8.2,8.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443,Last updated at 11-14-2022 20:55,990,Carmen Jackie Gimenez (NPA),3,NPA,Charlie Crist (DEM),491,DEM,Hector Roos (LPF),5,...,2225,Other,Other,40.2,21.6,35.9,43.4,23.1,24.8,8.8
444,Last updated at 11-14-2022 20:55,991,Carmen Jackie Gimenez (NPA),2,NPA,Charlie Crist (DEM),70,DEM,Hector Roos (LPF),1,...,432,Majority White,Other,26.6,41.0,31.0,72.2,9.3,12.0,6.5
445,Last updated at 11-14-2022 20:55,992,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),3,DEM,Hector Roos (LPF),0,...,94,Majority White,Other,30.9,23.4,41.5,53.2,20.2,18.1,8.5
446,Last updated at 11-14-2022 20:55,993,Carmen Jackie Gimenez (NPA),0,NPA,Charlie Crist (DEM),24,DEM,Hector Roos (LPF),1,...,135,Other,Other,48.1,20.0,31.9,49.6,18.5,23.7,8.1
