In [1]:
# Dependencies
import pandas as pd
import os

In [2]:
# Path to the election data CSV. It is a relative path, so it is resolved
# against the directory the notebook kernel is running in.
csv_file: str = "Resources/election_data.csv"

In [3]:
# Render floats with thousands separators and no decimal places (e.g. 1,234,567).
pd.set_option("display.float_format", "{:,.0f}".format)

# Load the raw election records from the CSV referenced by `csv_file`.
election_data_df = pd.read_csv(csv_file)

# Preview the first five rows (shown as the cell output in Jupyter).
election_data_df.head()

Unnamed: 0,Voter ID,County,Candidate
0,12864552,Marsh,Khan
1,17444633,Marsh,Correy
2,19330107,Marsh,Khan
3,19865775,Queen,Khan
4,11927875,Marsh,Khan


In [4]:
# Total votes cast = number of rows that carry a non-null "Voter ID".
num_votes = election_data_df["Voter ID"].notna().sum()

print(f"Total number of votes cast: {num_votes}")

Total number of votes cast: 3521001


In [5]:
# Collect the distinct candidate names found in the "Candidate" column
# (pandas returns them in order of first appearance).
list_candidates = pd.unique(election_data_df["Candidate"])

print("List of candidates who have received votes:")
list_candidates

List of candidates who have received votes:


array(['Khan', 'Correy', 'Li', "O'Tooley"], dtype=object)

In [6]:
# Tally the votes each candidate won, in the same order as `list_candidates`.
#
# A single grouped count replaces one full-table boolean filter per candidate.
# As before, only rows with a non-null "Voter ID" are counted, and a candidate
# with no matching rows gets a count of 0 (via `fill_value`).
votes_by_candidate = (
    election_data_df
    .groupby("Candidate", sort=False)["Voter ID"]
    .count()
    .reindex(list_candidates, fill_value=0)
)

# .tolist() yields plain Python ints, so the list prints as bare numbers
# instead of NumPy scalar reprs.
num_votes_list = votes_by_candidate.tolist()

print("Number of votes each candidate won:")
print(num_votes_list)

Number of votes each candidate won:
[2218231, 704200, 492940, 105630]


In [7]:
# Express each candidate's vote count as a percentage of all votes cast,
# rounded to two decimal places. Order matches `num_votes_list`.
percent_votes_list = [
    round((candidate_votes / num_votes) * 100, 2)
    for candidate_votes in num_votes_list
]

print("Percentage of votes each candidate won:")
print(percent_votes_list)

Percentage of votes each candidate won:
[63.0, 20.0, 14.0, 3.0]


In [8]:
# Determine the winner of the election by popular vote.

# Column name -> per-candidate values; the three sequences share one ordering.
election_results_dict = {
    "Candidates": list_candidates,
    "Number of Votes": num_votes_list,
    "Percentage of Votes (%)": percent_votes_list,
}

# One row per candidate.
election_results_df = pd.DataFrame(election_results_dict)

# Rank candidates from most to fewest votes and renumber the rows from 0.
election_results_descending_df = (
    election_results_df
    .sort_values("Number of Votes", ascending=False)
    .reset_index(drop=True)
)

# The first row after ranking holds the candidate with the most votes.
winner = election_results_descending_df["Candidates"].iloc[0]
print(f"Winner: {winner}")

# Presentation-only formatting, applied after the winner has been read:
#   - percentages become strings with a trailing percent sign;
#   - vote counts become floats so that pandas' float display format
#     renders them with thousands separators.
election_results_descending_df = election_results_descending_df.assign(
    **{
        "Percentage of Votes (%)": lambda results: (
            results["Percentage of Votes (%)"].astype(str) + "%"
        ),
        "Number of Votes": lambda results: results["Number of Votes"].astype(float),
    }
)

# Show the ranked, formatted results table.
election_results_descending_df

Winner: Khan


Unnamed: 0,Candidates,Number of Votes,Percentage of Votes (%)
0,Khan,2218231,63.0%
1,Correy,704200,20.0%
2,Li,492940,14.0%
3,O'Tooley,105630,3.0%


In [9]:
# Print the election report: a title, the total vote count, the ranked
# results table and the winner, each section fenced by a divider line.
divider = "---------------------------------------------------------------"
report_lines = [
    divider,
    "Election Results",
    divider,
    "Total votes: {:,.0f}".format(num_votes),
    divider,
    election_results_descending_df.to_string(index=False),
    divider,
    f"Winner: {winner}",
    divider,
]
print("\n".join(report_lines))

---------------------------------------------------------------
Election Results
---------------------------------------------------------------
Total votes: 3,521,001
---------------------------------------------------------------
Candidates  Number of Votes Percentage of Votes (%)
      Khan        2,218,231                   63.0%
    Correy          704,200                   20.0%
        Li          492,940                   14.0%
  O'Tooley          105,630                    3.0%
---------------------------------------------------------------
Winner: Khan
---------------------------------------------------------------


In [10]:
# Export a text file with the results.
#
# The report uses CRLF line endings. The file is opened with newline="\r\n"
# and only "\n" is written, so every newline -- including the ones inside the
# multi-line table produced by to_string() -- is emitted as CRLF on every
# platform. Writing literal "\r\n" in default text mode instead produces
# "\r\r\n" on Windows and a mix of LF and CRLF endings elsewhere.
divider = "---------------------------------------------------------------"
report_lines = [
    divider,
    "Election Results",
    divider,
    "Total votes: {:,.0f}".format(num_votes),
    divider,
    election_results_descending_df.to_string(index=False),
    divider,
    f"Winner: {winner}",
    divider,
]

# Explicit encoding keeps the output independent of the platform's default
# locale encoding (candidate names may contain non-ASCII characters).
with open("election_results.txt", "w", encoding="utf-8", newline="\r\n") as report_file:
    report_file.write("\n".join(report_lines) + "\n")