# Pandas Approach to PyPoll

In [1]:
# Import pandas
import pandas as pd

In [4]:
# Load the election results CSV into a DataFrame, then preview its first five rows
# (the path is relative to the notebook's working directory)
election_results_df = pd.read_csv('Resources/election_results.csv')
election_results_df.head(n=5)

Unnamed: 0,Ballot ID,County,Candidate
0,1323913,Jefferson,Charles Casper Stockham
1,1005842,Jefferson,Charles Casper Stockham
2,1880345,Jefferson,Charles Casper Stockham
3,1600337,Jefferson,Charles Casper Stockham
4,1835994,Jefferson,Charles Casper Stockham


In [12]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple;
# displayed as the cell's output because it is the last expression
election_results_df.shape

(369711, 3)

In [13]:
# The row count is used as the total number of votes cast
# (assumes one row per ballot — confirm against the source data)
total_votes = election_results_df.shape[0]
print(f"Total number of votes: {total_votes}")

Total number of votes: 369711


In [19]:
# Tally how many rows (ballots) name each candidate.
# value_counts() returns a Series indexed by candidate, sorted from most to fewest votes.
candidates = election_results_df.loc[:, 'Candidate'].value_counts()
print(candidates)

Diana DeGette              272892
Charles Casper Stockham     85213
Raymon Anthony Doane        11606
Name: Candidate, dtype: int64


In [45]:
# Build the per-candidate summary table in a single chain:
#   1. name the tally's index "Candidates" and move it into a column,
#      with the counts landing in "Total Votes";
#   2. add "Percentage": each tally as a share of total_votes (from the earlier
#      cell), rendered as a whole-number percent string such as "74%".
candidates_df = (
    candidates
    .rename_axis("Candidates")
    .reset_index(name="Total Votes")
    .assign(
        Percentage=lambda tally: (
            tally["Total Votes"].div(total_votes).mul(100).map("{:.0f}%".format)
        )
    )
)
candidates_df

Unnamed: 0,Candidates,Total Votes,Percentage
0,Diana DeGette,272892,74%
1,Charles Casper Stockham,85213,23%
2,Raymon Anthony Doane,11606,3%


In [47]:
# Tally how many rows (ballots) were recorded in each county.
# value_counts() returns a Series indexed by county, sorted from most to fewest votes.
county = election_results_df.loc[:, 'County'].value_counts()
print(county)

Denver       306055
Jefferson     38855
Arapahoe      24801
Name: County, dtype: int64


In [48]:
# Build the per-county summary table in a single chain:
#   1. name the tally's index "County" and move it into a column,
#      with the counts landing in "Total Votes";
#   2. add "Percentage": each tally as a share of total_votes (from the earlier
#      cell), rendered as a whole-number percent string such as "83%".
# Note: each share is rounded independently, so the column need not sum to 100%.
counties_df = (
    county
    .rename_axis("County")
    .reset_index(name="Total Votes")
    .assign(
        Percentage=lambda tally: (
            tally["Total Votes"].div(total_votes).mul(100).map("{:.0f}%".format)
        )
    )
)
counties_df

Unnamed: 0,County,Total Votes,Percentage
0,Denver,306055,83%
1,Jefferson,38855,11%
2,Arapahoe,24801,7%


In [53]:
# Largest per-county vote total
most_votes = counties_df["Total Votes"].max()

# Keep every row whose total equals that maximum (tied counties would all appear)
highest_turnout = counties_df.loc[counties_df["Total Votes"].eq(most_votes)]

# Header line followed by the matching row(s), one per line of output
print("The county with the highest voter turnout is ", highest_turnout, sep="\n")

The county with the highest voter turnout is 
   County  Total Votes Percentage
0  Denver       306055        83%


In [54]:
# Largest per-candidate vote total
popular_vote = candidates_df["Total Votes"].max()

# The winner is whichever row holds that total (tied candidates would all appear)
winning_candidate = candidates_df.loc[candidates_df["Total Votes"].eq(popular_vote)]

# Header line followed by the winning row(s), one per line of output
print("The winner of the Colorado Election is: ", winning_candidate, sep="\n")

The winner of the Colorado Election is: 
      Candidates  Total Votes Percentage
0  Diana DeGette       272892        74%
