In [1]:
# import dependencies

import pandas as pd
import numpy as np
import glob
import os

In [2]:
# Read every CSV under Resources/ and stack them into a single DataFrame.
#
# glob.glob returns matches in arbitrary, filesystem-dependent order, so the
# file list is sorted to keep row order (and the head() preview below)
# reproducible across machines and re-runs.

path = os.path.join('Resources')
all_files = sorted(glob.glob(os.path.join(path, '*.csv')))

# Fail early with an actionable message; otherwise pd.concat would raise a
# generic "No objects to concatenate" error further down.
if not all_files:
    raise FileNotFoundError('No CSV files found in ' + repr(path))

# One frame per file; the first row of each file is treated as the header.
frames = [pd.read_csv(filename, index_col = None, header = 0) for filename in all_files]

# ignore_index=True discards the per-file row labels and gives the combined
# frame a fresh 0..n-1 index.
data = pd.concat(frames, axis = 0, ignore_index = True)
data.head()

Unnamed: 0,Voter ID,County,Candidate
0,12864552,Marsh,Khan
1,17444633,Marsh,Correy
2,19330107,Marsh,Khan
3,19865775,Queen,Khan
4,11927875,Marsh,Khan


In [3]:
# calculate the total number of votes cast
#
# Each row of `data` is one ballot, so the total is simply the row count.
# Counting unique index labels would silently undercount if the index ever
# contained duplicates (e.g. frames concatenated without ignore_index).

vote_ct = len(data)

In [4]:
# list the distinct candidate names appearing in the Candidate column
# (order follows first appearance in `data`)

data['Candidate'].unique()

array(['Khan', 'Correy', 'Li', "O'Tooley"], dtype=object)

In [5]:
# tally votes per candidate, largest tally first
#
# The result is a frame with columns 'Candidate' and 'Voter ID', where
# 'Voter ID' holds the number of non-null voter IDs recorded for that
# candidate, and the index is reset to 0..k-1 in ranked order.

tot_votes = (
    data
    .groupby('Candidate')['Voter ID']
    .count()
    .sort_values(ascending = False)
    .reset_index()
)
tot_votes

Unnamed: 0,Candidate,Voter ID
0,Khan,2218231
1,Correy,704200
2,Li,492940
3,O'Tooley,105630


In [6]:
# add each candidate's share of the total vote, as a whole-number percentage

vote_share = (tot_votes['Voter ID'] / vote_ct) * 100
tot_votes['percent'] = vote_share.round()
tot_votes

Unnamed: 0,Candidate,Voter ID,percent
0,Khan,2218231,63.0
1,Correy,704200,20.0
2,Li,492940,14.0
3,O'Tooley,105630,3.0


In [7]:
# the popular-vote winner is the candidate at row label 0 of tot_votes,
# which is sorted by vote count in descending order

winner = str(tot_votes.loc[0, 'Candidate'])

In [8]:
# prepare analysis: one "<name>: <percent>% (<votes>)" line per ranked candidate

def _result_line(rank, name = None):
    """Build the summary line for the candidate at row label `rank` of tot_votes.

    When `name` is given it is used as-is; otherwise the name is read from
    the 'Candidate' column.
    """
    if name is None:
        name = str(tot_votes['Candidate'][rank])
    pct = str(tot_votes['percent'][rank])
    votes = str(tot_votes['Voter ID'][rank])
    return ''.join([name, ': ', pct, '% (', votes, ')'])

# the top line reuses the already-computed winner name
first = _result_line(0, name = winner)
second = _result_line(1)
third = _result_line(2)
fourth = _result_line(3)

In [9]:
# print analysis in terminal: assemble the report lines, then emit them at once

divider = '-------------------------'
report = [
    'Election Results',
    divider,
    'Total Votes: ' + str(vote_ct),
    divider,
    first,
    second,
    third,
    fourth,
    divider,
    'Winner: ' + str(winner),
    divider,
]
print('\n'.join(report))

Election Results
-------------------------
Total Votes: 3521001
-------------------------
Khan: 63.0% (2218231)
Correy: 20.0% (704200)
Li: 14.0% (492940)
O'Tooley: 3.0% (105630)
-------------------------
Winner: Khan
-------------------------


In [10]:
# export analysis in text file
#
# The report is built from the values computed in the cells above (vote_ct,
# first..fourth, winner) rather than from literals pasted from one run, so
# analysis.txt always matches the data that was actually loaded.

rule = '-------------------------'
export_lines = [
    'Election Results',
    rule,
    'Total Votes: ' + str(vote_ct),
    rule,
    first,
    second,
    third,
    fourth,
    rule,
    'Winner: ' + str(winner),
    rule,
]

# The context manager closes the file even if a write raises.
with open('analysis.txt', 'w') as file:
    for line in export_lines:
        file.write(line + '\n')