In [61]:
# SCRIPT FOR PYBANK (HW #1)

# * In this challenge, you are tasked with creating a Python script for analyzing the financial records of your company. 
# * You will be given a set of financial data called [budget_data.csv](PyBank/Resources/budget_data.csv). 
# * The dataset is composed of two columns: `Date` and `Profit/Losses`. 
# * Your task is to create a Python script that analyzes the records to calculate each of the following:
#   * The total number of months included in the dataset
#   * The net total amount of "Profit/Losses" over the entire period
#   * The average of the changes in "Profit/Losses" over the entire period
#   * The greatest increase in profits (date and amount) over the entire period
#   * The greatest decrease in profits (date and amount) over the entire period
# * As an example, your analysis should look similar to the one below:

#   EXAMPLE OUTCOME:
#   Financial Analysis
#   ----------------------------
#   Total Months: 86
#   Total: $38382578
#   Average  Change: $-2315.12
#   Greatest Increase in Profits: Feb-2012 ($1926159)
#   Greatest Decrease in Profits: Sep-2013 ($-2196167)

# * In addition, your final script should both print the analysis to the terminal and export a text file with the results.

# Import necessary libraries
import csv
import datetime
import pandas as pd
import locale

# Source data for the analysis and the text file the report is exported to.
# NOTE(review): csv_path is an absolute path on one specific machine; consider
# replacing it with a path relative to the notebook so others can run this.
csv_path = (r"C:\Users\ryanz\Desktop\Data Analytics Bootcamp\Resources\RICH201901DATA3\03-Python\Homework\Instructions\PyBank\Resources\budget_data.csv")
budget_report_path = "budget_analysis.txt"


def format_usd(amount):
    """Render ``amount`` as US dollars with two decimals.

    Negative amounts are wrapped in parentheses, e.g. ``($2315.12)``.
    The string is built by hand instead of with ``locale.currency`` so the
    result is the same on every machine, whatever its locale settings are.
    """
    text = f"${abs(float(amount)):.2f}"
    return f"({text})" if amount < 0 else text


def summarize_budget(budget_df):
    """Compute the PyBank summary figures from a budget DataFrame.

    Parameters:
        budget_df: DataFrame with a ``Date`` column and a numeric
            ``Profit/Losses`` column, one row per month in time order.

    Returns:
        dict with the keys
        ``months``            - number of non-null ``Date`` entries,
        ``net_total``         - sum of ``Profit/Losses``,
        ``avg_change``        - mean month-over-month change, or None,
        ``greatest_increase`` - ``(date, change)`` of the largest change, or None,
        ``greatest_decrease`` - ``(date, change)`` of the smallest change, or None.
        The last three are None when there are fewer than two rows, because
        no month-over-month change exists then.

    The input frame is not modified.
    """
    profits = budget_df["Profit/Losses"]
    changes = profits.diff()  # first row has no predecessor, so it is NaN

    summary = {
        "months": int(budget_df["Date"].count()),
        "net_total": profits.sum(),
        "avg_change": None,
        "greatest_increase": None,
        "greatest_decrease": None,
    }

    if changes.notna().any():
        # Look up each extreme row once and reuse it for both date and value.
        top = changes.idxmax()
        bottom = changes.idxmin()
        summary["avg_change"] = changes.mean()
        summary["greatest_increase"] = (budget_df.loc[top, "Date"], changes.loc[top])
        summary["greatest_decrease"] = (budget_df.loc[bottom, "Date"], changes.loc[bottom])

    return summary


def build_budget_report(summary):
    """Turn the dict returned by ``summarize_budget`` into the report text.

    Figures that are None (not computable) are shown as ``N/A``.
    Returns a single string with one report line per text line.
    """
    rule = "----------------------------"

    def money(value):
        return "N/A" if value is None else format_usd(value)

    def extreme(entry):
        if entry is None:
            return "N/A"
        date, change = entry
        return f"{date}, {format_usd(change)}"

    return "\n".join([
        rule,
        "FINANCIAL ANALYSIS:",
        rule,
        f"Total Months of Data Available: {summary['months']}",
        f"Net Total Profit/Losses: {money(summary['net_total'])}",
        f"Avg. Value Change Month-to-Month: {money(summary['avg_change'])}",
        f"Greatest Increase in Profits: {extreme(summary['greatest_increase'])}",
        f"Greatest Decrease in Profits: {extreme(summary['greatest_decrease'])}",
        rule,
    ])


# __name__ is "__main__" when this cell runs in the notebook kernel, so the
# analysis executes as usual; the guard only keeps the helpers above usable
# (and testable) without the data file being present.
if __name__ == "__main__":
    df = pd.read_csv(csv_path)
    # Keep the month-over-month change as a column so it shows in the preview.
    df["Value Change"] = df["Profit/Losses"].diff(1)

    budget_report = build_budget_report(summarize_budget(df))

    # The assignment asks for the analysis on screen AND in a text file.
    print(budget_report)
    with open(budget_report_path, "w") as report_file:
        report_file.write(budget_report + "\n")

    # display() is provided by the IPython kernel; a bare expression inside
    # an if-block would not be rendered as cell output.
    display(df.head())

----------------------------
FINANCIAL ANALYSIS:
----------------------------
----------------------------
Total Months of Data Available: 86
Net Total Profit/Losses: $38382578.00
Avg. Value Change Month-to-Month: ($2315.12)
Greatest Increase in Profits: Feb-12, $1926159.00
Greatest Decrease in Profits: Sep-13, ($2196167.00)
----------------------------


Unnamed: 0,Date,Profit/Losses,Value Change
0,Jan-10,867884,
1,Feb-10,984655,116771.0
2,Mar-10,322013,-662642.0
3,Apr-10,-69417,-391430.0
4,May-10,310503,379920.0


In [27]:
# SCRIPT FOR PYPOLL (HW #2)

# * In this challenge, you are tasked with helping a small, rural town modernize its vote-counting process. 
# * (Up until now, Uncle Cleetus had been trustfully tallying them one-by-one, but unfortunately, his concentration isn't 
# * what it used to be.)
# * You will be given a set of poll data called [election_data.csv](PyPoll/Resources/election_data.csv). 
# * The dataset is composed of three columns: `Voter ID`, `County`, and `Candidate`. Your task is to create a Python 
# * script that analyzes the votes and calculates each of the following:
#   * The total number of votes cast
#   * A complete list of candidates who received votes
#   * The percentage of votes each candidate won
#   * The total number of votes each candidate won
#   * The winner of the election based on popular vote.
# * As an example, your analysis should look similar to the one below:

#   EXAMPLE OUTCOME:
#   Election Results
#   -------------------------
#   Total Votes: 3521001
#   -------------------------
#   Khan: 63.000% (2218231)
#   Correy: 20.000% (704200)
#   Li: 14.000% (492940)
#   O'Tooley: 3.000% (105630)
#   -------------------------
#   Winner: Khan
#   -------------------------

# * In addition, your final script should both print the analysis to the terminal and export a text file with the results.

# Import necessary libraries
import csv
import datetime
import pandas as pd
import locale

# Source data for the analysis and the text file the report is exported to.
# NOTE(review): csv_path is an absolute path on one specific machine; consider
# replacing it with a path relative to the notebook so others can run this.
csv_path = (r"C:\Users\ryanz\Desktop\Data Analytics Bootcamp\Resources\RICH201901DATA3\03-Python\Homework\Instructions\PyPoll\Resources\election_data.csv")
poll_report_path = "election_results.txt"


def tally_votes(poll_df):
    """Count the votes in a poll DataFrame.

    Parameters:
        poll_df: DataFrame with a ``Voter ID`` column and a ``Candidate``
            column, one row per vote.

    Returns:
        dict with the keys
        ``total_votes`` - number of non-null ``Voter ID`` entries,
        ``results``     - list of ``(candidate, votes, percent)`` tuples in
                          the order produced by ``value_counts`` (most votes
                          first), where percent is ``votes / total_votes * 100``,
        ``winner``      - candidate of the first entry in ``results``, or
                          None when there are no votes. Ties are not detected.

    Each candidate name is taken from the same ``value_counts`` entry as its
    vote count, so a name can never be paired with another candidate's total,
    and any number of candidates is supported.
    """
    total_votes = int(poll_df["Voter ID"].count())
    counts = poll_df["Candidate"].value_counts()

    results = []
    for name, votes in counts.items():
        votes = int(votes)
        # Guard the division in case every Voter ID is missing.
        percent = 100.0 * votes / total_votes if total_votes else 0.0
        results.append((name, votes, percent))

    return {
        "total_votes": total_votes,
        "results": results,
        "winner": results[0][0] if results else None,
    }


def build_poll_report(tally):
    """Turn the dict returned by ``tally_votes`` into the report text.

    Each candidate line has the form ``Name: votes (percent%)`` with the
    percentage shown to one decimal. The winner is shown as ``N/A`` when no
    votes were cast. Returns a single string, one report line per text line.
    """
    rule = "----------------------------"
    winner = "N/A" if tally["winner"] is None else tally["winner"]

    lines = [
        rule,
        "POLL ANALYSIS:",
        rule,
        f"Total Number of Voters: {tally['total_votes']}",
    ]
    lines.extend(
        f"{name}: {votes} ({percent:.1f}%)"
        for name, votes, percent in tally["results"]
    )
    lines.extend([rule, f"Winner: {winner}", rule])
    return "\n".join(lines)


# __name__ is "__main__" when this cell runs in the notebook kernel, so the
# analysis executes as usual; the guard only keeps the helpers above usable
# (and testable) without the data file being present.
if __name__ == "__main__":
    df_poll = pd.read_csv(csv_path)

    poll_report = build_poll_report(tally_votes(df_poll))

    # The assignment asks for the analysis on screen AND in a text file.
    print(poll_report)
    with open(poll_report_path, "w") as report_file:
        report_file.write(poll_report + "\n")

    # display() is provided by the IPython kernel; a bare expression inside
    # an if-block would not be rendered as cell output.
    display(df_poll.head(10))

----------------------------
POLL ANALYSIS:
----------------------------
Total Number of Voters: 3521001
Khan: 2218231 (63.0%) 
Correy: 704200 (20.0%) 
Li: 492940 (14.0%) 
O'Tooley: 105630 (3.0%) 
----------------------------


Unnamed: 0,Voter ID,County,Candidate
0,12864552,Marsh,Khan
1,17444633,Marsh,Correy
2,19330107,Marsh,Khan
3,19865775,Queen,Khan
4,11927875,Marsh,Khan
5,19014606,Marsh,Li
6,17775191,Queen,Correy
7,14003692,Marsh,Khan
8,14255761,Marsh,Khan
9,13870173,Marsh,Correy
