# PyBank

In [5]:
# Import the necessary libraries for reading CSV files and manipulating data sets

import pandas as pd
from pathlib import Path
import numpy as np

In [8]:
# Set the path for the CSV file
path = Path("resources/budget_data.csv")

In [9]:
# Read the CSV into a dataframe using Pandas and print the first 5 rows
budget_df = pd.read_csv(path)
budget_df.head()


Unnamed: 0,Date,Profit/Losses
0,Jan-2010,867884
1,Feb-2010,984655
2,Mar-2010,322013
3,Apr-2010,-69417
4,May-2010,310503


In [10]:
# Calculate the total number of months in the data set

# Use conditional statement to check for duplicates in the data set
# If there are any duplicates, they will be dropped before calculating the number of months

if budget_df.duplicated().all == False:
    num_months = budget_df['Date'].count()
    print(num_months) 
else:
    budget_df = budget_df.drop_duplicates().copy()
    num_months = budget_df['Date'].count()
    print(num_months) 

86


In [27]:
# Calculate the net total amount of Profit/Losses over the entire period

total_profit_losses = "${:,.2f}".format(budget_df['Profit/Losses'].sum())
print(total_profit_losses)

$38,382,578.00


In [28]:
# Calculate the average of the changes in Profit/Losses over the entire period

# Extract values of Profit/Losses dataframe series into a list
profit_losses = budget_df['Profit/Losses'].tolist()

# Generate a list of monthly changes in profit/losses
# A profit/loss difference for the first month can't be calculated therefore the loop must iterate for one less element
monthly_changes=[]
for month_diff in range(len(profit_losses)-1): 
    monthly_changes.append(profit_losses[month_diff + 1] - profit_losses[month_diff])

# Calculate the average of the monthly changes and round to two decimal places
avg_monthly_changes = "${:,.2f}".format(round((sum(monthly_changes) / len(monthly_changes)),2))
print(avg_monthly_changes)

$-2,315.12


In [29]:
# Calculate the greatest increase in profits (date and amount) over the entire period

# Add a new column to the dataframe with the results for the monthly_changes
# Note that the first month would not have a value for monthly_changes, so a 0 will be added instead

budget_df['Monthly Changes'] = ""  #adds a new empty column to the dataframe

month_change = []

for i in range(len(budget_df)):  #iterates over the length of the dataframe
    if i == 0:
        budget_df['Monthly Changes'][i] = 0  #sets the monthly change for row 0 equal to 0  
    else:
        budget_df['Monthly Changes'][i] = monthly_changes[i-1] #sets the monthly change for all other rows equal to the monthly change value

budget_df

# CONTINUED in next cell

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  budget_df['Monthly Changes'][i] = 0  #sets the monthly change for row 0 equal to 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  budget_df['Monthly Changes'][i] = monthly_changes[i-1] #sets the monthly change for all other rows equal to the monthly change value


Unnamed: 0,Date,Profit/Losses,Monthly Changes
0,Jan-2010,867884,0
1,Feb-2010,984655,116771
2,Mar-2010,322013,-662642
3,Apr-2010,-69417,-391430
4,May-2010,310503,379920
...,...,...,...
81,Oct-2016,102685,-665765
82,Nov-2016,795914,693229
83,Dec-2016,60988,-734926
84,Jan-2017,138230,77242


In [30]:
## CONTINUED: Calculate the greatest increase in profits (date and amount) over the entire period

# Create empty dictionary to populate with Date and Monthly Changes data from dataframe
short_budget = {}

# Extract values for Date and Monthly Changes from dataframe into lists
months = budget_df['Date'].tolist() # generated a list for the 'Date' series in dataframe with just the values
changes = budget_df['Monthly Changes'].tolist() # generated a ist for the 'Monthly Changes' series in dataframe with just the vaues
short_budget = dict(zip(months, changes)) # used zip to create a dictionary from two lists

# Calculate the greatest increase in profits (date and amount) over the entire period
greatest_increase_key = max(short_budget, key = short_budget.get) #obtains the key with the highest value in dictionary
greatest_increase_value = "${:,.2f}".format(max(short_budget.values())) #obtains the highest value in dictionary
greatest_increase = print(f"{greatest_increase_key} {greatest_increase_value}")

Feb-2012 $1,926,159.00


In [31]:
# Calculate the greatest decrease in losses (date and amount) over the entire period

greatest_decrease_key = min(short_budget, key = short_budget.get)
greatest_decrease_value = "${:,.2f}".format(min(short_budget.values()))
greatest_decrease = print(f"{greatest_decrease_key} {greatest_decrease_value}")

Sep-2013 $-2,196,167.00


In [34]:
# Print the analysis

print("Financial Analysis")
print("---------------------------")
print(f"Total months: {num_months}")
print(f"Total: {total_profit_losses}")
print(f"Average change: {avg_monthly_changes} ")
print(f"Greatest Increase in Profits occured in {greatest_increase_key} ({greatest_increase_value})")
print(f"Greatest Decrease in Profits occured in {greatest_decrease_key} ({greatest_decrease_value})")

Financial Analysis
---------------------------
Total months: 86
Total: $38,382,578.00
Average change: $-2,315.12 
Greatest Increase in Profits occured in Feb-2012 ($1,926,159.00)
Greatest Decrease in Profits occured in Sep-2013 ($-2,196,167.00)


In [33]:
# Export the analysis to a text file that contains the final results

# Set output file name
output_path = 'output.txt'

# Open the output path as a file object
with open(output_path, 'w') as file:
    file.write("Financial Analysis\n")
    file.write("---------------------------\n")
    file.write(f"Total months: {num_months}\n")
    file.write(f"Total: {total_profit_losses}\n")
    file.write(f"Average change: {avg_monthly_changes}\n")
    file.write(f"Greatest Increase in Profits occured in {greatest_increase_key} ({greatest_increase_value})\n")
    file.write(f"Greatest Decrease in Profits occured in {greatest_decrease_key} ({greatest_decrease_value})\n")