In [None]:
# Import libraries
import pandas as pd
import numpy as np

In [None]:
# Read the car financing Excel file into a DataFrame
filename = 'data/car_financing.xlsx'
df = pd.read_excel(io=filename)

In [None]:
## Filtering
# Keep only the rows for the Toyota Sienna loan at the 7.02% interest rate.
car_filter = df['car_type']=='Toyota Sienna'
# Compare the float column with a tolerance instead of ==: a rate that is not
# bit-for-bit equal to the literal 0.0702 (e.g. because it was computed or
# rounded in the spreadsheet) would otherwise be dropped silently.
# np.isclose returns a plain array, so wrap it in a Series that shares df's
# index to keep interest_filter the same kind of object as car_filter.
interest_filter = pd.Series(np.isclose(df['interest_rate'], 0.0702),
                            index=df.index)
# Both conditions must hold; the ':' keeps every column.
df = df.loc[car_filter & interest_filter, :]

In [None]:
# Approach 1: dictionary substitution using the rename method
# Each key is a current column label and each value is its replacement;
# columns that are not listed keep their current names.
df = df.rename(
    mapper={
        'Starting Balance': 'starting_balance',
        'Interest Paid': 'interest_paid',
        'Principal Paid': 'principal_paid',
        'New Balance': 'new_balance',
    },
    axis='columns',
)

In [None]:
# Approach 2: list replacement
# Assigning to df.columns replaces every label by position, so the list has to
# name all of the columns even though only Month -> month actually changes.
df.columns = [
    'month', 'starting_balance', 'Repayment',
    'interest_paid', 'principal_paid', 'new_balance',
    'term', 'interest_rate', 'car_type',
]

In [None]:
# Approach 1: the drop method
# drop takes a list of labels, so several columns can be removed in one call
df = df.drop(labels=['term'], axis='columns')

In [None]:
# Approach 2: use the del statement
# del removes the single named column from df directly, so no reassignment is needed
del df['Repayment']

In [None]:
# Check the (rows, columns) dimensions of df after removing the two columns
df.shape

## Aggregate Methods
It is often a good idea to compute summary statistics.

Aggregate Method | Description
--- | --- 
sum | sum of values
cumsum | cumulative sum
mean | mean of values
median | arithmetic median of values
min | minimum
max | maximum
mode | mode
std | sample standard deviation (normalized by N-1 by default)
var | unbiased variance
quantile | compute rank-based statistics of elements

In [None]:
# Preview the first five rows (n=5 is head's default)
df.head(n=5)

In [None]:
# Total amount of interest paid over the course of the loan:
# select the interest_paid column and add up its values
df.loc[:, 'interest_paid'].sum()

In [None]:
# Sum every column of the DataFrame: axis=0 (the default) adds the values
# down the rows, giving one total per column
df.sum(axis=0)

In [None]:
# Using + on two strings concatenates them instead of adding numbers;
# presumably this is why summing the car_type column gives one long repeated string
'Toyota Sienna' + 'Toyota Sienna'

In [None]:
# Print the documentation for the sum method of the interest_paid column.
# Notice that by default the sum function appears to ignore missing values
# (look for the skipna parameter in the help text).
help(df['interest_paid'].sum)

In [None]:
# The info method gives the column datatypes + number of non-null values
# Notice that we seem to have 60 non-null values for all but the interest_paid column
# (renamed from 'Interest Paid' earlier in the notebook).
df.info()