In [1]:
# Import libraries
import pandas as pd
import numpy as np

In [2]:
# Load Excel File
# The path is relative, so it is resolved against the notebook's working directory.
filename = 'data/car_financing.xlsx'
df = pd.read_excel(filename)

In [3]:
# Filtering: keep only the rows for the Toyota Sienna loan at the 0.0702 rate.
car_filter = df['car_type'] == 'Toyota Sienna'
# Compare the float rate with a tolerance instead of ==. A rate that differs
# from 0.0702 only by floating-point rounding would be silently dropped by an
# exact comparison. The result is wrapped in a Series so interest_filter keeps
# the same type, index and name it had with the == comparison.
interest_filter = pd.Series(
    np.isclose(df['interest_rate'], 0.0702),
    index=df.index,
    name='interest_rate',
)
df = df.loc[car_filter & interest_filter, :]

In [4]:
# Approach 1: pass an {old_name: new_name} dictionary to the rename method.
# Only the columns listed as keys are renamed.
df = df.rename(
    columns={
        'Starting Balance': 'starting_balance',
        'Interest Paid': 'interest_paid',
        'Principal Paid': 'principal_paid',
        'New Balance': 'new_balance',
    }
)

In [5]:
# Approach 2: replace the whole list of column names.
# Only Month -> month actually changes, but every column must still be listed,
# in its current position, because the names are assigned by order.
df.columns = [
    'month', 'starting_balance', 'Repayment',
    'interest_paid', 'principal_paid', 'new_balance',
    'term', 'interest_rate', 'car_type',
]

In [6]:
# Approach 1: the drop method
# The label list may name several columns, so they can all be removed in one call.
df = df.drop(labels=['term'], axis='columns')

In [7]:
# Approach 2: use the del statement
# Unlike drop, this modifies df in place rather than returning a new DataFrame.
del df['Repayment']

In [8]:
# (rows, columns) remaining after the filter and the two column drops
df.shape

(60, 7)

## Aggregate Methods
It is often a good idea to compute summary statistics.

Aggregate Method | Description
--- | --- 
sum | sum of values
cumsum | cumulative sum
mean | mean of values
median | arithmetic median of values
min | minimum
max | maximum
mode | mode
std | sample standard deviation (normalized by N-1)
var | unbiased variance
quantile | compute rank-based statistics of elements

In [9]:
# Preview the first five rows of the filtered, renamed DataFrame
df.head()

Unnamed: 0,month,starting_balance,interest_paid,principal_paid,new_balance,interest_rate,car_type
0,1,34689.96,202.93,484.3,34205.66,0.0702,Toyota Sienna
1,2,34205.66,200.1,487.13,33718.53,0.0702,Toyota Sienna
2,3,33718.53,197.25,489.98,33228.55,0.0702,Toyota Sienna
3,4,33228.55,194.38,492.85,32735.7,0.0702,Toyota Sienna
4,5,32735.7,191.5,495.73,32239.97,0.0702,Toyota Sienna


In [10]:
# sum the values in a column
# total amount of interest paid over the course of the loan
# (the trailing ...99995 digits in the result are floating-point rounding, not real cents)
df['interest_paid'].sum()

6450.2699999999995

In [11]:
# sum the values within each column (one total per column, not one grand total)
# non-numeric columns are included too: the car_type strings get concatenated
df.sum()

month                                                            1830
starting_balance                                           1.1186e+06
interest_paid                                                 6450.27
principal_paid                                                34690.3
new_balance                                               1.08391e+06
interest_rate                                                   4.212
car_type            Toyota SiennaToyota SiennaToyota SiennaToyota ...
dtype: object

In [None]:
# Adding Python strings concatenates them, which is why the car_type "sum"
# above is one long repeated string.
'Toyota Sienna' + 'Toyota Sienna'

In [None]:
# Notice that by default the sum function ignores missing values
# (see the skipna parameter, which defaults to True, in the help text below).
help(df['interest_paid'].sum)

In [None]:
# The info method gives the column datatypes + number of non-null values
# Notice that we seem to have 60 non-null values for all but the interest_paid
# column (originally named 'Interest Paid' before the rename above).
df.info()