# Filtering Data
- Task: Filter the data to keep only the rows with a car_type of 'Toyota Sienna' and an interest_rate of 0.0702.

### Load the data

In [4]:
import pandas as pd

# Point at the car financing csv file, then read it into a data frame
filename = 'Ex_Files_Python_for_Data_Vis/Exercise Files/Pandas/data/car_financing.csv'
df = pd.read_csv(filepath_or_buffer=filename)

In [5]:
# Display the data frame; the output below shows the leading and trailing rows with '...' in between
df

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.30,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.10,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.70,60,0.0702,Toyota Sienna
4,5,32735.70,687.23,191.50,495.73,32239.97,60,0.0702,Toyota Sienna
...,...,...,...,...,...,...,...,...,...
403,56,3951.11,796.01,9.54,786.47,3164.64,60,0.0290,VW Golf R
404,57,3164.64,796.01,7.64,788.37,2376.27,60,0.0290,VW Golf R
405,58,2376.27,796.01,5.74,790.27,1586.00,60,0.0290,VW Golf R
406,59,1586.00,796.01,3.83,792.18,793.82,60,0.0290,VW Golf R


In [6]:
# Preview the first 5 rows of the data frame
df.head(n=5)

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


### Create a Car Type Filter

In [8]:
# Count the rows for each distinct value in the 'car_type' column
df['car_type'].value_counts()

car_type
VW Golf R         144
Toyota Sienna     120
Toyota Carolla    111
Toyota Corolla     33
Name: count, dtype: int64

In [9]:
# Build a boolean mask for the target car type
#   - The result is a pandas series holding True where car_type matches and False elsewhere
car_filter = df['car_type'].eq('Toyota Sienna')

In [10]:
# Peek at the first 5 True/False values of the filter
car_filter.head(n=5)

0    True
1    True
2    True
3    True
4    True
Name: car_type, dtype: bool

#### Approach 1: Utilize the filter with square brackets

In [12]:
# Pass the True/False series inside square brackets to keep only the rows marked True
#   - This returns another dataframe of only the target value; head() limits the display to the first rows
df[car_filter].head()

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


#### Approach 2: Utilize the filter with loc

In [14]:
# Filter the dataframe with loc, which returns another dataframe of only the target value
#   - The first argument (car_filter) selects the rows
#   - ':' in this case selects all columns
df.loc[car_filter, :].head()

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


#### Update the dataframe after applying the filter

In [16]:
# Count the rows for each distinct value in the 'car_type' column
#   - Nothing has changed, because the filtered results so far were only displayed and never assigned back to df
df['car_type'].value_counts()

car_type
VW Golf R         144
Toyota Sienna     120
Toyota Carolla    111
Toyota Corolla     33
Name: count, dtype: int64

In [17]:
# Filter the dataframe and assign the result back to df so the change persists
#   - From here on, df holds only the rows where car_filter is True
df = df.loc[car_filter, :]

In [19]:
# Count the rows for each distinct value in the 'car_type' column
#   - This now reflects the updated dataframe
df['car_type'].value_counts()

car_type
Toyota Sienna    120
Name: count, dtype: int64

### Create an Interest Rate Filter

In [24]:
# Count the rows for each distinct value in the 'interest_rate' column
df['interest_rate'].value_counts()

interest_rate
0.0702    60
Name: count, dtype: int64

In [25]:
# Build a boolean mask for the target interest rate
#   - The result is a pandas series of True/False values (exact match against 0.0702)
interest_filter = df['interest_rate'].eq(0.0702)

In [26]:
# Filter the dataframe and assign the result back to df so the change persists
#   - From here on, df holds only the rows where interest_filter is True
df = df.loc[interest_filter, :]

In [27]:
# Count the rows for each distinct value in the 'interest_rate' column
#   - This reflects the dataframe after the interest rate filter was applied
df['interest_rate'].value_counts()

interest_rate
0.0702    60
Name: count, dtype: int64

In [29]:
# Count the rows for each distinct value in the 'interest_rate' column
#   - The dropna parameter controls whether NaN (Not a Number) values are included in the count
#      Ex) dropna=False -> NaN values get their own row in the count, if any exist
df['interest_rate'].value_counts(dropna = False)

interest_rate
0.0702    60
Name: count, dtype: int64

### Combine filters
- More concise than updating twice like in the previous steps

In [31]:
# Use both filters simultaneously to return only rows that meet both specifications
#   - df was already narrowed by the two updates above, so reload the unfiltered data
#     to show that one combined filter replaces both of those steps
#   - Both conditions are built from the same dataframe so their indexes line up
#   - Each comparison is wrapped in parentheses because '&' binds tighter than '=='
df_unfiltered = pd.read_csv(filename)
combined_filter = (
    (df_unfiltered['car_type'] == 'Toyota Sienna')
    & (df_unfiltered['interest_rate'] == 0.0702)
)
df_unfiltered.loc[combined_filter, :]

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna
5,6,32239.97,687.23,188.6,498.63,31741.34,60,0.0702,Toyota Sienna
6,7,31741.34,687.23,185.68,501.55,31239.79,60,0.0702,Toyota Sienna
7,8,31239.79,687.23,182.75,504.48,30735.31,60,0.0702,Toyota Sienna
8,9,30735.31,687.23,179.8,507.43,30227.88,60,0.0702,Toyota Sienna
9,10,30227.88,687.23,176.83,510.4,29717.48,60,0.0702,Toyota Sienna
