# Part 2: Boolean Indexing

In [2]:
# http://pandas.pydata.org/pandas-docs/stable/dsintro.html
# https://medium.com/dunder-data/how-to-learn-pandas-108905ab4955

from ipywidgets import widgets
from IPython.display import display
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Get account info as DataFrame

In [3]:
df = pd.read_csv(r'C:\Users\Alex Loftus\Dropbox\Programming\Projects\Jupyter-Notebooks\Chase-acct-info\Checking-11-23-2017.CSV')

In [4]:
df = df[['Posting Date', 'Description', 'Amount', 'Balance']]
df = df.set_index('Posting Date')
df.head()

df = df.iloc[2:]  # Clear out the ones without a balance

Unnamed: 0_level_0,Description,Amount,Balance
Posting Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12/19/2017,POS DEBIT UNICORN/NARWHAL ...,-6.5,
12/19/2017,POS DEBIT UNICORN/NARWHAL ...,-20.0,
12/18/2017,PAYPAL INST XFER PARKMOBILE WE...,-2.25,1099.82
12/18/2017,PAYPAL INST XFER XSOLLA WE...,-4.99,1102.07
12/18/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 1...,-24.03,1107.06


In [5]:
df

Unnamed: 0_level_0,Description,Amount,Balance
Posting Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12/18/2017,PAYPAL INST XFER PARKMOBILE WE...,-2.25,1099.82
12/18/2017,PAYPAL INST XFER XSOLLA WE...,-4.99,1102.07
12/18/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 1...,-24.03,1107.06
12/18/2017,THE ROYAL INN BELLINGHAM WA 1...,-9.50,1131.09
12/18/2017,PEL MENI BELLINGHAM WA 1...,-17.00,1140.59
12/18/2017,AUNTIE ANNES FERNDALE WA 1...,-6.51,1157.59
12/18/2017,ARAMARK ZOES BOOKSIDE B BELLINGHAM WA 1...,-5.82,1164.1
12/18/2017,SECOND TIME AROUND COMP BELLINGHAM WA 1...,-108.70,1169.92
12/15/2017,GAMESTOP #4331 1 BELLI BELLINGHAM WA 621444 1...,-14.12,1278.62
12/14/2017,PAYPAL INST XFER PARKMOBILE WE...,-2.25,1292.74


## Start playing with boolean operators

#### Keep in mind that normally df['X'] selects columns, but when you pass a list of booleans it selects rows

In [19]:
# You can pass a list of booleans to the indexing operator of a df object
criteria = [True, False, True, False, False]
df.head()[criteria]
df.head().index
df = df.reset_index()

Unnamed: 0,index,Posting Date,Description,Amount,Balance
0,0,12/18/2017,PAYPAL INST XFER PARKMOBILE WE...,-2.25,1099.82
2,2,12/18/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 1...,-24.03,1107.06


RangeIndex(start=0, stop=5, step=1)

### Using Series

In [21]:
#  Can only work with index as (0, 1, 2, 3 ... n)
criteria = pd.Series(criteria)
df.head()[criteria]
df = df[['Posting Date', 'Description', 'Amount', 'Balance']]
df.head()

Unnamed: 0,Posting Date,Description,Amount,Balance
0,12/18/2017,PAYPAL INST XFER PARKMOBILE WE...,-2.25,1099.82
2,12/18/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 1...,-24.03,1107.06


Unnamed: 0,Posting Date,Description,Amount,Balance
0,12/18/2017,PAYPAL INST XFER PARKMOBILE WE...,-2.25,1099.82
1,12/18/2017,PAYPAL INST XFER XSOLLA WE...,-4.99,1102.07
2,12/18/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 1...,-24.03,1107.06
3,12/18/2017,THE ROYAL INN BELLINGHAM WA 1...,-9.5,1131.09
4,12/18/2017,PEL MENI BELLINGHAM WA 1...,-17.0,1140.59


### Using np arrays

In [24]:
np_criteria = np.array(criteria); np_criteria
df.head()[np_criteria]

array([ True, False,  True, False, False], dtype=bool)

Unnamed: 0,Posting Date,Description,Amount,Balance
0,12/18/2017,PAYPAL INST XFER PARKMOBILE WE...,-2.25,1099.82
2,12/18/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 1...,-24.03,1107.06


### Using comparison operators

Generally used on a single column or Series of data.

In [36]:
# Is there a way to do this in one line with DataFrames instead of Series?

df['Balance'] = pd.to_numeric(df['Balance'])
df.head()['Balance']

df['Amount'] = pd.to_numeric(df['Amount']).abs()
df.head()['Amount']

0    1099.82
1    1102.07
2    1107.06
3    1131.09
4    1140.59
Name: Balance, dtype: float64

0     2.25
1     4.99
2    24.03
3     9.50
4    17.00
Name: Amount, dtype: float64

In [56]:
# All the times when I was getting scary low

small_bool = df['Balance'] < 100
small_bool.head()

small_balance = df[small_bool]
small_balance
small_balance.shape  # Had balance < 100 for 11 purchases

0    False
1    False
2    False
3    False
4    False
Name: Balance, dtype: bool

Unnamed: 0,Posting Date,Description,Amount,Balance
194,9/13/2017,PAYPAL INST XFER NEKOKIN WE...,16.0,48.81
195,9/13/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 0...,70.67,64.81
197,9/12/2017,PAYPAL INST XFER KIGOSAI WE...,11.0,35.48
198,9/12/2017,KEY CR. TRANSFER ONLINE-BK 017254026572595 WE...,25.0,46.48
199,9/11/2017,PAYPAL INST XFER NEKOKIN WE...,2.45,21.48
200,9/11/2017,PAYPAL INST XFER XSOLLA WE...,4.99,23.93
201,9/11/2017,PAYPAL INST XFER NEKOKIN WE...,87.0,28.92
319,7/24/2017,PAYPAL INST XFER PARKMOBILE WE...,4.25,56.45
320,7/24/2017,ATM WITHDRAWAL 000724 0...,20.0,60.7
321,7/24/2017,SQ *SEATTLE FUSION SEATTLE WA 0...,8.25,80.7


(11, 4)

#### You can do it in one line (Returns the whole df)

In [60]:
df[df['Balance'] <= 100]

Unnamed: 0,Posting Date,Description,Amount,Balance
194,9/13/2017,PAYPAL INST XFER NEKOKIN WE...,16.0,48.81
195,9/13/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 0...,70.67,64.81
197,9/12/2017,PAYPAL INST XFER KIGOSAI WE...,11.0,35.48
198,9/12/2017,KEY CR. TRANSFER ONLINE-BK 017254026572595 WE...,25.0,46.48
199,9/11/2017,PAYPAL INST XFER NEKOKIN WE...,2.45,21.48
200,9/11/2017,PAYPAL INST XFER XSOLLA WE...,4.99,23.93
201,9/11/2017,PAYPAL INST XFER NEKOKIN WE...,87.0,28.92
319,7/24/2017,PAYPAL INST XFER PARKMOBILE WE...,4.25,56.45
320,7/24/2017,ATM WITHDRAWAL 000724 0...,20.0,60.7
321,7/24/2017,SQ *SEATTLE FUSION SEATTLE WA 0...,8.25,80.7


### By description

In [79]:
criteria = df['Description'].str.contains('PAYPAL')
df[criteria].head(20)
df[criteria].shape[0] / df.shape[0]  # Proportion of my purchases that used Paypal is 8%

Unnamed: 0,Posting Date,Description,Amount,Balance
0,12/18/2017,PAYPAL INST XFER PARKMOBILE WE...,2.25,1099.82
1,12/18/2017,PAYPAL INST XFER XSOLLA WE...,4.99,1102.07
9,12/14/2017,PAYPAL INST XFER PARKMOBILE WE...,2.25,1292.74
11,12/13/2017,PAYPAL INST XFER PARKMOBILE WE...,8.25,1298.85
12,12/13/2017,PAYPAL INST XFER FANDANGOMED WE...,14.5,1307.1
20,12/11/2017,PAYPAL INST XFER XSOLLA WE...,4.99,244.94
32,12/4/2017,PAYPAL INST XFER PATREON WE...,2.0,449.87
39,11/30/2017,PAYPAL INST XFER WIKIMEDIAFO WE...,5.0,636.1
46,11/27/2017,PAYPAL INST XFER NEKOKIN WE...,535.0,740.95
59,11/21/2017,PAYPAL TRANSFER PP...,52.01,289.4


0.08588548601864181

### With operator expressions

**&** = and

**|** = or

**~** = not

In [81]:
crit_1 = criteria
crit_2 = df['Amount'] > 100
crit_all = crit_1 & crit_2

df[crit_all]  #  By defining variables

Unnamed: 0,Posting Date,Description,Amount,Balance
46,11/27/2017,PAYPAL INST XFER NEKOKIN WE...,535.0,740.95
163,9/25/2017,PAYPAL INST XFER NEKOKIN WE...,535.0,953.64
221,9/5/2017,PAYPAL INST XFER HAYLEYGG689 WE...,216.0,813.03
312,7/26/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,571.76
407,6/27/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,457.33
490,5/30/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,721.98
577,4/27/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,1176.04
611,4/11/2017,PAYPAL INST XFER HAYLEYGG689 WE...,180.24,509.43
616,4/10/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,737.3
722,2/21/2017,PAYPAL INST XFER HAYLEYGG689 WE...,108.0,854.05


In [93]:
df[(df['Amount'] > 100) & (df['Description'].str.contains('PAYPAL'))]  # Without defining variables
 # Needed to use parentheses on the conditionals for it to work

Unnamed: 0,Posting Date,Description,Amount,Balance
46,11/27/2017,PAYPAL INST XFER NEKOKIN WE...,535.0,740.95
163,9/25/2017,PAYPAL INST XFER NEKOKIN WE...,535.0,953.64
221,9/5/2017,PAYPAL INST XFER HAYLEYGG689 WE...,216.0,813.03
312,7/26/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,571.76
407,6/27/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,457.33
490,5/30/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,721.98
577,4/27/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,1176.04
611,4/11/2017,PAYPAL INST XFER HAYLEYGG689 WE...,180.24,509.43
616,4/10/2017,PAYPAL INST XFER HAYLEYGG689 WE...,625.0,737.3
722,2/21/2017,PAYPAL INST XFER HAYLEYGG689 WE...,108.0,854.05


In [100]:
pk = df[df['Description'].str.contains('PARKING')]; pk
# This is missing data somehow cause I know I got more parking tickets than this

Unnamed: 0,Posting Date,Description,Amount,Balance
456,6/13/2017,ACE PARKING 3265 SEATTLE WA 0...,18.0,457.43
621,4/10/2017,SEATTLE METER PARKING SEATTLE WA 0...,1.87,1485.25
1011,10/31/2016,SEA SC PARKING #850012 SEATTLE WA 1...,10.0,1730.71
1096,9/8/2016,UW PARKING ONLINE 206-5434519 WA 0...,40.0,1345.43
1103,9/6/2016,UW PARKING ONLINE 206-5434519 WA 0...,35.0,1649.28
1254,7/11/2016,UW PARKING ONLINE 206-5434519 WA 0...,35.0,709.24
1301,6/15/2016,REPUBLIC PARKING 30 562 SEATTLE WA 0...,12.0,789.16
1329,6/8/2016,WWU PARKING PAYBOXES BELLINGHAM WA 0...,8.0,1134.06
1359,5/23/2016,STIA PUBLIC PARKING SEATAC WA 0...,68.0,634.45
1485,3/24/2016,REPUBLIC PARKING 30 64 SEATTLE WA 0...,17.0,2969.82


In [105]:
df[(df['Amount'] <= 100) & (df['Amount'] >= 50)]

Unnamed: 0,Posting Date,Description,Amount,Balance
13,12/13/2017,FRED MEYE FRED MEYER 0 BELLINGHAM WA 1...,82.75,1321.60
19,12/12/2017,REMOTE ONLINE DEPOSIT # 1,53.99,298.93
38,12/1/2017,Online Transfer to SAV ...9660 transaction#: ...,50.00,586.10
57,11/21/2017,SHELL 4314 WEST 10 AVEN VANCOUVER. BC 1...,56.73,252.67
59,11/21/2017,PAYPAL TRANSFER PP...,52.01,289.40
65,11/20/2017,VENMO CASHOUT PP...,68.50,257.42
100,11/10/2017,QuickPay with Zelle payment from SUSAN K LOFTU...,100.00,638.48
112,11/1/2017,Online Transfer to SAV ...9660 transaction#: ...,50.00,609.33
116,10/26/2017,FRED MEYE FRED MEYER 6 BELLINGHAM WA 1...,60.92,1243.52
119,10/18/2017,UNITED 016292149 800-932-2732 TX 1...,75.00,134.22


In [135]:
sav = df[(df['Description'].str.contains('SAV')) & ~(df['Description'].str.contains('from SAV'))]
sav = sav[sav['Amount'] < 100]

sum(sav['Amount'])  # Amount I put in savings
sum(sav['Amount']) + (sav.shape[0] * 25)  # Amount I would have gotten if I had been doing $100 instead of $75
sav.shape
sav

1575.0

2625.0

(42, 4)

Unnamed: 0,Posting Date,Description,Amount,Balance
29,12/6/2017,TRANSFER TO SAV XXXXX9660 12/06,25.0,352.87
38,12/1/2017,Online Transfer to SAV ...9660 transaction#: ...,50.0,586.1
105,11/6/2017,TRANSFER TO SAV XXXXX9660 11/06,25.0,574.05
112,11/1/2017,Online Transfer to SAV ...9660 transaction#: ...,50.0,609.33
143,10/6/2017,TRANSFER TO SAV XXXXX9660 10/06,25.0,429.1
156,9/29/2017,Online Transfer to SAV ...9660 transaction#: ...,50.0,835.76
216,9/6/2017,TRANSFER TO SAV XXXXX9660 09/06,25.0,706.53
225,9/1/2017,Online Transfer to SAV ...9660 transaction#: ...,50.0,1091.71
280,8/7/2017,TRANSFER TO SAV XXXXX9660 08/07,25.0,231.61
293,8/1/2017,Online Transfer to SAV ...9660 transaction#: ...,50.0,304.02


### isin method