# Pandas
## Subsets and Data Filters

In [1]:
# import necessary packages
import pandas as pd

In [3]:
# Load the bank tele-marketing CSV into a DataFrame.
bank = pd.read_csv("./data/Bank Tele Marketing/bank_market.csv")
# Quick sanity check: the (rows, columns) tuple first, then the column labels,
# each on its own line.
print(bank.shape, bank.columns, sep="\n")

(45211, 18)
Index(['Cust_num', 'age', 'job', 'marital', 'education', 'default', 'balance',
       'housing', 'loan', 'contact', 'day', 'month', 'duration', 'campaign',
       'pdays', 'previous', 'poutcome', 'y'],
      dtype='object')


In [4]:
# Preview the first 15 rows of the DataFrame (head() defaults to 5 rows).
print(bank.head(n=15))

    Cust_num  age           job   marital  education default  balance housing  \
0          1   58    management   married   tertiary      no     2143     yes   
1          2   44    technician    single  secondary      no       29     yes   
2          3   33  entrepreneur   married  secondary      no        2     yes   
3          4   47   blue-collar   married    unknown      no     1506     yes   
4          5   33       unknown    single    unknown      no        1      no   
5          6   35    management   married   tertiary      no      231     yes   
6          7   28    management    single   tertiary      no      447     yes   
7          8   42  entrepreneur  divorced   tertiary     yes        2     yes   
8          9   58       retired   married    primary      no      121     yes   
9         10   43    technician    single  secondary      no      593     yes   
10        11   41        admin.  divorced  secondary      no      270     yes   
11        12   29        adm

In [5]:
# Select a single row by integer position with iloc.
# One row of a DataFrame comes back as a Series whose index is the column names.
bank2 = bank.iloc[2, :]
# Show the row itself, then its type, on separate lines.
print(bank2, type(bank2), sep="\n")

Cust_num                3
age                    33
job          entrepreneur
marital           married
education       secondary
default                no
balance                 2
housing               yes
loan                  yes
contact           unknown
day                     5
month                 may
duration               76
campaign                1
pdays                  -1
previous                0
poutcome          unknown
y                      no
Name: 2, dtype: object
<class 'pandas.core.series.Series'>


In [6]:
# To select several rows at once, pass a list of integer positions to iloc.
# The result is a DataFrame containing those rows, in the order listed.
indices = [2, 5, 8, 12, 54]
bank3 = bank.iloc[indices, :]
print(bank3)

    Cust_num  age           job  marital  education default  balance housing  \
2          3   33  entrepreneur  married  secondary      no        2     yes   
5          6   35    management  married   tertiary      no      231     yes   
8          9   58       retired  married    primary      no      121     yes   
12        13   53    technician  married  secondary      no        6     yes   
54        55   24    technician   single  secondary      no     -103     yes   

   loan  contact  day month  duration  campaign  pdays  previous poutcome   y  
2   yes  unknown    5   may        76         1     -1         0  unknown  no  
5    no  unknown    5   may       139         1     -1         0  unknown  no  
8    no  unknown    5   may        50         1     -1         0  unknown  no  
12   no  unknown    5   may       517         1     -1         0  unknown  no  
54  yes  unknown    5   may       145         1     -1         0  unknown  no  


In [7]:
# Build a new DataFrame that keeps only a few columns of the original.
# The columns appear in the order given in the list.
cols = ["job", "age"]
bank4 = bank.loc[:, cols]
print(bank4)

                job  age
0        management   58
1        technician   44
2      entrepreneur   33
3       blue-collar   47
4           unknown   33
...             ...  ...
45206    technician   51
45207       retired   71
45208       retired   72
45209   blue-collar   57
45210  entrepreneur   37

[45211 rows x 2 columns]


In [8]:
# Combine both kinds of selection: take the rows at positions 4..9
# (the end of an iloc slice is exclusive), then keep only two columns.
bank5 = bank.iloc[4:10][["age", "job"]]
print(bank5)

   age           job
4   33       unknown
5   35    management
6   28    management
7   42  entrepreneur
8   58       retired
9   43    technician


In [9]:
# Exclude a set of rows. drop() matches on index *labels*, not positions,
# and returns a new DataFrame; `bank` itself is left unchanged.
bank6 = bank.drop(index=[0, 4, 6, 9])
print(bank6.head(n=10))

    Cust_num  age           job   marital  education default  balance housing  \
1          2   44    technician    single  secondary      no       29     yes   
2          3   33  entrepreneur   married  secondary      no        2     yes   
3          4   47   blue-collar   married    unknown      no     1506     yes   
5          6   35    management   married   tertiary      no      231     yes   
7          8   42  entrepreneur  divorced   tertiary     yes        2     yes   
8          9   58       retired   married    primary      no      121     yes   
10        11   41        admin.  divorced  secondary      no      270     yes   
11        12   29        admin.    single  secondary      no      390     yes   
12        13   53    technician   married  secondary      no        6     yes   
13        14   58    technician   married    unknown      no       71     yes   

   loan  contact  day month  duration  campaign  pdays  previous poutcome   y  
1    no  unknown    5   may 

In [10]:
# Exclude columns from the DataFrame. The `columns=` keyword is the explicit
# spelling of drop(labels, axis=1); axis=0 (the default) would drop rows.
bank7 = bank.drop(columns=["Cust_num"])
print(bank7.head())  # head() shows 5 rows by default

   age           job  marital  education default  balance housing loan  \
0   58    management  married   tertiary      no     2143     yes   no   
1   44    technician   single  secondary      no       29     yes   no   
2   33  entrepreneur  married  secondary      no        2     yes  yes   
3   47   blue-collar  married    unknown      no     1506     yes   no   
4   33       unknown   single    unknown      no        1      no   no   

   contact  day month  duration  campaign  pdays  previous poutcome   y  
0  unknown    5   may       261         1     -1         0  unknown  no  
1  unknown    5   may       151         1     -1         0  unknown  no  
2  unknown    5   may        76         1     -1         0  unknown  no  
3  unknown    5   may        92         1     -1         0  unknown  no  
4  unknown    5   may       198         1     -1         0  unknown  no  


In [11]:
# Filter rows with a boolean mask: keep only customers older than 40.
bank8 = bank.loc[bank["age"].gt(40)]
print(bank8.shape)

(20494, 18)


In [13]:
# Combine several conditions with `&` (element-wise AND):
# customers older than 40 who do not have a loan.
bank9 = bank.loc[bank["age"].gt(40) & bank["loan"].eq("no")]
print(bank9.shape)

(17156, 18)


In [14]:
# Combine conditions with `|` (element-wise OR):
# customers who are older than 40, or have no loan, or both.
bank10 = bank.loc[bank["age"].gt(40) | bank["loan"].eq("no")]
print(bank10.shape)

(41305, 18)
