**1. Filtering with Pandas**

**1.1. Filtering with NumPy arrays**

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small 1-D array of single-character strings used by the indexing examples.
indexed_array = np.array(['a', 'b', 'c', 'd'])
indexed_array

array(['a', 'b', 'c', 'd'], dtype='<U1')

In [5]:
# Integer (positional) indexing is zero-based.
print(indexed_array[0]) # returns first element ('a')
print(indexed_array[2]) # returns third element ('c')
print(indexed_array[3]) # returns last element ('d')

a
c
d


In [7]:
# Indexing with a list of positions selects those elements, in the order given.
indexed_array[[0,2,3]]

array(['a', 'c', 'd'], dtype='<U1')

In [8]:
indexed_array[0:2] # slice: start is inclusive, stop is exclusive -> np.array(['a', 'b'])

array(['a', 'b'], dtype='<U1')

In [13]:
# Boolean mask with one flag per element of indexed_array; True means "keep".
filtered_indices = [True,False,True,False]

In [14]:
# Indexing with the boolean mask keeps only the positions flagged True.
indexed_array[filtered_indices]

array(['a', 'c'], dtype='<U1')

In [17]:
# Test the prefix, as the variable name promises, rather than strict equality,
# so the mask stays correct if multi-character strings are added to the array.
starts_with_a = [x.startswith('a') for x in indexed_array] # returns [True, False, False, False]

In [18]:
# Display the mask: one boolean per element of indexed_array.
starts_with_a

[True, False, False, False]

In [19]:
# Apply the computed mask exactly like the hand-written one above.
indexed_array[starts_with_a] # returns np.array(['a'])

array(['a'], dtype='<U1')

In [20]:
# Comparing an array with a scalar is applied element-wise and yields a
# boolean array of the same length.
numbers_array = np.array([1,2,3,4,5])
numbers_array > 2

array([False, False,  True,  True,  True])

In [22]:
# np.arange(1, 5) yields 1..4: the stop value is excluded.
numbers = np.arange(1,5)
numbers

array([1, 2, 3, 4])

In [23]:
# numbers is already an ndarray; np.array(...) copies its input by default,
# so example_array is an independent copy rather than a view of numbers.
example_array = np.array(numbers)
example_array

array([1, 2, 3, 4])

In [24]:
# Vectorised mask: the modulo and the comparison both run element-wise in
# NumPy, so no Python-level loop is needed to keep the even values.
example_array[example_array % 2 == 0]

array([2, 4])

##### Filtering Pandas Rows

In [25]:
# Toy data set: one row per company, labelled by company name.
index = ['Apple', 'Google', 'Facebook', 'Microsoft', 'Amazon']
age = [46, 24, 18, 47, 28]
market_cap = [2.42, 1.29, 0.377, 1.77, 1.16]

# Columns appear in the order the keyword arguments are given: age, then market_cap.
df = pd.DataFrame(data=dict(age=age, market_cap=market_cap), index=index)

In [26]:
# Display the whole frame (it has only five rows).
df

Unnamed: 0,age,market_cap
Apple,46,2.42
Google,24,1.29
Facebook,18,0.377
Microsoft,47,1.77
Amazon,28,1.16


##### Row Indexing

In [27]:
# Label-based selection: rows 'Apple' and 'Amazon', all columns.
df.loc[['Apple','Amazon'],:]

Unnamed: 0,age,market_cap
Apple,46,2.42
Amazon,28,1.16


In [28]:
# Position-based selection: the first two rows (stop position 2 is excluded), all columns.
df.iloc[0:2,:]

Unnamed: 0,age,market_cap
Apple,46,2.42
Google,24,1.29


##### Broadcasting

In [29]:
# df.age > 25 yields a boolean Series (one flag per row); indexing the frame
# with it keeps the rows where the flag is True.
df[df.age > 25]

Unnamed: 0,age,market_cap
Apple,46,2.42
Microsoft,47,1.77
Amazon,28,1.16


##### List Comprehension

In [30]:
# Build the boolean row mask by hand: True where the row label is one of the wanted names.
df[[x in ['Apple','Amazon'] for x in df.index.values]]

Unnamed: 0,age,market_cap
Apple,46,2.42
Amazon,28,1.16


##### Compound Logical Statements

In [31]:
# & combines the two masks element-wise (logical AND). Each comparison needs
# its own parentheses because & binds more tightly than > in Python.
df[(df.age > 25) & (df.market_cap > 2)]

Unnamed: 0,age,market_cap
Apple,46,2.42


##### Querying

In [32]:
# Same filter as df[df.age > 25], written as a query string that refers to the column by name.
df.query('age > 25')

Unnamed: 0,age,market_cap
Apple,46,2.42
Microsoft,47,1.77
Amazon,28,1.16


In [33]:
# Inside query(), the name `index` refers to the frame's row labels.
df.query("index in ('Apple','Amazon')")

Unnamed: 0,age,market_cap
Apple,46,2.42
Amazon,28,1.16


##### Selecting Columns using Indexes

In [34]:
# Label-based column selection; passing a list (['age']) returns a DataFrame rather than a Series.
df.loc[:,['age']]

Unnamed: 0,age
Apple,46
Google,24
Facebook,18
Microsoft,47
Amazon,28


In [35]:
# Position-based column selection: column 0 is 'age'; the list form again returns a DataFrame.
df.iloc[:,[0]]

Unnamed: 0,age
Apple,46
Google,24
Facebook,18
Microsoft,47
Amazon,28


##### Selecting using Column Names

In [36]:
# A list of column names inside [] selects those columns and returns a DataFrame.
df[['market_cap']]

Unnamed: 0,market_cap
Apple,2.42
Google,1.29
Facebook,0.377
Microsoft,1.77
Amazon,1.16


##### Selecting using Data Types

In [37]:
# Keep only the columns whose dtype is int64 (here just 'age'; market_cap holds floats).
df.select_dtypes('int64')

Unnamed: 0,age
Apple,46
Google,24
Facebook,18
Microsoft,47
Amazon,28


##### Common Scenarios for Using Pandas Filters

In [38]:
# Sample data: four historical snowstorms, labelled by storm name.
index = ['Great Texas Snowstorm of 2021', 'Buffalo Blizzard of 1977', 'Great Snow of 1717','Mount Shasta Blizzard of 1959']
date = ['2021-02-14','1977-01-27','1717-03-01','1959-02-19']
snowfall = [20.3, 150, 122, 480]
location = ['Texas','New York','New England','California']

# Store the dates as datetime64 rather than str: comparisons against date
# strings then compare actual dates instead of relying on the lexicographic
# order of ISO-formatted text.
blizzards = pd.DataFrame({'date': pd.to_datetime(date), 'snowfall': snowfall, 'location':location}, index=index)

blizzards

Unnamed: 0,date,snowfall,location
Great Texas Snowstorm of 2021,2021-02-14,20.3,Texas
Buffalo Blizzard of 1977,1977-01-27,150.0,New York
Great Snow of 1717,1717-03-01,122.0,New England
Mount Shasta Blizzard of 1959,1959-02-19,480.0,California


##### Filtering by substring criteria

In [51]:
# .str.contains tests every row label for the (case-sensitive) pattern 'Snow'
# and returns one boolean per row, which is then used as a mask.
blizzards[blizzards.index.str.contains('Snow')] # conditionals

Unnamed: 0,date,snowfall,location
Great Texas Snowstorm of 2021,2021-02-14,20.3,Texas
Great Snow of 1717,1717-03-01,122.0,New England


In [40]:
# Query form of the same substring filter; .values hands query() a plain boolean array.
# NOTE(review): some pandas/numexpr combinations require engine='python' for
# .str methods inside query() — confirm against the pandas version in use.
blizzards.query("index.str.contains('Snow').values") #Querying

Unnamed: 0,date,snowfall,location
Great Texas Snowstorm of 2021,2021-02-14,20.3,Texas
Great Snow of 1717,1717-03-01,122.0,New England


##### Filtering on Dates

In [41]:
# Keep storms dated strictly between the two bounds (both ends exclusive).
blizzards[(blizzards.date > '1900-01-01') & (blizzards.date < '2000-01-01')] #Conditional

Unnamed: 0,date,snowfall,location
Buffalo Blizzard of 1977,1977-01-27,150.0,New York
Mount Shasta Blizzard of 1959,1959-02-19,480.0,California


In [42]:
# Same date window as a query string; `and` plays the role of & here.
blizzards.query("date > '1900-01-01' and date < '2000-01-01'") #Querying

Unnamed: 0,date,snowfall,location
Buffalo Blizzard of 1977,1977-01-27,150.0,New York
Mount Shasta Blizzard of 1959,1959-02-19,480.0,California


##### Filtering Using ‘in’ and ‘not in’

In [44]:
# Locations of interest (also used by the negated filter later on).
select_locations = ['New York','Texas']

# isin() flags rows whose location is one of the listed values.
blizzards[blizzards.location.isin(select_locations)] #Conditional

Unnamed: 0,date,snowfall,location
Great Texas Snowstorm of 2021,2021-02-14,20.3,Texas
Buffalo Blizzard of 1977,1977-01-27,150.0,New York


In [45]:
# Query-string equivalent of isin(): membership is written with `in`.
blizzards.query("location in ('New York','Texas')") #Querying

Unnamed: 0,date,snowfall,location
Great Texas Snowstorm of 2021,2021-02-14,20.3,Texas
Buffalo Blizzard of 1977,1977-01-27,150.0,New York


In [46]:
# ~ inverts the isin() mask, keeping rows whose location is NOT in select_locations.
blizzards[~blizzards.location.isin(select_locations)] #Conditional

Unnamed: 0,date,snowfall,location
Great Snow of 1717,1717-03-01,122.0,New England
Mount Shasta Blizzard of 1959,1959-02-19,480.0,California


In [47]:
# Query-string equivalent of the negated isin(): exclusion is written with `not in`.
blizzards.query("location not in ('New York','Texas')") #Querying

Unnamed: 0,date,snowfall,location
Great Snow of 1717,1717-03-01,122.0,New England
Mount Shasta Blizzard of 1959,1959-02-19,480.0,California


##### Operator Chaining

In [49]:
# (snowfall > 100 AND date after 1900-01-01) OR location contains 'New';
# the outer parentheses make the grouping of & and | explicit.
blizzards[((blizzards.snowfall > 100) & (blizzards.date > '1900-01-01')) | (blizzards.location.str.contains('New'))]

Unnamed: 0,date,snowfall,location
Buffalo Blizzard of 1977,1977-01-27,150.0,New York
Great Snow of 1717,1717-03-01,122.0,New England
Mount Shasta Blizzard of 1959,1959-02-19,480.0,California


In [50]:
# Query-string version of the same compound condition, using and/or instead of &/|.
# NOTE(review): some pandas/numexpr combinations require engine='python' for
# .str methods inside query() — confirm against the pandas version in use.
blizzards.query("(snowfall > 100 and date > '1900-01-01') or (location.str.contains('New').values)") #Querying

Unnamed: 0,date,snowfall,location
Buffalo Blizzard of 1977,1977-01-27,150.0,New York
Great Snow of 1717,1717-03-01,122.0,New England
Mount Shasta Blizzard of 1959,1959-02-19,480.0,California
