# 8 ways to filter a DataFrame using partial strings or patterns

In [1]:
import pandas as pd

In [2]:
#Load the Netflix titles CSV (path is relative to the working directory) and preview the first rows
data = pd.read_csv(filepath_or_buffer='netflix_titles_nov_2019.csv')
data.head(n=5)

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
0,81193313,Chocolate,,"Ha Ji-won, Yoon Kye-sang, Jang Seung-jo, Kang ...",South Korea,"November 30, 2019",2019,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",Brought together by meaningful meals in the pa...,TV Show
1,81197050,Guatemala: Heart of the Mayan World,"Luis Ara, Ignacio Jaunsolo",Christian Morales,,"November 30, 2019",2019,TV-G,67 min,"Documentaries, International Movies","From Sierra de las Minas to Esquipulas, explor...",Movie
2,81213894,The Zoya Factor,Abhishek Sharma,"Sonam Kapoor, Dulquer Salmaan, Sanjay Kapoor, ...",India,"November 30, 2019",2019,TV-14,135 min,"Comedies, Dramas, International Movies",A goofy copywriter unwittingly convinces the I...,Movie
3,81082007,Atlantics,Mati Diop,"Mama Sane, Amadou Mbow, Ibrahima Traore, Nicol...","France, Senegal, Belgium","November 29, 2019",2019,TV-14,106 min,"Dramas, Independent Movies, International Movies","Arranged to marry a rich man, young Ada is cru...",Movie
4,80213643,Chip and Potato,,"Abigail Oliver, Andrea Libman, Briana Buckmast...","Canada, United Kingdom",,2019,TV-Y,2 Seasons,Kids' TV,"Lovable pug Chip starts kindergarten, makes ne...",TV Show


### 1. Filter rows with only a particular word 

In [3]:
#Boolean mask: True for rows whose 'type' is exactly 'TV Show'
mask = data.loc[:, 'type'].isin(['TV Show'])
mask.head(n=5)

0     True
1    False
2    False
3    False
4     True
Name: type, dtype: bool

In [4]:
#Passing the boolean mask inside square brackets keeps only the True rows
data[mask].head(n=5)

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
0,81193313,Chocolate,,"Ha Ji-won, Yoon Kye-sang, Jang Seung-jo, Kang ...",South Korea,"November 30, 2019",2019,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",Brought together by meaningful meals in the pa...,TV Show
4,80213643,Chip and Potato,,"Abigail Oliver, Andrea Libman, Briana Buckmast...","Canada, United Kingdom",,2019,TV-Y,2 Seasons,Kids' TV,"Lovable pug Chip starts kindergarten, makes ne...",TV Show
8,70205672,La Reina del Sur,,"Kate del Castillo, Cristina Urgel, Alberto Jim...","United States, Spain, Colombia, Mexico",,2019,TV-14,2 Seasons,"Crime TV Shows, International TV Shows, Spanis...",This compelling show tells the story of the le...,TV Show
11,81094391,Sugar Rush Christmas,,"Hunter March, Candace Nelson, Adriano Zumbo",United States,"November 29, 2019",2019,TV-PG,1 Season,Reality TV,"It's everything you love about ""Sugar Rush"" – ...",TV Show
13,81152641,The Charming Stepmom,,"Shahkrit Yamnarm, View Wannarot Sontichai, Kri...",,"November 29, 2019",2019,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",A quirky fashion student becomes the nanny of ...,TV Show


In [5]:
#.loc takes the row mask and a list of column labels in a single call
cols_to_show = ['title', 'country', 'duration']
data.loc[mask, cols_to_show]

Unnamed: 0,title,country,duration
0,Chocolate,South Korea,1 Season
4,Chip and Potato,"Canada, United Kingdom",2 Seasons
8,La Reina del Sur,"United States, Spain, Colombia, Mexico",2 Seasons
11,Sugar Rush Christmas,United States,1 Season
13,The Charming Stepmom,,1 Season
...,...,...,...
5811,Jack Taylor,"United States, Ireland",1 Season
5814,DreamWorks Spooky Stories: Volume 2,,1 Season
5815,DreamWorks Shrek's Swamp Stories,United States,1 Season
5816,DreamWorks How to Train Your Dragon Legends,United States,1 Season


In [6]:
#Comparing the column to a scalar with == yields the same kind of boolean mask
mask = (data['type'] == 'Movie')
#Keep the filtered rows under their own name
movies = data[mask]
movies.head(n=5)

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
1,81197050,Guatemala: Heart of the Mayan World,"Luis Ara, Ignacio Jaunsolo",Christian Morales,,"November 30, 2019",2019,TV-G,67 min,"Documentaries, International Movies","From Sierra de las Minas to Esquipulas, explor...",Movie
2,81213894,The Zoya Factor,Abhishek Sharma,"Sonam Kapoor, Dulquer Salmaan, Sanjay Kapoor, ...",India,"November 30, 2019",2019,TV-14,135 min,"Comedies, Dramas, International Movies",A goofy copywriter unwittingly convinces the I...,Movie
3,81082007,Atlantics,Mati Diop,"Mama Sane, Amadou Mbow, Ibrahima Traore, Nicol...","France, Senegal, Belgium","November 29, 2019",2019,TV-14,106 min,"Dramas, Independent Movies, International Movies","Arranged to marry a rich man, young Ada is cru...",Movie
5,81172754,Crazy people,Moses Inwang,"Ramsey Nouah, Chigul, Sola Sobowale, Ireti Doy...",Nigeria,"November 29, 2019",2018,TV-14,107 min,"Comedies, International Movies, Thrillers",Nollywood star Ramsey Nouah learns that someon...,Movie
6,81120982,I Lost My Body,Jérémy Clapin,"Hakim Faris, Victoire Du Bois, Patrick d'Assum...",France,"November 29, 2019",2019,TV-MA,81 min,"Dramas, Independent Movies, International Movies","Romance, mystery and adventure intertwine as a...",Movie


In [7]:
#Case-insensitive isin(): lowercase the column, then compare against lowercase targets
#The targets are lowercased with a comprehension so both sides use the same casing
wanted_types = [x.lower() for x in ['tv show']]
mask = data['type'].str.lower().isin(wanted_types)
data[mask].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
0,81193313,Chocolate,,"Ha Ji-won, Yoon Kye-sang, Jang Seung-jo, Kang ...",South Korea,"November 30, 2019",2019,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",Brought together by meaningful meals in the pa...,TV Show
4,80213643,Chip and Potato,,"Abigail Oliver, Andrea Libman, Briana Buckmast...","Canada, United Kingdom",,2019,TV-Y,2 Seasons,Kids' TV,"Lovable pug Chip starts kindergarten, makes ne...",TV Show
8,70205672,La Reina del Sur,,"Kate del Castillo, Cristina Urgel, Alberto Jim...","United States, Spain, Colombia, Mexico",,2019,TV-14,2 Seasons,"Crime TV Shows, International TV Shows, Spanis...",This compelling show tells the story of the le...,TV Show
11,81094391,Sugar Rush Christmas,,"Hunter March, Candace Nelson, Adriano Zumbo",United States,"November 29, 2019",2019,TV-PG,1 Season,Reality TV,"It's everything you love about ""Sugar Rush"" – ...",TV Show
13,81152641,The Charming Stepmom,,"Shahkrit Yamnarm, View Wannarot Sontichai, Kri...",,"November 29, 2019",2019,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",A quirky fashion student becomes the nanny of ...,TV Show


With two strings to filter by, check whether an element matches either of the two.

In [8]:
#isin() accepts several candidates: a row matches when its type equals any of them
type_choices = ['Movie', 'TV Show']
mask1 = data['type'].isin(type_choices)
data[mask1].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
0,81193313,Chocolate,,"Ha Ji-won, Yoon Kye-sang, Jang Seung-jo, Kang ...",South Korea,"November 30, 2019",2019,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",Brought together by meaningful meals in the pa...,TV Show
1,81197050,Guatemala: Heart of the Mayan World,"Luis Ara, Ignacio Jaunsolo",Christian Morales,,"November 30, 2019",2019,TV-G,67 min,"Documentaries, International Movies","From Sierra de las Minas to Esquipulas, explor...",Movie
2,81213894,The Zoya Factor,Abhishek Sharma,"Sonam Kapoor, Dulquer Salmaan, Sanjay Kapoor, ...",India,"November 30, 2019",2019,TV-14,135 min,"Comedies, Dramas, International Movies",A goofy copywriter unwittingly convinces the I...,Movie
3,81082007,Atlantics,Mati Diop,"Mama Sane, Amadou Mbow, Ibrahima Traore, Nicol...","France, Senegal, Belgium","November 29, 2019",2019,TV-14,106 min,"Dramas, Independent Movies, International Movies","Arranged to marry a rich man, young Ada is cru...",Movie
4,80213643,Chip and Potato,,"Abigail Oliver, Andrea Libman, Briana Buckmast...","Canada, United Kingdom",,2019,TV-Y,2 Seasons,Kids' TV,"Lovable pug Chip starts kindergarten, makes ne...",TV Show


In [9]:
#Same filter with ==: build one comparison per value and combine them with | (element-wise OR)
is_movie = data['type'] == 'Movie'
is_tv_show = data['type'] == 'TV Show'
mask2 = is_movie | is_tv_show
data[mask2].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
0,81193313,Chocolate,,"Ha Ji-won, Yoon Kye-sang, Jang Seung-jo, Kang ...",South Korea,"November 30, 2019",2019,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",Brought together by meaningful meals in the pa...,TV Show
1,81197050,Guatemala: Heart of the Mayan World,"Luis Ara, Ignacio Jaunsolo",Christian Morales,,"November 30, 2019",2019,TV-G,67 min,"Documentaries, International Movies","From Sierra de las Minas to Esquipulas, explor...",Movie
2,81213894,The Zoya Factor,Abhishek Sharma,"Sonam Kapoor, Dulquer Salmaan, Sanjay Kapoor, ...",India,"November 30, 2019",2019,TV-14,135 min,"Comedies, Dramas, International Movies",A goofy copywriter unwittingly convinces the I...,Movie
3,81082007,Atlantics,Mati Diop,"Mama Sane, Amadou Mbow, Ibrahima Traore, Nicol...","France, Senegal, Belgium","November 29, 2019",2019,TV-14,106 min,"Dramas, Independent Movies, International Movies","Arranged to marry a rich man, young Ada is cru...",Movie
4,80213643,Chip and Potato,,"Abigail Oliver, Andrea Libman, Briana Buckmast...","Canada, United Kingdom",,2019,TV-Y,2 Seasons,Kids' TV,"Lovable pug Chip starts kindergarten, makes ne...",TV Show


### 2. Filter rows that contain a certain string

In [10]:
#Filter for horror titles only, because Halloween is around the corner

In [11]:
#str.contains finds the substring anywhere in the genre list
#case=False ignores letter case; na=False counts missing values as non-matches
mask = data['listed_in'].str.contains(pat='horror', case=False, na=False)
horrors = data[mask]
#Show three randomly chosen matches
horrors.sample(n=3)

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
383,80043834,Sinister 2,Ciarán Foy,"Shannyn Sossamon, John Beasley, James Ransone,...","United States, United Kingdom","October 16, 2019",2015,R,97 min,Horror Movies,A single mother and her twin sons move into a ...,Movie
477,81168340,Medium,Jacek Koprowicz,"Władysław Kowalski, Michał Bajor, Jerzy Zelnik...","Poland, West Germany","October 1, 2019",1985,TV-MA,90 min,"Horror Movies, International Movies, Thrillers","In 1930s Poland, a powerful psychic steers a g...",Movie
4344,70304988,Stonehearst Asylum,Brad Anderson,"Kate Beckinsale, Jim Sturgess, David Thewlis, ...",United States,"September 8, 2017",2014,PG-13,113 min,"Horror Movies, Thrillers","In 1899, a young doctor arrives at an asylum f...",Movie


In [12]:
#Look for a symbol such as a comma in 'cast'
#na=False marks missing casts as False, so inverting the mask with ~ also returns the NaN rows
mask = data['cast'].str.contains(pat=',', na=False)
data.loc[~mask].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
1,81197050,Guatemala: Heart of the Mayan World,"Luis Ara, Ignacio Jaunsolo",Christian Morales,,"November 30, 2019",2019,TV-G,67 min,"Documentaries, International Movies","From Sierra de las Minas to Esquipulas, explor...",Movie
15,80990849,The Movies That Made Us,,,United States,"November 29, 2019",2019,TV-MA,1 Season,Docuseries,These blockbusters brought us together and gav...,TV Show
18,81161538,Lugar de Mulher,,,Brazil,"November 28, 2019",2019,TV-MA,1 Season,"International TV Shows, Stand-Up Comedy & Talk...","Four comedians from Brazil riff on sexuality, ...",TV Show
22,81002391,Broken,,,United States,"November 27, 2019",2019,TV-14,1 Season,Docuseries,This investigative docuseries shows how neglig...,TV Show
28,81062293,Mike Birbiglia: The New One,Seth Barrish,Mike Birbiglia,United States,"November 26, 2019",2019,TV-MA,86 min,Stand-Up Comedy,Comedian Mike Birbiglia hits Broadway with a h...,Movie


In [13]:
#Remove the rows whose 'cast' is NaN, leaving only casts without a comma
data[~mask].dropna(subset=['cast'], axis='index').head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
1,81197050,Guatemala: Heart of the Mayan World,"Luis Ara, Ignacio Jaunsolo",Christian Morales,,"November 30, 2019",2019,TV-G,67 min,"Documentaries, International Movies","From Sierra de las Minas to Esquipulas, explor...",Movie
28,81062293,Mike Birbiglia: The New One,Seth Barrish,Mike Birbiglia,United States,"November 26, 2019",2019,TV-MA,86 min,Stand-Up Comedy,Comedian Mike Birbiglia hits Broadway with a h...,Movie
50,70091304,Mike Birbiglia: What I Should Have Said Was No...,,Mike Birbiglia,United States,"November 22, 2019",2008,TV-14,56 min,Stand-Up Comedy,Mike Birbiglia performs in this live concert s...,Movie
54,81218074,Shelby American,"Nate Adams, Adam Carolla",Carroll Shelby,United States,"November 22, 2019",2019,TV-14,119 min,"Documentaries, Sports Movies","Featuring interviews and vintage footage, this...",Movie
66,80221584,"Bikram: Yogi, Guru, Predator",Eva Orner,Bikram Choudhury,United States,"November 20, 2019",2019,TV-MA,86 min,Documentaries,This documentary charts the rise and fall of h...,Movie


Checking for special symbols and characters

In [14]:
#Small single-column frame whose strings contain regex-special characters
df = pd.Series(['dsa^', '^+"+', '+-+', 'as@asads'], name='a').to_frame()
df

Unnamed: 0,a
0,dsa^
1,"^+""+"
2,+-+
3,as@asads


In [15]:
#By default the pattern is a regex, where '^' anchors the start of the string, so every row matches
df['a'].str.contains(pat='^')

0    True
1    True
2    True
3    True
Name: a, dtype: bool

In [16]:
#regex=False treats '^' as a literal character instead of a regex anchor
df['a'].str.contains(pat='^', regex=False)

0     True
1     True
2    False
3    False
Name: a, dtype: bool

In [17]:
#Escape the caret so the regex matches a literal '^'.
#A raw string keeps the backslash intact; a plain '\^' is an invalid string escape
#that newer Python versions warn about.
df['a'].str.contains(r'\^')

0     True
1     True
2    False
3    False
Name: a, dtype: bool

In [18]:
#A bare '+' is a regex quantifier with nothing to repeat, so the default regex mode fails:
#df['a'].str.contains('+') #this returns an error
#Switching the regex engine off searches for the literal plus sign instead
df['a'].str.contains(pat='+', regex=False)

0    False
1     True
2     True
3    False
Name: a, dtype: bool

### 3. Filter rows that contain either of two substrings (OR)

In [19]:
#Naturally after a good scare we want something funny to cancel out the jitters
#Filter for horror or stand-up comedy titles

In [20]:
#'|' is the regex OR operator: a row matches when either substring is present
#na=False treats missing genres as non-matches so the mask stays purely boolean
mask = data['listed_in'].str.contains('horror|stand-up', case=False, na=False)
horror_n_standup = data[mask]
#Show three randomly chosen matches
horror_n_standup.sample(3)

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
3990,80203784,Rise of the Zombie,"Devaki Singh, Luke Kenny","Luke Kenny, Kirti Kulhari, Ashwin Mushran, Ben...",India,"December 15, 2017",2013,TV-MA,83 min,"Horror Movies, International Movies",A heartbroken wildlife photographer throws him...,Movie
1495,81024557,Thriller,Dallas Jackson,"Jessica Allain, Luke Tennie, Tequan Richmond, ...",United States,"April 14, 2019",2018,TV-MA,87 min,Horror Movies,"When a prank goes wrong, a shy kid from Compto...",Movie
2588,81015317,Tales From the Hood 2,"Rusty Cundieff, Darin Scott","Keith David, Bryan Batt, Alexandria Deberry, B...",United States,"October 10, 2018",2018,R,110 min,"Cult Movies, Horror Movies, Independent Movies",Buckle up for an anthology of socially conscio...,Movie


In [21]:
#Another way to get the same rows: one mask per substring, combined with | (element-wise OR)
#na=False treats missing genres as non-matches so both masks stay purely boolean
mask1 = data['listed_in'].str.contains('horror', case=False, na=False)
mask2 = data['listed_in'].str.contains('stand-up', case=False, na=False)

data[mask1 | mask2].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
18,81161538,Lugar de Mulher,,,Brazil,"November 28, 2019",2019,TV-MA,1 Season,"International TV Shows, Stand-Up Comedy & Talk...","Four comedians from Brazil riff on sexuality, ...",TV Show
28,81062293,Mike Birbiglia: The New One,Seth Barrish,Mike Birbiglia,United States,"November 26, 2019",2019,TV-MA,86 min,Stand-Up Comedy,Comedian Mike Birbiglia hits Broadway with a h...,Movie
32,81140259,Zona Rosa,,"Manu NNa, Ana Julia Yeyé, Ray Contreras, Pablo...",Mexico,"November 26, 2019",2019,TV-MA,1 Season,"International TV Shows, Spanish-Language TV Sh...",An assortment of talent takes the stage for a ...,TV Show
50,70091304,Mike Birbiglia: What I Should Have Said Was No...,,Mike Birbiglia,United States,"November 22, 2019",2008,TV-14,56 min,Stand-Up Comedy,Mike Birbiglia performs in this live concert s...,Movie
68,70122722,Carriers,"Àlex Pastor, David Pastor","Lou Taylor Pucci, Chris Pine, Piper Perabo, Em...",United States,"November 20, 2019",2009,PG-13,85 min,"Horror Movies, Sci-Fi & Fantasy, Thrillers","As a lethal virus spreads globally, four frien...",Movie


In [22]:
#Masks can come from different columns and different methods:
#a substring test on the genres and an exact match on the type, combined with & (element-wise AND)
mask1 = data['listed_in'].str.contains(pat='horror', case=False, na=False)
mask2 = data['type'].isin(['TV Show'])
horror_shows = data[mask1 & mask2]
horror_shows.head(n=3)

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
327,81017316,Prank Encounters,,Gaten Matarazzo,United States,"October 25, 2019",2019,TV-MA,1 Season,"Reality TV, TV Comedies, TV Horror",Monstrous frights meet hilarious reveals on th...,TV Show
333,80197462,Daybreak,,"Colin Ford, Alyvia Alyn Lind, Sophie Simnett, ...",United States,"October 24, 2019",2019,TV-MA,1 Season,"TV Action & Adventure, TV Comedies, TV Horror","Living his best life in post-apocalyptic LA, a...",TV Show
397,80195378,Haunted,,,"United States, Czech Republic",,2019,TV-MA,2 Seasons,"Reality TV, TV Horror, TV Thrillers",Real people sit down with friends and family t...,TV Show


### 4. Filter and return where both substrings are present

In [23]:
#When order matters: 'states' must appear before 'mexico' in the country string
#'.*' allows any characters (including none) between the two substrings
mask = data['country'].str.contains(pat='states.*mexico', case=False, na=False)
us_mexico = data[mask]
us_mexico.head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
8,70205672,La Reina del Sur,,"Kate del Castillo, Cristina Urgel, Alberto Jim...","United States, Spain, Colombia, Mexico",,2019,TV-14,2 Seasons,"Crime TV Shows, International TV Shows, Spanis...",This compelling show tells the story of the le...,TV Show
133,81034946,Maradona in Mexico,,Diego Armando Maradona,"Argentina, United States, Mexico","November 13, 2019",2020,TV-MA,1 Season,"Docuseries, Spanish-Language TV Shows","In this docuseries, soccer great Diego Maradon...",TV Show
179,70135899,Don't Be Afraid of the Dark,Troy Nixey,"Katie Holmes, Guy Pearce, Bailee Madison, Jack...","United States, Australia, Mexico","November 2, 2019",2010,R,99 min,Horror Movies,Young Sally Hurst discovers she isn't alone in...,Movie
252,70044883,Nacho Libre,Jared Hess,"Jack Black, Ana de la Reguera, Héctor Jiménez,...","United States, Mexico","November 1, 2019",2006,PG,92 min,"Comedies, Sports Movies",When Ignacio learns of an orphanage's financia...,Movie
534,80244682,Birders,Otilia Portillo Padua,,"United States, Mexico","September 25, 2019",2019,TV-G,38 min,"Documentaries, International Movies",Bird watchers on both sides of the U.S.-Mexico...,Movie


In [24]:
#When order does not matter: accept either ordering by joining both regexes with '|'
either_order = 'states.*mexico|mexico.*states'
mask = data['country'].str.contains(either_order, case=False, na=False)

data[mask].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
8,70205672,La Reina del Sur,,"Kate del Castillo, Cristina Urgel, Alberto Jim...","United States, Spain, Colombia, Mexico",,2019,TV-14,2 Seasons,"Crime TV Shows, International TV Shows, Spanis...",This compelling show tells the story of the le...,TV Show
133,81034946,Maradona in Mexico,,Diego Armando Maradona,"Argentina, United States, Mexico","November 13, 2019",2020,TV-MA,1 Season,"Docuseries, Spanish-Language TV Shows","In this docuseries, soccer great Diego Maradon...",TV Show
179,70135899,Don't Be Afraid of the Dark,Troy Nixey,"Katie Holmes, Guy Pearce, Bailee Madison, Jack...","United States, Australia, Mexico","November 2, 2019",2010,R,99 min,Horror Movies,Young Sally Hurst discovers she isn't alone in...,Movie
233,80153894,How to Be a Latin Lover,Ken Marino,"Eugenio Derbez, Salma Hayek, Rob Lowe, Raphael...","Mexico, United States","November 1, 2019",2017,PG-13,116 min,Comedies,When an aging Lothario gets the boot from his ...,Movie
252,70044883,Nacho Libre,Jared Hess,"Jack Black, Ana de la Reguera, Héctor Jiménez,...","United States, Mexico","November 1, 2019",2006,PG,92 min,"Comedies, Sports Movies",When Ignacio learns of an orphanage's financia...,Movie


In [25]:
#Order-independent alternative: test each substring separately and require both with &
mask1 = data['country'].str.contains(pat='states', case=False, na=False)
mask2 = data['country'].str.contains(pat='mexico', case=False, na=False)

data[mask1 & mask2].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
8,70205672,La Reina del Sur,,"Kate del Castillo, Cristina Urgel, Alberto Jim...","United States, Spain, Colombia, Mexico",,2019,TV-14,2 Seasons,"Crime TV Shows, International TV Shows, Spanis...",This compelling show tells the story of the le...,TV Show
133,81034946,Maradona in Mexico,,Diego Armando Maradona,"Argentina, United States, Mexico","November 13, 2019",2020,TV-MA,1 Season,"Docuseries, Spanish-Language TV Shows","In this docuseries, soccer great Diego Maradon...",TV Show
179,70135899,Don't Be Afraid of the Dark,Troy Nixey,"Katie Holmes, Guy Pearce, Bailee Madison, Jack...","United States, Australia, Mexico","November 2, 2019",2010,R,99 min,Horror Movies,Young Sally Hurst discovers she isn't alone in...,Movie
233,80153894,How to Be a Latin Lover,Ken Marino,"Eugenio Derbez, Salma Hayek, Rob Lowe, Raphael...","Mexico, United States","November 1, 2019",2017,PG-13,116 min,Comedies,When an aging Lothario gets the boot from his ...,Movie
252,70044883,Nacho Libre,Jared Hess,"Jack Black, Ana de la Reguera, Héctor Jiménez,...","United States, Mexico","November 1, 2019",2006,PG,92 min,"Comedies, Sports Movies",When Ignacio learns of an orphanage's financia...,Movie


### 5. Filter rows with numbers (non-string)

In [26]:
#Check which titles have an actual age (a number) in their rating

In [27]:
#Regex '-[0-9]': a hyphen immediately followed by a digit
#na=False counts missing ratings as non-matches
mask = data['rating'].str.contains(pat='-[0-9]', na=False)
age_rating = data[mask]
#Show three randomly chosen matches
age_rating.sample(n=3)

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
5484,80108985,Caught on Camera,,Nick Wallis,United Kingdom,,2015,TV-14,3 Seasons,"British TV Shows, Crime TV Shows, Docuseries","Nick Wallis presents crimes caught on CCTV, ce...",TV Show
2055,70202600,Shaka Zulu,,"Edward Fox, Robert Powell, Trevor Howard, Fion...","Italy, South Africa, West Germany, Australia, ...","January 10, 2019",1986,TV-14,1 Season,TV Dramas,This miniseries recounts the early 19th-centur...,TV Show
4901,70302835,Killa,Avinash Arun,"Amruta Subhash, Archit Deodhar, Parth Bhalerao...",India,"April 1, 2017",2014,TV-14,107 min,"Dramas, International Movies",Growing up poses challenges for Chinu when he ...,Movie


In [28]:
#Count how often each rating category occurs in the filtered rows
age_rating.loc[:, 'rating'].value_counts()

TV-14    1593
PG-13     227
NC-17       2
Name: rating, dtype: int64

### 6. Where a sub-string is present in many columns

In [29]:
#Check where the 'TV' substring is present in all of 'rating', 'listed_in' and 'type'

In [30]:
#One case-insensitive 'tv' mask per column; missing values count as non-matches
mask1 = data['rating'].str.contains(pat='tv', case=False, na=False)
mask2 = data['listed_in'].str.contains(pat='tv', case=False, na=False)
mask3 = data['type'].str.contains(pat='tv', case=False, na=False)
#Keep rows where the substring appears in every one of the three columns
tv_everywhere = mask1 & mask2 & mask3
data[tv_everywhere].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
0,81193313,Chocolate,,"Ha Ji-won, Yoon Kye-sang, Jang Seung-jo, Kang ...",South Korea,"November 30, 2019",2019,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",Brought together by meaningful meals in the pa...,TV Show
4,80213643,Chip and Potato,,"Abigail Oliver, Andrea Libman, Briana Buckmast...","Canada, United Kingdom",,2019,TV-Y,2 Seasons,Kids' TV,"Lovable pug Chip starts kindergarten, makes ne...",TV Show
8,70205672,La Reina del Sur,,"Kate del Castillo, Cristina Urgel, Alberto Jim...","United States, Spain, Colombia, Mexico",,2019,TV-14,2 Seasons,"Crime TV Shows, International TV Shows, Spanis...",This compelling show tells the story of the le...,TV Show
11,81094391,Sugar Rush Christmas,,"Hunter March, Candace Nelson, Adriano Zumbo",United States,"November 29, 2019",2019,TV-PG,1 Season,Reality TV,"It's everything you love about ""Sugar Rush"" – ...",TV Show
13,81152641,The Charming Stepmom,,"Shahkrit Yamnarm, View Wannarot Sontichai, Kri...",,"November 29, 2019",2019,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",A quirky fashion student becomes the nanny of ...,TV Show


In [31]:
#A different way to accomplish the above using apply and lambda
cols_to_check = ['rating', 'listed_in', 'type']
pattern = 'tv'
#apply runs the lambda once per selected column, giving one boolean column each
per_column = data[cols_to_check].apply(
    lambda col: col.str.contains(pattern, case=False, na=False))
#all(...) across the columns is True only when every column matched in that row
mask = per_column.all(axis='columns')

data[mask].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
0,81193313,Chocolate,,"Ha Ji-won, Yoon Kye-sang, Jang Seung-jo, Kang ...",South Korea,"November 30, 2019",2019,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",Brought together by meaningful meals in the pa...,TV Show
4,80213643,Chip and Potato,,"Abigail Oliver, Andrea Libman, Briana Buckmast...","Canada, United Kingdom",,2019,TV-Y,2 Seasons,Kids' TV,"Lovable pug Chip starts kindergarten, makes ne...",TV Show
8,70205672,La Reina del Sur,,"Kate del Castillo, Cristina Urgel, Alberto Jim...","United States, Spain, Colombia, Mexico",,2019,TV-14,2 Seasons,"Crime TV Shows, International TV Shows, Spanis...",This compelling show tells the story of the le...,TV Show
11,81094391,Sugar Rush Christmas,,"Hunter March, Candace Nelson, Adriano Zumbo",United States,"November 29, 2019",2019,TV-PG,1 Season,Reality TV,"It's everything you love about ""Sugar Rush"" – ...",TV Show
13,81152641,The Charming Stepmom,,"Shahkrit Yamnarm, View Wannarot Sontichai, Kri...",,"November 29, 2019",2019,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",A quirky fashion student becomes the nanny of ...,TV Show


In [32]:
#The three per-column masks, before they are collapsed into a single mask
three_masks = data[cols_to_check].apply(
    lambda col: col.str.contains(pat=pattern, case=False, na=False))
three_masks.head(n=5)

Unnamed: 0,rating,listed_in,type
0,True,True,True
1,True,False,False
2,True,False,False
3,True,False,False
4,True,True,True


In [33]:
#.all across the columns: True only where all three masks are True in that row
three_masks.all(axis='columns').head()

0     True
1    False
2    False
3    False
4     True
dtype: bool

In [34]:
#.any across the columns: True where at least one of the three masks is True in that row
three_masks.any(axis='columns').head()

0    True
1    True
2    True
3    True
4    True
dtype: bool


### 7. Filter rows where the value in one column is present in another column

In [35]:
#Filter which directors starred in their own movies (director in cast)

In [36]:
#Raises TypeError when 'director' or 'cast' is NaN (a float), because `in` expects strings here:
#data.apply(lambda x: x['director'] in x['cast'], axis=1)

In [37]:
#Row-wise substring test: is the director's name contained in the cast string?
#str() keeps `in` from failing on NaN, but str(NaN) is 'nan', so rows where both
#director and cast are missing also come back as matches
mask = data.apply(lambda row: str(row['director']) in str(row['cast']), axis='columns')

data[mask].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
9,81172841,Lagos Real Fake Life,Mike Ezuruonye,"Nonso Diobi, Mike Ezuruonye, Mercy Aigbe, Rex ...",,"November 29, 2019",2018,TV-14,118 min,"Comedies, International Movies",Two mooching friends vie for the attention of ...,Movie
15,80990849,The Movies That Made Us,,,United States,"November 29, 2019",2019,TV-MA,1 Season,Docuseries,These blockbusters brought us together and gav...,TV Show
18,81161538,Lugar de Mulher,,,Brazil,"November 28, 2019",2019,TV-MA,1 Season,"International TV Shows, Stand-Up Comedy & Talk...","Four comedians from Brazil riff on sexuality, ...",TV Show
22,81002391,Broken,,,United States,"November 27, 2019",2019,TV-14,1 Season,Docuseries,This investigative docuseries shows how neglig...,TV Show
52,81169145,Narcoworld: Dope Stories,,,United States,"November 22, 2019",2019,TV-MA,1 Season,"Crime TV Shows, Docuseries",Ride along as police officers and drug smuggle...,TV Show


In [38]:
#Discard the matches whose director is NaN
data[mask].dropna(axis='index', subset=['director']).head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
9,81172841,Lagos Real Fake Life,Mike Ezuruonye,"Nonso Diobi, Mike Ezuruonye, Mercy Aigbe, Rex ...",,"November 29, 2019",2018,TV-14,118 min,"Comedies, International Movies",Two mooching friends vie for the attention of ...,Movie
64,292118,Beavis and Butt-head Do America,Mike Judge,"Mike Judge, Bruce Willis, Demi Moore, Cloris L...",United States,"November 20, 2019",1996,PG-13,81 min,"Comedies, Cult Movies","After realizing that their boob tube is gone, ...",Movie
116,80183187,Klaus,Sergio Pablos,"Jason Schwartzman, J.K. Simmons, Rashida Jones...",Spain,"November 15, 2019",2019,PG,98 min,"Children & Family Movies, Comedies",A selfish postman and a reclusive toymaker for...,Movie
172,81206389,Oththa Seruppu Size 7,Parthiban,Parthiban,India,"November 4, 2019",2019,TV-MA,103 min,"Dramas, International Movies, Thrillers","Taken into custody, a murder suspect's theatri...",Movie
263,973861,Sling Blade,Billy Bob Thornton,"Billy Bob Thornton, Dwight Yoakam, J.T. Walsh,...",United States,"November 1, 2019",1996,R,135 min,"Dramas, Independent Movies",A man returns home after being released from a...,Movie


In [39]:
#Alternative: drop the rows without a director first, then build and apply the mask on the reduced frame
df1 = data.dropna(axis=0, subset=['director'])
mask = df1.apply(lambda row: str(row['director']) in str(row['cast']), axis='columns')
df1.loc[mask].head()

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
9,81172841,Lagos Real Fake Life,Mike Ezuruonye,"Nonso Diobi, Mike Ezuruonye, Mercy Aigbe, Rex ...",,"November 29, 2019",2018,TV-14,118 min,"Comedies, International Movies",Two mooching friends vie for the attention of ...,Movie
64,292118,Beavis and Butt-head Do America,Mike Judge,"Mike Judge, Bruce Willis, Demi Moore, Cloris L...",United States,"November 20, 2019",1996,PG-13,81 min,"Comedies, Cult Movies","After realizing that their boob tube is gone, ...",Movie
116,80183187,Klaus,Sergio Pablos,"Jason Schwartzman, J.K. Simmons, Rashida Jones...",Spain,"November 15, 2019",2019,PG,98 min,"Children & Family Movies, Comedies",A selfish postman and a reclusive toymaker for...,Movie
172,81206389,Oththa Seruppu Size 7,Parthiban,Parthiban,India,"November 4, 2019",2019,TV-MA,103 min,"Dramas, International Movies, Thrillers","Taken into custody, a murder suspect's theatri...",Movie
263,973861,Sling Blade,Billy Bob Thornton,"Billy Bob Thornton, Dwight Yoakam, J.T. Walsh,...",United States,"November 1, 2019",1996,R,135 min,"Dramas, Independent Movies",A man returns home after being released from a...,Movie


### 8. Checking column names (or index values) for a given sub-string

column names (or header)

In [40]:
#Keep only the columns whose name contains the substring 'in'
data.filter(like='in', axis='columns')

Unnamed: 0,rating,listed_in
0,TV-14,"International TV Shows, Korean TV Shows, Roman..."
1,TV-G,"Documentaries, International Movies"
2,TV-14,"Comedies, Dramas, International Movies"
3,TV-14,"Dramas, Independent Movies, International Movies"
4,TV-Y,Kids' TV
...,...,...
5832,NR,"Cult Movies, Horror Movies"
5833,TV-14,Horror Movies
5834,NR,"Dramas, International Movies"
5835,TV-MA,Stand-Up Comedy & Talk Shows


In [41]:
#Same selection via a boolean array computed over the column labels
has_in = data.columns.str.contains('in')
data.loc[:, has_in]

Unnamed: 0,rating,listed_in
0,TV-14,"International TV Shows, Korean TV Shows, Roman..."
1,TV-G,"Documentaries, International Movies"
2,TV-14,"Comedies, Dramas, International Movies"
3,TV-14,"Dramas, Independent Movies, International Movies"
4,TV-Y,Kids' TV
...,...,...
5832,NR,"Cult Movies, Horror Movies"
5833,TV-14,Horror Movies
5834,NR,"Dramas, International Movies"
5835,TV-MA,Stand-Up Comedy & Talk Shows


Index values

In [42]:
#Move 'title' into the index so the index labels can be searched
#Note: this rebinds `df`, which earlier held the special-characters example frame
df = data.set_index(keys='title')

In [43]:
# Keep the rows whose index label (the title) contains 'Love'
# (capital L -- the substring match is case-sensitive)
df.filter(like='Love', axis='index')

Unnamed: 0_level_0,show_id,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Love, Rosie",80028357,Christian Ditter,"Lily Collins, Sam Claflin, Christian Cooke, Ja...","Germany, United Kingdom","November 20, 2019",2014,R,103 min,"Comedies, International Movies, Romantic Movies","Over the years, as they come and go in each ot...",Movie
The Love Guru,70087538,Marco Schnabel,"Mike Myers, Jessica Alba, Justin Timberlake, B...","United Kingdom, Germany, United States","November 20, 2019",2008,PG-13,87 min,"Comedies, Sports Movies","Raised on an ashram in India, a self-styled lo...",Movie
The Lovers,80167628,Azazel Jacobs,"Aidan Gillen, Melora Walters, Tyler Ross, Jess...",United States,"November 5, 2019",2017,R,98 min,"Comedies, Dramas, Independent Movies","On the verge of divorce, a middle-aged couple,...",Movie
Tune in for Love,81165326,Jung Ji-woo,"Kim Go-eun, Jung Hae-in, Park Hae-joon, Kim Gu...",South Korea,"November 5, 2019",2019,TV-MA,123 min,"Dramas, International Movies, Romantic Movies",A student and a reticent teen first meet at a ...,Movie
How to Be a Latin Lover,80153894,Ken Marino,"Eugenio Derbez, Salma Hayek, Rob Lowe, Raphael...","Mexico, United States","November 1, 2019",2017,PG-13,116 min,Comedies,When an aging Lothario gets the boot from his ...,Movie
...,...,...,...,...,...,...,...,...,...,...,...
Murphy's Law of Love,80111556,,"Danson Tang, Chia-ying Lee, Jolin Chien, Jenna...",Taiwan,"October 1, 2016",2015,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",An online matchmaker who's convinced she's liv...,TV Show
The Year of Happiness and Love,80111275,,"Amber Kuo, James Wen, Weber Yang, Sonia Sui, N...",Taiwan,"September 1, 2016",2009,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",Jiang Chen-bo is a loser without prospects for...,TV Show
Lavell Crawford: Can a Brother Get Some Love?,70204316,Michael Drumm,Lavell Crawford,United States,"August 2, 2016",2011,NR,81 min,Stand-Up Comedy,"""Last Comic Standing"" runner-up Lavell Crawfor...",Movie
Love Cuisine,80106611,,"Lego Lee, Allison Lin, Duncan Chow, Nita Lei, ...",Taiwan,"August 1, 2016",2015,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ...",A successful Taiwanese chef moves home from Eu...,TV Show


In [44]:
# Same row selection as .filter(like='Love', axis=0), written with .loc and a
# boolean mask over the index.
# regex=False -> literal substring match, as .filter(like=...) does.
# na=False    -> a missing title counts as "no match" instead of putting NaN
#                into the mask, which .loc cannot index with.
df.loc[df.index.str.contains('Love', na=False, regex=False), :]

Unnamed: 0_level_0,show_id,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Love, Rosie",80028357,Christian Ditter,"Lily Collins, Sam Claflin, Christian Cooke, Ja...","Germany, United Kingdom","November 20, 2019",2014,R,103 min,"Comedies, International Movies, Romantic Movies","Over the years, as they come and go in each ot...",Movie
The Love Guru,70087538,Marco Schnabel,"Mike Myers, Jessica Alba, Justin Timberlake, B...","United Kingdom, Germany, United States","November 20, 2019",2008,PG-13,87 min,"Comedies, Sports Movies","Raised on an ashram in India, a self-styled lo...",Movie
The Lovers,80167628,Azazel Jacobs,"Aidan Gillen, Melora Walters, Tyler Ross, Jess...",United States,"November 5, 2019",2017,R,98 min,"Comedies, Dramas, Independent Movies","On the verge of divorce, a middle-aged couple,...",Movie
Tune in for Love,81165326,Jung Ji-woo,"Kim Go-eun, Jung Hae-in, Park Hae-joon, Kim Gu...",South Korea,"November 5, 2019",2019,TV-MA,123 min,"Dramas, International Movies, Romantic Movies",A student and a reticent teen first meet at a ...,Movie
How to Be a Latin Lover,80153894,Ken Marino,"Eugenio Derbez, Salma Hayek, Rob Lowe, Raphael...","Mexico, United States","November 1, 2019",2017,PG-13,116 min,Comedies,When an aging Lothario gets the boot from his ...,Movie
...,...,...,...,...,...,...,...,...,...,...,...
Murphy's Law of Love,80111556,,"Danson Tang, Chia-ying Lee, Jolin Chien, Jenna...",Taiwan,"October 1, 2016",2015,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",An online matchmaker who's convinced she's liv...,TV Show
The Year of Happiness and Love,80111275,,"Amber Kuo, James Wen, Weber Yang, Sonia Sui, N...",Taiwan,"September 1, 2016",2009,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",Jiang Chen-bo is a loser without prospects for...,TV Show
Lavell Crawford: Can a Brother Get Some Love?,70204316,Michael Drumm,Lavell Crawford,United States,"August 2, 2016",2011,NR,81 min,Stand-Up Comedy,"""Last Comic Standing"" runner-up Lavell Crawfor...",Movie
Love Cuisine,80106611,,"Lego Lee, Allison Lin, Duncan Chow, Nita Lei, ...",Taiwan,"August 1, 2016",2015,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ...",A successful Taiwanese chef moves home from Eu...,TV Show


### Other ways to filter

#### Using `DataFrame.query()`

In [45]:
# Rows whose country is exactly "South Africa"; entries that list several
# countries in one string do not match an equality test
data.query(expr='country == "South Africa"')

Unnamed: 0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,type
7,81227195,Kalushi: The Story of Solomon Mahlangu,Mandla Dube,"Thabo Rametsi, Thabo Malema, Welile Nzuza, Jaf...",South Africa,"November 29, 2019",2016,TV-MA,107 min,"Dramas, International Movies",The life and times of iconic South African lib...,Movie
351,81172729,10 Days in Sun City,Adze Ugah,"Ayo Makun, Adesua Etomi, Richard Mofe-Damijo, ...",South Africa,"October 18, 2019",2017,TV-14,87 min,"Comedies, International Movies, Romantic Movies",After his girlfriend wins the Miss Nigeria pag...,Movie
2498,80155792,Vaya,Akin Omotoso,"Mncedisi Shabangu, Sihle Xaba, Sibusiso Msiman...",South Africa,"November 1, 2018",2016,TV-MA,106 min,"Dramas, Independent Movies, International Movies",Three youths from rural South Africa set out f...,Movie
3281,80218111,Catching Feelings,Kagiso Lediga,"Kagiso Lediga, Pearl Thusi, Akin Omotoso, Andr...",South Africa,"May 18, 2018",2018,TV-MA,117 min,"Comedies, International Movies, Romantic Movies","Amid growing tensions in their marriage, a Joh...",Movie


#### Using other `Series.str` functions

In [46]:
# True where the cast string is shorter than 10 characters
# (a missing cast has length NaN, which compares as False)
data['cast'].str.len().lt(10)

0       False
1       False
2       False
3       False
4       False
        ...  
5832    False
5833    False
5834    False
5835    False
5836    False
Name: cast, Length: 5837, dtype: bool

In [47]:
# Boolean mask: True where date_added begins with 'Nov'; missing dates count as False
mask = data['date_added'].str.startswith(pat='Nov', na=False)

In [48]:
# True where duration ends with 'min'; missing durations count as False
data['duration'].str.endswith(pat='min', na=False)

0       False
1        True
2        True
3        True
4       False
        ...  
5832     True
5833     True
5834     True
5835    False
5836     True
Name: duration, Length: 5837, dtype: bool