# Filtering

In [1]:
import pandas as pd
# Show the installed pandas version so the outputs below can be reproduced
pd.__version__

'1.0.3'

---

## Filtering Rows
- Using locally saved *imdb.csv* file

In [2]:
# Read the locally saved IMDb dataset and preview its first five rows
movies = pd.read_csv('Data/imdb.csv')
movies.head(n=5)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


In [3]:
# Dimensions of the loaded frame as a (rows, columns) tuple
movies.shape

(979, 6)

#### Filter method 1:
- Filter rows by duration
- Select Genre and Title

In [4]:
# Bracket notation: a boolean mask keeps the long films, then a column list
# narrows the result to title and genre
movies[movies['duration'].ge(200)][['title', 'genre']].head()

Unnamed: 0,title,genre
2,The Godfather: Part II,Crime
7,The Lord of the Rings: The Return of the King,Adventure
17,Seven Samurai,Drama
78,Once Upon a Time in America,Crime
85,Lawrence of Arabia,Adventure


#### Filter method 2 (Preferred):
- Filter using the *loc* method
- Select Genre and Title Columns

In [5]:
# Filter by duration using the loc method: rows and columns are selected
# in a single indexing step
movies.loc[movies['duration'] >= 200, ['title', 'genre']].head()

Unnamed: 0,title,genre
2,The Godfather: Part II,Crime
7,The Lord of the Rings: The Return of the King,Adventure
17,Seven Samurai,Drama
78,Once Upon a Time in America,Crime
85,Lawrence of Arabia,Adventure


---

## Using Multiple Filtering Criteria
- Using locally saved *imdb.csv* file

In [6]:
# Re-read the dataset into a fresh frame and preview the first five rows
movies = pd.read_csv('Data/imdb.csv')
movies.head(n=5)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


#### Filter based on Duration and Genre
- Filter for Dramas that are at least 200 minutes long
- Use the element-wise operators `&` and `|` instead of the Python keywords `and` and `or`, and wrap each condition in parentheses

In [7]:
# Both conditions must hold: at least 200 minutes long AND a Drama
movies.loc[(movies['duration'] >= 200) & (movies['genre'] == 'Drama')]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
17,8.7,Seven Samurai,UNRATED,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."
157,8.2,Gone with the Wind,G,Drama,238,"[u'Clark Gable', u'Vivien Leigh', u'Thomas Mit..."
476,7.8,Hamlet,PG-13,Drama,242,"[u'Kenneth Branagh', u'Julie Christie', u'Dere..."


In [8]:
# Either condition may hold: at least 200 minutes long OR a Drama
movies.loc[(movies['duration'] >= 200) | (movies['genre'] == 'Drama')].head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."
13,8.8,Forrest Gump,PG-13,Drama,142,"[u'Tom Hanks', u'Robin Wright', u'Gary Sinise']"


#### Filter based on multiple categories within a Column
- Filter for movies based on duration and multiple Genre types
- Use *isin* method

In [9]:
# isin checks each genre against the whole list at once, so one mask
# covers all three categories
genre_list = ['Drama', 'Crime', 'Action']
movies.loc[(movies['duration'] >= 200) & movies['genre'].isin(genre_list)]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
17,8.7,Seven Samurai,UNRATED,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."
78,8.4,Once Upon a Time in America,R,Crime,229,"[u'Robert De Niro', u'James Woods', u'Elizabet..."
157,8.2,Gone with the Wind,G,Drama,238,"[u'Clark Gable', u'Vivien Leigh', u'Thomas Mit..."
476,7.8,Hamlet,PG-13,Drama,242,"[u'Kenneth Branagh', u'Julie Christie', u'Dere..."
767,7.6,"It's a Mad, Mad, Mad, Mad World",APPROVED,Action,205,"[u'Spencer Tracy', u'Milton Berle', u'Ethel Me..."


---

## Iterating over a DataFrame

In [10]:
# Iterate over a DataFrame with iterrows
# Only the first ten rows of the file are read (nrows=10), so the loop
# below prints ten lines.
df = pd.read_csv('Data/imdb.csv', nrows=10)
# iterrows yields (index label, row) pairs; the title and genre columns of
# each row are read here via attribute access.
# NOTE(review): row-by-row iteration is fine for a ten-row demo; on large
# frames prefer vectorized operations or itertuples — confirm against the
# pandas iteration docs.
for index, row in df.iterrows():
    print(f"{index}: {row.title}, {row.genre}")

0: The Shawshank Redemption, Crime
1: The Godfather, Crime
2: The Godfather: Part II, Crime
3: The Dark Knight, Action
4: Pulp Fiction, Crime
5: 12 Angry Men, Drama
6: The Good, the Bad and the Ugly, Western
7: The Lord of the Rings: The Return of the King, Adventure
8: Schindler's List, Biography
9: Fight Club, Drama
