In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from pandas.testing import assert_frame_equal
import matplotlib.pyplot as plt
import seaborn as sns
# Folder holding the input data files, resolved against the current working directory.
p1 = Path.cwd().joinpath('back_data')

In [2]:
# A boolean array is a Series that shares the index of the original data, but whose values are all True / False -> i.e. a Series of boolean values

In [3]:
# Read movie.csv from the data folder, using the movie_title column as the row index.
movie = pd.read_csv(p1.joinpath('movie.csv'), index_col='movie_title')

In [6]:
# gt() is the method form of the > comparison operator, which is convenient in a method chain.
# dropna() removes the missing durations before the comparison is made.
movie_2_hours = (
    movie['duration']
    .dropna()
    .gt(120)
)
# Python counts True as 1 and False as 0, so sum() and mean() are meaningful on booleans:
# sum() is the number of matches and mean() * 100 is their percentage.
(
    movie_2_hours.sum(),
    movie_2_hours.mean() * 100,
    movie_2_hours.describe(),
)

(1039,
 21.199755152009793,
 count      4901
 unique        2
 top       False
 freq       3862
 Name: duration, dtype: object)

In [9]:
# Plain Python builds boolean expressions with and / or / not, but pandas boolean
# indexing replaces them with the element-wise operators &, | and ~.
# and / or / not try to evaluate the truthiness of the whole object (the entire Series),
# which makes no sense here: the True / False of each individual value is what is needed.
# The bitwise operators &, | and ~ bind tighter than comparison operators, so every
# comparison written with <, > or == has to be wrapped in parentheses before combining.
criteria1 = movie['imdb_score'].gt(8)
criteria2 = movie['content_rating'].eq('PG-13')
criteria3 = (movie['title_year'] < 2000) | (movie['title_year'] > 2009)
criteria_final = (criteria1 & criteria2) & criteria3
criteria_final.head()

movie_title
Avatar                                        False
Pirates of the Caribbean: At World's End      False
Spectre                                       False
The Dark Knight Rises                          True
Star Wars: Episode VII - The Force Awakens    False
dtype: bool

In [12]:
# Group A: score above 8, rated PG-13, released before 2000 or after 2009.
crit_a1 = movie['imdb_score'].gt(8)
crit_a2 = movie['content_rating'].eq('PG-13')
crit_a3 = movie['title_year'].lt(2000) | movie['title_year'].gt(2009)
final_crit_a = (crit_a1 & crit_a2) & crit_a3

# Group B: score below 5, rated R, released from 2000 through 2010 (both ends included).
crit_b1 = movie['imdb_score'].lt(5)
crit_b2 = movie['content_rating'].eq('R')
crit_b3 = movie['title_year'].between(2000, 2010)
final_crit_b = (crit_b1 & crit_b2) & crit_b3

# A row is kept when it satisfies either group.
final_crit_all = final_crit_a | final_crit_b

# A boolean array can be used as a row filter, either through the plain index operator
# or through .loc; prefer .loc where possible (it can also select columns in the same step).
(
    movie[final_crit_all].head(),
    movie.loc[final_crit_all].head(),
    movie.loc[final_crit_all, ['imdb_score', 'content_rating', 'title_year']].head(),
)

(                            color      director_name  num_critic_for_reviews  \
 movie_title                                                                    
 The Dark Knight Rises       Color  Christopher Nolan                   813.0   
 The Avengers                Color        Joss Whedon                   703.0   
 Captain America: Civil War  Color      Anthony Russo                   516.0   
 Guardians of the Galaxy     Color         James Gunn                   653.0   
 Interstellar                Color  Christopher Nolan                   712.0   
 
                             duration  director_facebook_likes  \
 movie_title                                                     
 The Dark Knight Rises          164.0                  22000.0   
 The Avengers                   173.0                      0.0   
 Captain America: Civil War     147.0                     94.0   
 Guardians of the Galaxy        121.0                    571.0   
 Interstellar                   169

In [14]:
# .iloc does not support the boolean Series directly, but it does work with a NumPy
# array, so the mask is converted with to_numpy() before being passed in.
(
    movie
    .iloc[final_crit_all.to_numpy()]
    .head()
)

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
The Avengers,Color,Joss Whedon,703.0,173.0,0.0,19000.0,Robert Downey Jr.,26000.0,623279547.0,Action|Adventure|Sci-Fi,...,1722.0,English,USA,PG-13,220000000.0,2012.0,21000.0,8.1,1.85,123000
Captain America: Civil War,Color,Anthony Russo,516.0,147.0,94.0,11000.0,Scarlett Johansson,21000.0,407197282.0,Action|Adventure|Sci-Fi,...,1022.0,English,USA,PG-13,250000000.0,2016.0,19000.0,8.2,2.35,72000
Guardians of the Galaxy,Color,James Gunn,653.0,121.0,571.0,3000.0,Vin Diesel,14000.0,333130696.0,Action|Adventure|Sci-Fi,...,1097.0,English,USA,PG-13,170000000.0,2014.0,14000.0,8.1,2.35,96000
Interstellar,Color,Christopher Nolan,712.0,169.0,22000.0,6000.0,Anne Hathaway,11000.0,187991439.0,Adventure|Drama|Sci-Fi,...,2725.0,English,USA,PG-13,165000000.0,2014.0,11000.0,8.6,2.35,349000
