# Chapter 1. Pandas Foundations

In [1]:
import pandas as pd
import numpy as pn

## Dissecting the anatomy of a DataFrame

In [3]:
pd.set_option('max_columns', 8, 'max_rows', 10)

In [4]:
movie = pd.read_csv('data/movie.csv')
movie.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,...,23000.0,8.5,2.35,164000
4,,Doug Walker,,,...,12.0,7.1,,0


In [5]:
cols = movie.columns
idx = movie.index
data = movie.values
data

array([['Color', 'James Cameron', 723.0, ..., 7.9, 1.78, 33000],
       ['Color', 'Gore Verbinski', 302.0, ..., 7.1, 2.35, 0],
       ['Color', 'Sam Mendes', 602.0, ..., 6.8, 2.35, 85000],
       ...,
       ['Color', 'Benjamin Roberds', 13.0, ..., 6.3, nan, 16],
       ['Color', 'Daniel Hsia', 14.0, ..., 6.3, 2.35, 660],
       ['Color', 'Jon Gunn', 43.0, ..., 6.6, 1.85, 456]], dtype=object)

In [6]:
movie.dtypes

color                       object
director_name               object
num_critic_for_reviews     float64
duration                   float64
director_facebook_likes    float64
                            ...   
title_year                 float64
actor_2_facebook_likes     float64
imdb_score                 float64
aspect_ratio               float64
movie_facebook_likes         int64
Length: 28, dtype: object

In [11]:
s_attr_methods = set(dir(pd.Series))
len(s_attr_methods)

421

In [12]:
df_attr_methods = set(dir(pd.DataFrame))
len(df_attr_methods)

432

In [13]:
len(s_attr_methods & df_attr_methods)

365

In [14]:
director = movie['director_name']
director.value_counts()

Steven Spielberg    26
Woody Allen         22
Martin Scorsese     20
Clint Eastwood      20
Ridley Scott        16
                    ..
John Putch           1
Luca Guadagnino      1
Sam Fell             1
Dan Fogelman         1
Daniel Hsia          1
Name: director_name, Length: 2397, dtype: int64

In [15]:
director.describe()

count                 4814
unique                2397
top       Steven Spielberg
freq                    26
Name: director_name, dtype: object

In [16]:
director.isnull()

0       False
1       False
2       False
3       False
4       False
        ...  
4911    False
4912     True
4913    False
4914    False
4915    False
Name: director_name, Length: 4916, dtype: bool

In [18]:
director == 'James Cameron'

0        True
1       False
2       False
3       False
4       False
        ...  
4911    False
4912    False
4913    False
4914    False
4915    False
Name: director_name, Length: 4916, dtype: bool

In [20]:
movie.columns

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')

In [23]:
lmdb_score = movie['imdb_score']
lmdb_score.floordiv(7)


0       1.0
1       1.0
2       0.0
3       1.0
4       1.0
       ... 
4911    1.0
4912    1.0
4913    0.0
4914    0.0
4915    0.0
Name: imdb_score, Length: 4916, dtype: float64

In [24]:
lmdb_score >= 7

0        True
1        True
2       False
3        True
4        True
        ...  
4911     True
4912     True
4913    False
4914    False
4915    False
Name: imdb_score, Length: 4916, dtype: bool

In [25]:
movie = pd.read_csv('data/movie.csv')
actor_1_fb_likes = movie['actor_1_facebook_likes']
director = movie['director_name']

director.value_counts().head()

Steven Spielberg    26
Woody Allen         22
Martin Scorsese     20
Clint Eastwood      20
Ridley Scott        16
Name: director_name, dtype: int64

In [26]:
actor_1_fb_likes.isnull().sum()

7

In [27]:
actor_1_fb_likes.fillna(0)\
    .astype(int)\
    .head()

0     1000
1    40000
2    11000
3    27000
4      131
Name: actor_1_facebook_likes, dtype: int64

In [28]:
movie2 = movie.set_index('movie_title')
movie2

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avatar,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
Spectre,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,...,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,...,12.0,7.1,,0
...,...,...,...,...,...,...,...,...,...
Signed Sealed Delivered,Color,Scott Smith,1.0,87.0,...,470.0,7.7,,84
The Following,Color,,43.0,43.0,...,593.0,7.5,16.00,32000
A Plague So Pleasant,Color,Benjamin Roberds,13.0,76.0,...,0.0,6.3,,16
Shanghai Calling,Color,Daniel Hsia,14.0,100.0,...,719.0,6.3,2.35,660


In [29]:
pd.read_csv('data/movie.csv', index_col='movie_title')

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avatar,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
Spectre,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,...,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,...,12.0,7.1,,0
...,...,...,...,...,...,...,...,...,...
Signed Sealed Delivered,Color,Scott Smith,1.0,87.0,...,470.0,7.7,,84
The Following,Color,,43.0,43.0,...,593.0,7.5,16.00,32000
A Plague So Pleasant,Color,Benjamin Roberds,13.0,76.0,...,0.0,6.3,,16
Shanghai Calling,Color,Daniel Hsia,14.0,100.0,...,719.0,6.3,2.35,660


In [31]:
idx_rename = {'Avatar': 'Ratava', 'Spectre': 'Ertceps'}
col_rename = {'director_name':'Director Name', 'num_critic_for_review':'Critical Reviews'}
movie.rename(index=idx_rename, columns=col_rename).head()

Unnamed: 0,color,Director Name,num_critic_for_reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,...,23000.0,8.5,2.35,164000
4,,Doug Walker,,,...,12.0,7.1,,0


In [34]:
movie = pd.read_csv('data/movie.csv', index_col='movie_title')
index = movie.index
columns = movie.columns

index_list = index.tolist()
column_list = columns.tolist()

index_list[0] = 'Ratava'
index_list[2] = 'Ertceps'
column_list[1] = 'Director Name'
column_list[2] = 'Critical Reviews'

In [35]:
print(index_list[:5])

['Ratava', "Pirates of the Caribbean: At World's End", 'Ertceps', 'The Dark Knight Rises', 'Star Wars: Episode VII - The Force Awakens']


In [37]:
movie['actor_director_facebook_likes'] = (movie['actor_1_facebook_likes'] + 
                                              movie['actor_2_facebook_likes'] + 
                                              movie['actor_3_facebook_likes'] + 
                                              movie['director_facebook_likes'])

movie['actor_director_facebook_likes']                                            

movie_title
Avatar                                         2791.0
Pirates of the Caribbean: At World's End      46563.0
Spectre                                       11554.0
The Dark Knight Rises                         95000.0
Star Wars: Episode VII - The Force Awakens        NaN
                                               ...   
Signed Sealed Delivered                        1427.0
The Following                                     NaN
A Plague So Pleasant                              0.0
Shanghai Calling                               2154.0
My Date with Drew                               141.0
Name: actor_director_facebook_likes, Length: 4916, dtype: float64

In [39]:
movie['is_cast_likes_more'] = (movie['cast_total_facebook_likes'] >= 
                                  movie['actor_director_facebook_likes'])
                                  
movie['is_cast_likes_more']                                  

movie_title
Avatar                                         True
Pirates of the Caribbean: At World's End       True
Spectre                                        True
The Dark Knight Rises                          True
Star Wars: Episode VII - The Force Awakens    False
                                              ...  
Signed Sealed Delivered                        True
The Following                                 False
A Plague So Pleasant                           True
Shanghai Calling                               True
My Date with Drew                              True
Name: is_cast_likes_more, Length: 4916, dtype: bool

In [42]:
movie['actor_total_facebook_likes'] = (movie['actor_1_facebook_likes'] + \
                                              movie['actor_2_facebook_likes'] + \
                                              movie['actor_3_facebook_likes'] )

movie['actor_total_facebook_likes'] = movie['actor_total_facebook_likes'].fillna(0)                                             

In [43]:
profit_index = movie.columns.get_loc('gross') + 1
profit_index

9

In [44]:
movie.insert(loc=profit_index, column='profit', value=movie['gross']-movie['budget'])
movie.head()

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,...,movie_facebook_likes,actor_director_facebook_likes,is_cast_likes_more,actor_total_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avatar,Color,James Cameron,723.0,178.0,...,33000,2791.0,True,2791.0
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,...,0,46000.0,True,46000.0
Spectre,Color,Sam Mendes,602.0,148.0,...,85000,11554.0,True,11554.0
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,...,164000,73000.0,True,73000.0
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,...,0,,False,0.0
