# Introducing Pandas

## Importing a Data Set

In [5]:
import pandas as pd

In [6]:
# Display the installed pandas version so readers can compare it with their own.
pd.__version__

'2.1.1'

In [7]:
# Read the movies CSV into a DataFrame, using the "Title" column as row labels.
movies = pd.read_csv(
    filepath_or_buffer="data/movies.csv",
    index_col="Title",
)

## Manipulating a DataFrame

In [8]:
# Preview the first five rows (5 is also the default for head).
movies.head(n=5)

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Avengers: Endgame,1,Buena Vista,"$2,796.30",2019
Avatar,2,Fox,"$2,789.70",2009
Titanic,3,Paramount,"$2,187.50",1997
Star Wars: The Force Awakens,4,Buena Vista,"$2,068.20",2015
Avengers: Infinity War,5,Buena Vista,"$2,048.40",2018


In [9]:
# Preview the last five rows (5 is also the default for tail).
movies.tail(n=5)

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Yogi Bear,778,Warner Brothers,$201.60,2010
Garfield: The Movie,779,Fox,$200.80,2004
Cats & Dogs,780,Warner Brothers,$200.70,2001
The Hunt for Red October,781,Paramount,$200.50,1990
Valkyrie,782,MGM,$200.30,2008


In [10]:
# Select one row by integer position; positions start at 0, so 499 is the 500th row.
movies.iloc[499, :]

Rank           500
Studio         Fox
Gross     $288.30 
Year          2018
Name: Maze Runner: The Death Cure, dtype: object

In [11]:
# Select one row by its index label (the movie title), keeping every column.
movies.loc["Forrest Gump", :]

Rank            119
Studio    Paramount
Gross      $677.90 
Year           1994
Name: Forrest Gump, dtype: object

In [12]:
# Order the rows by "Year" from largest to smallest, then preview the first five.
(
    movies
    .sort_values(by="Year", ascending=False)
    .head(n=5)
)

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Avengers: Endgame,1,Buena Vista,"$2,796.30",2019
John Wick: Chapter 3 - Parabellum,458,Lionsgate,$304.70,2019
The Wandering Earth,114,China Film Corporation,$699.80,2019
Toy Story 4,198,Buena Vista,$519.80,2019
How to Train Your Dragon: The Hidden World,199,Universal,$519.80,2019


In [13]:
# Order the rows by "Studio" first and by "Year" within each studio
# (ascending, the default), then preview the first five.
(
    movies
    .sort_values(by=["Studio", "Year"])
    .head(n=5)
)

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
The Blair Witch Project,588,Artisan,$248.60,1999
101 Dalmatians,708,Buena Vista,$215.90,1961
The Jungle Book,755,Buena Vista,$205.80,1967
Who Framed Roger Rabbit,410,Buena Vista,$329.80,1988
Dead Poets Society,636,Buena Vista,$235.90,1989


In [14]:
# Order the rows by their index labels (the titles) in ascending order,
# then preview the first five.
movies.sort_index(ascending=True).head(n=5)

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"10,000 B.C.",536,Warner Brothers,$269.80,2008
101 Dalmatians,708,Buena Vista,$215.90,1961
101 Dalmatians,425,Buena Vista,$320.70,1996
2 Fast 2 Furious,632,Universal,$236.40,2003
2012,93,Sony,$769.70,2009


## Counting Values in a Series

In [15]:
# Pull out the "Studio" column as a Series (all rows, one column).
movies.loc[:, "Studio"]

Title
Avengers: Endgame                   Buena Vista
Avatar                                      Fox
Titanic                               Paramount
Star Wars: The Force Awakens        Buena Vista
Avengers: Infinity War              Buena Vista
                                     ...       
Yogi Bear                       Warner Brothers
Garfield: The Movie                         Fox
Cats & Dogs                     Warner Brothers
The Hunt for Red October              Paramount
Valkyrie                                    MGM
Name: Studio, Length: 782, dtype: object

In [16]:
# Count how many rows each studio has and show the ten most frequent.
(
    movies["Studio"]
    .value_counts()
    .head(n=10)
)

Studio
Warner Brothers    132
Buena Vista        125
Fox                117
Universal          109
Sony                86
Paramount           76
Dreamworks          27
Lionsgate           21
New Line            16
MGM                 11
Name: count, dtype: int64

## Filtering a Column by One or More Criteria

In [17]:
# Keep only the rows whose "Studio" value is exactly "Universal".
movies.loc[movies["Studio"].eq("Universal")]

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jurassic World,6,Universal,"$1,671.70",2015
Furious 7,8,Universal,"$1,516.00",2015
Jurassic World: Fallen Kingdom,13,Universal,"$1,309.50",2018
The Fate of the Furious,17,Universal,"$1,236.00",2017
Minions,19,Universal,"$1,159.40",2015
...,...,...,...,...
The Break-Up,763,Universal,$205.00,2006
Everest,766,Universal,$203.40,2015
Patch Adams,772,Universal,$202.30,1998
Kindergarten Cop,775,Universal,$202.00,1990


In [18]:
# Store the Boolean mask under a descriptive name, then use it to filter rows.
released_by_universal = movies["Studio"].eq("Universal")
movies.loc[released_by_universal].head(n=5)

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jurassic World,6,Universal,"$1,671.70",2015
Furious 7,8,Universal,"$1,516.00",2015
Jurassic World: Fallen Kingdom,13,Universal,"$1,309.50",2018
The Fate of the Furious,17,Universal,"$1,236.00",2017
Minions,19,Universal,"$1,159.40",2015


In [19]:
# Build one Boolean mask per criterion.
released_in_2015 = movies["Year"].eq(2015)
released_by_universal = movies["Studio"].eq("Universal")

# "&" keeps only the rows where both conditions are True.
movies.loc[released_by_universal & released_in_2015]

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jurassic World,6,Universal,"$1,671.70",2015
Furious 7,8,Universal,"$1,516.00",2015
Minions,19,Universal,"$1,159.40",2015
Fifty Shades of Grey,165,Universal,$571.00,2015
Pitch Perfect 2,504,Universal,$287.50,2015
Ted 2,702,Universal,$216.70,2015
Everest,766,Universal,$203.40,2015
Straight Outta Compton,776,Universal,$201.60,2015


In [20]:
# Build one Boolean mask per criterion.
released_in_2015 = movies["Year"].eq(2015)
released_by_universal = movies["Studio"].eq("Universal")

# "|" keeps the rows where at least one of the conditions is True.
movies.loc[released_by_universal | released_in_2015]

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Star Wars: The Force Awakens,4,Buena Vista,"$2,068.20",2015
Jurassic World,6,Universal,"$1,671.70",2015
Furious 7,8,Universal,"$1,516.00",2015
Avengers: Age of Ultron,9,Buena Vista,"$1,405.40",2015
Jurassic World: Fallen Kingdom,13,Universal,"$1,309.50",2018
...,...,...,...,...
The Break-Up,763,Universal,$205.00,2006
Everest,766,Universal,$203.40,2015
Patch Adams,772,Universal,$202.30,1998
Kindergarten Cop,775,Universal,$202.00,1990


In [21]:
# Mask of rows whose "Year" is strictly less than 1975.
before_1975 = movies["Year"].lt(1975)
movies.loc[before_1975]

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
The Exorcist,252,Warner Brothers,$441.30,1973
Gone with the Wind,288,MGM,$402.40,1939
Bambi,540,RKO,$267.40,1942
The Godfather,604,Paramount,$245.10,1972
101 Dalmatians,708,Buena Vista,$215.90,1961
The Jungle Book,755,Buena Vista,$205.80,1967


In [22]:
# Mask of rows whose "Year" lies between 1983 and 1986; `between` includes
# both endpoints by default.
mid_80s = movies["Year"].between(left=1983, right=1986)
movies.loc[mid_80s]

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Return of the Jedi,222,Fox,$475.10,1983
Back to the Future,311,Universal,$381.10,1985
Top Gun,357,Paramount,$356.80,1986
Indiana Jones and the Temple of Doom,403,Paramount,$333.10,1984
Crocodile Dundee,413,Paramount,$328.20,1986
Beverly Hills Cop,432,Paramount,$316.40,1984
Rocky IV,467,MGM,$300.50,1985
Rambo: First Blood Part II,469,TriStar,$300.40,1985
Ghostbusters,485,Columbia,$295.20,1984
Out of Africa,662,Universal,$227.50,1985


In [23]:
# Lowercase every title before searching so the match ignores letter case,
# then flag the titles that contain "dark" anywhere in the text.
has_dark_in_title = (
    movies.index
    .str.lower()
    .str.contains(pat="dark")
)
movies.loc[has_dark_in_title]

Unnamed: 0_level_0,Rank,Studio,Gross,Year
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Transformers: Dark of the Moon,23,Paramount,"$1,123.80",2011
The Dark Knight Rises,27,Warner Brothers,"$1,084.90",2012
The Dark Knight,39,Warner Brothers,"$1,004.90",2008
Thor: The Dark World,132,Buena Vista,$644.60,2013
Star Trek Into Darkness,232,Paramount,$467.40,2013
Fifty Shades Darker,309,Universal,$381.50,2017
Dark Shadows,600,Warner Brothers,$245.50,2012
Dark Phoenix,603,Fox,$245.10,2019


## Grouping Data

In [24]:
# Remove the dollar sign and the thousands separator from each "Gross" string.
# regex=False treats "$" as a literal character rather than a regex anchor.
(
    movies["Gross"]
    .str.replace(pat="$", repl="", regex=False)
    .str.replace(pat=",", repl="", regex=False)
)

Title
Avengers: Endgame               2796.30 
Avatar                          2789.70 
Titanic                         2187.50 
Star Wars: The Force Awakens    2068.20 
Avengers: Infinity War          2048.40 
                                  ...   
Yogi Bear                        201.60 
Garfield: The Movie              200.80 
Cats & Dogs                      200.70 
The Hunt for Red October         200.50 
Valkyrie                         200.30 
Name: Gross, Length: 782, dtype: object

In [25]:
# Same clean-up as the previous cell, followed by a cast from text to float.
# This only previews the result; nothing is assigned back to `movies`.
(
    movies["Gross"]
    .str.replace(pat="$", repl="", regex=False)
    .str.replace(pat=",", repl="", regex=False)
    .astype("float64")
)

Title
Avengers: Endgame               2796.3
Avatar                          2789.7
Titanic                         2187.5
Star Wars: The Force Awakens    2068.2
Avengers: Infinity War          2048.4
                                 ...  
Yogi Bear                        201.6
Garfield: The Movie              200.8
Cats & Dogs                      200.7
The Hunt for Red October         200.5
Valkyrie                         200.3
Name: Gross, Length: 782, dtype: float64

In [26]:
# Overwrite "Gross" with numeric values: strip the "$" and "," characters from
# the currency text and cast the result to float so it can be averaged and summed.
#
# The dtype guard makes this cell safe to re-run. Once the column is already
# numeric, calling the .str accessor on it would raise AttributeError, so the
# conversion is skipped in that case.
if not pd.api.types.is_numeric_dtype(movies["Gross"]):
    movies["Gross"] = (
        movies["Gross"]
        .str.replace("$", "", regex = False)
        .str.replace(",", "", regex = False)
        .astype(float)
    )

In [27]:
# Average of the "Gross" column (numeric after the conversion cell has run).
movies.loc[:, "Gross"].mean()

439.0308184143222

In [28]:
# Group the rows by their "Studio" value; later cells aggregate per group.
studios = movies.groupby(by="Studio")

In [29]:
# Number of non-missing "Gross" values per studio; preview the first five groups.
studios["Gross"].count().head(n=5)

Studio
Artisan                     1
Buena Vista               125
CL                          1
China Film Corporation      1
Columbia                    5
Name: Gross, dtype: int64

In [30]:
# Per-studio counts ordered from largest to smallest; show the top five.
(
    studios["Gross"]
    .count()
    .sort_values(ascending=False)
    .head(n=5)
)

Studio
Warner Brothers    132
Buena Vista        125
Fox                117
Universal          109
Sony                86
Name: Gross, dtype: int64

In [31]:
# Total "Gross" per studio; preview the first five groups.
studios["Gross"].sum().head(n=5)

Studio
Artisan                     248.6
Buena Vista               73585.0
CL                          228.1
China Film Corporation      699.8
Columbia                   1276.6
Name: Gross, dtype: float64

In [32]:
# Per-studio totals ordered from largest to smallest; show the top five.
(
    studios["Gross"]
    .sum()
    .sort_values(ascending=False)
    .head(n=5)
)

Studio
Buena Vista        73585.0
Warner Brothers    58643.8
Fox                50420.8
Universal          44302.3
Sony               32822.5
Name: Gross, dtype: float64