In [1]:
import pandas as pd
import numpy as np

In [2]:
city_data = {
    "City": ["New York City", "Paris", "Barcelona", "Rome", "Atlanta"],
    "Country": ["United States", "France", "Spain", "Italy", "United States"],
    "Population": [8600000, 2141000, 551500, 2873000, 6013000]
}
cities = pd.DataFrame(city_data)
cities

Unnamed: 0,City,Country,Population
0,New York City,United States,8600000
1,Paris,France,2141000
2,Barcelona,Spain,551500
3,Rome,Italy,2873000
4,Atlanta,United States,6013000


In [3]:
cities.transpose()

Unnamed: 0,0,1,2,3,4
City,New York City,Paris,Barcelona,Rome,Atlanta
Country,United States,France,Spain,Italy,United States
Population,8600000,2141000,551500,2873000,6013000


In [4]:
random_data = np.random.randint(1, 100, [3, 5])
random_data

array([[42, 61, 86, 11, 24],
       [83,  3,  5, 25, 98],
       [26, 76, 88, 11, 95]])

In [5]:
pd.DataFrame(data = random_data)

Unnamed: 0,0,1,2,3,4
0,42,61,86,11,24
1,83,3,5,25,98
2,26,76,88,11,95


In [6]:
row_labels = ["Morning", "Noon", "Night"]
temperatures = pd.DataFrame(data = random_data, index=row_labels)
temperatures

Unnamed: 0,0,1,2,3,4
Morning,42,61,86,11,24
Noon,83,3,5,25,98
Night,26,76,88,11,95


In [7]:
column_labels = (
    "Monday",
    "Tuesday", 
    "Wednesday",
    "Thursday",
    "Friday"
)
pd.DataFrame(
    data = random_data,
    index = row_labels,
    columns = column_labels,
)

Unnamed: 0,Monday,Tuesday,Wednesday,Thursday,Friday
Morning,42,61,86,11,24
Noon,83,3,5,25,98
Night,26,76,88,11,95


In [8]:
row_labels = ["Morning", "Afternoon", "Morning"]
column_labels = ("Monday", "Tuesday", "Wednesday", "Tuesday", "Friday")
pd.DataFrame(
    data = random_data,
    index = row_labels,
    columns = column_labels,
)

Unnamed: 0,Monday,Tuesday,Wednesday,Tuesday.1,Friday
Morning,42,61,86,11,24
Afternoon,83,3,5,25,98
Morning,26,76,88,11,95


## Similiarities between Series and DataFrames

In [9]:
data_dir = "../../pandas-in-action/chapter_04_the_dataframe_object/"
nba_file = data_dir + "nba.csv"

In [10]:
pd.read_csv(nba_file)

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697
1,Christian Wood,Detroit Pistons,PF,9/27/95,1645357
2,PJ Washington,Charlotte Hornets,PF,8/23/98,3831840
3,Derrick Rose,Detroit Pistons,PG,10/4/88,7317074
4,Marial Shayok,Philadelphia 76ers,G,7/26/95,79568
...,...,...,...,...,...
445,Austin Rivers,Houston Rockets,PG,8/1/92,2174310
446,Harry Giles,Sacramento Kings,PF,4/22/98,2578800
447,Robin Lopez,Milwaukee Bucks,C,4/1/88,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1/4/99,4764960


In [11]:
nba = pd.read_csv(nba_file, parse_dates = ["Birthday"])
nba

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
...,...,...,...,...,...
445,Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
446,Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960


In [12]:
pd.Series([1,2,3]).dtype

dtype('int64')

In [13]:
nba.dtypes

Name                object
Team                object
Position            object
Birthday    datetime64[ns]
Salary               int64
dtype: object

In [14]:
nba.dtypes.value_counts()

object            3
datetime64[ns]    1
int64             1
dtype: int64

In [15]:
nba.index

RangeIndex(start=0, stop=450, step=1)

In [16]:
nba.columns

Index(['Name', 'Team', 'Position', 'Birthday', 'Salary'], dtype='object')

In [17]:
nba.ndim

2

In [18]:
nba.shape

(450, 5)

In [19]:
nba.size

2250

In [20]:
nba.count()

Name        450
Team        450
Position    450
Birthday    450
Salary      450
dtype: int64

In [21]:
nba.count().sum()

2250

In [22]:
data = {
    "A": [1, np.nan],
    "B": [2, 3]
}
df = pd.DataFrame(data)
df

Unnamed: 0,A,B
0,1.0,2
1,,3


In [23]:
df.size

4

In [24]:
df.count()

A    1
B    2
dtype: int64

In [25]:
df.count().sum()

3

In [26]:
nba.head(2)

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357


In [27]:
nba.tail(n = 3)

Unnamed: 0,Name,Team,Position,Birthday,Salary
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
449,Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000


In [28]:
nba.sample(3)

Unnamed: 0,Name,Team,Position,Birthday,Salary
230,Khris Middleton,Milwaukee Bucks,SF,1991-08-12,30603448
227,Amir Coffey,Los Angeles Clippers,G,1997-06-17,79568
225,Tomas Satoransky,Chicago Bulls,PG,1991-10-30,10000000


In [29]:
nba.nunique()

Name        450
Team         30
Position      9
Birthday    430
Salary      269
dtype: int64

In [30]:
nba.max()

Name             Zylan Cheatham
Team         Washington Wizards
Position                     SG
Birthday    2000-12-23 00:00:00
Salary                 40231758
dtype: object

In [31]:
nba.min()

Name               Aaron Gordon
Team              Atlanta Hawks
Position                      C
Birthday    1977-01-26 00:00:00
Salary                    79568
dtype: object

In [32]:
nba.nlargest(n = 4, columns="Salary")

Unnamed: 0,Name,Team,Position,Birthday,Salary
205,Stephen Curry,Golden State Warriors,PG,1988-03-14,40231758
38,Chris Paul,Oklahoma City Thunder,PG,1985-05-06,38506482
219,Russell Westbrook,Houston Rockets,PG,1988-11-12,38506482
251,John Wall,Washington Wizards,PG,1990-09-06,38199000


In [33]:
nba.nsmallest(n=5, columns="Birthday")

Unnamed: 0,Name,Team,Position,Birthday,Salary
98,Vince Carter,Atlanta Hawks,PF,1977-01-26,2564753
196,Udonis Haslem,Miami Heat,C,1980-06-09,2564753
262,Kyle Korver,Milwaukee Bucks,PF,1981-03-17,6004753
149,Tyson Chandler,Houston Rockets,C,1982-10-02,2564753
415,Andre Iguodala,Memphis Grizzlies,SF,1984-01-28,17185185


In [34]:
nba.sum()

  nba.sum()


Name        Shake MiltonChristian WoodPJ WashingtonDerrick...
Team        Philadelphia 76ersDetroit PistonsCharlotte Hor...
Position    SGPFPFPGGPFSGSFCSFPGPGFCPGSGPFCCPFPFSGPFPGSGSF...
Salary                                             3444112694
dtype: object

In [35]:
nba.mean(numeric_only=True)

Salary    7.653584e+06
dtype: float64

In [36]:
nba.median(numeric_only = True)

Salary    3303074.5
dtype: float64

In [37]:
print(nba.mean(numeric_only=True), nba.std(numeric_only=True))

Salary    7.653584e+06
dtype: float64 Salary    9.288810e+06
dtype: float64


## Sorting a DataFrame

In [38]:
nba.sort_values(by="Name")

Unnamed: 0,Name,Team,Position,Birthday,Salary
52,Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
101,Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
437,Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
81,Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
399,Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
...,...,...,...,...,...
159,Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000
302,Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
312,Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440


In [40]:
nba.sort_values("Name", ascending=False).head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
248,Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
312,Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
302,Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
159,Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000


In [41]:
nba.sort_values(by="Birthday", ascending=False).head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
136,Sekou Doumbouya,Detroit Pistons,SF,2000-12-23,3285120
432,Talen Horton-Tucker,Los Angeles Lakers,GF,2000-11-25,898310
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
313,RJ Barrett,New York Knicks,SG,2000-06-14,7839960
392,Jalen Lecque,Phoenix Suns,G,2000-06-13,898310


In [42]:
nba.sort_values(by = ["Team", "Name"])

Unnamed: 0,Name,Team,Position,Birthday,Salary
359,Alex Len,Atlanta Hawks,C,1993-06-16,4160000
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000
276,Brandon Goodwin,Atlanta Hawks,PG,1995-10-02,79568
438,Bruno Fernando,Atlanta Hawks,C,1998-08-15,1400000
194,Cam Reddish,Atlanta Hawks,SF,1999-09-01,4245720
...,...,...,...,...,...
418,Jordan McRae,Washington Wizards,PG,1991-03-28,1645357
273,Justin Robinson,Washington Wizards,PG,1997-10-12,898310
428,Moritz Wagner,Washington Wizards,C,1997-04-26,2063520
21,Rui Hachimura,Washington Wizards,PF,1998-02-08,4469160


In [45]:
nba = nba.sort_values(
    by = ["Team", "Salary"], ascending = [True, False]
)
nba.head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
111,Chandler Parsons,Atlanta Hawks,SF,1988-10-25,25102512
28,Evan Turner,Atlanta Hawks,PG,1988-10-27,18606556
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000
213,De'Andre Hunter,Atlanta Hawks,SF,1997-12-02,7068360
339,Jabari Parker,Atlanta Hawks,PF,1995-03-15,6500000


In [47]:
nba.sort_index().head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568


In [48]:
nba.sort_index(axis=1).head()

Unnamed: 0,Birthday,Name,Position,Salary,Team
111,1988-10-25,Chandler Parsons,SF,25102512,Atlanta Hawks
28,1988-10-27,Evan Turner,PG,18606556,Atlanta Hawks
167,1992-04-09,Allen Crabbe,SG,18500000,Atlanta Hawks
213,1997-12-02,De'Andre Hunter,SF,7068360,Atlanta Hawks
339,1995-03-15,Jabari Parker,PF,6500000,Atlanta Hawks


## Setting a new index

In [50]:
nba = nba.set_index("Name")

In [51]:
nba = pd.read_csv(nba_file, parse_dates=["Birthday"], index_col="Name")

## Selecting columns and rows from a DataFrame

In [52]:
# Every column is available as an attribute 
nba.Salary

Name
Shake Milton       1445697
Christian Wood     1645357
PJ Washington      3831840
Derrick Rose       7317074
Marial Shayok        79568
                    ...   
Austin Rivers      2174310
Harry Giles        2578800
Robin Lopez        4767000
Collin Sexton      4764960
Ricky Rubio       16200000
Name: Salary, Length: 450, dtype: int64

In [54]:
# Can also pass column name in brackets
nba["Position"]

Name
Shake Milton      SG
Christian Wood    PF
PJ Washington     PF
Derrick Rose      PG
Marial Shayok      G
                  ..
Austin Rivers     PG
Harry Giles       PF
Robin Lopez        C
Collin Sexton     PG
Ricky Rubio       PG
Name: Position, Length: 450, dtype: object

In [55]:
# to get multiple columns put a list within brackets
nba[["Salary", "Birthday"]]

Unnamed: 0_level_0,Salary,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Shake Milton,1445697,1996-09-26
Christian Wood,1645357,1995-09-27
PJ Washington,3831840,1998-08-23
Derrick Rose,7317074,1988-10-04
Marial Shayok,79568,1995-07-26
...,...,...
Austin Rivers,2174310,1992-08-01
Harry Giles,2578800,1998-04-22
Robin Lopez,4767000,1988-04-01
Collin Sexton,4764960,1999-01-04


In [57]:
# can retrieve columns of a particular data type
nba.select_dtypes(include="object")

Unnamed: 0_level_0,Team,Position
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Shake Milton,Philadelphia 76ers,SG
Christian Wood,Detroit Pistons,PF
PJ Washington,Charlotte Hornets,PF
Derrick Rose,Detroit Pistons,PG
Marial Shayok,Philadelphia 76ers,G
...,...,...
Austin Rivers,Houston Rockets,PG
Harry Giles,Sacramento Kings,PF
Robin Lopez,Milwaukee Bucks,C
Collin Sexton,Cleveland Cavaliers,PG


In [58]:
nba.select_dtypes(exclude = ["object", "int"])

Unnamed: 0_level_0,Birthday
Name,Unnamed: 1_level_1
Shake Milton,1996-09-26
Christian Wood,1995-09-27
PJ Washington,1998-08-23
Derrick Rose,1988-10-04
Marial Shayok,1995-07-26
...,...
Austin Rivers,1992-08-01
Harry Giles,1998-04-22
Robin Lopez,1988-04-01
Collin Sexton,1999-01-04


## Selecting rows from a DataFrame

In [59]:
# loc selects a row by index label
nba.loc["LeBron James"]
## Also accepts a list
nba.loc[["Kawhi Leonard", "Paul George"]]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kawhi Leonard,Los Angeles Clippers,SF,1991-06-29,32742000
Paul George,Los Angeles Clippers,SF,1990-05-02,33005556


In [62]:
# Can also pull a range with : , is faster if you sort first
nba.sort_index().loc["Otto Porter":"Patrick Beverley"]
# Note that it includes upper bound, where python's slice does not

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Otto Porter,Chicago Bulls,SF,1993-06-03,27250576
PJ Dozier,Denver Nuggets,PG,1996-10-25,79568
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Pascal Siakam,Toronto Raptors,PF,1994-04-02,2351838
Pat Connaughton,Milwaukee Bucks,SG,1993-01-06,1723050
Patrick Beverley,Los Angeles Clippers,PG,1988-07-12,12345680


In [63]:
nba.sort_index().loc["Zach Collins" :]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Zach Collins,Portland Trail Blazers,C,1997-11-19,4240200
Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000
Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568


In [65]:
nba.sort_index().loc[:"Al Horford"]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
Al Horford,Philadelphia 76ers,C,1986-06-03,28000000


In [66]:
# Use iloc to pull by index position
nba.iloc[300]

Team             Denver Nuggets
Position                     PF
Birthday    1999-04-03 00:00:00
Salary                  1416852
Name: Jarred Vanderbilt, dtype: object

In [67]:
nba.iloc[[100,200,300]]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brian Bowen,Indiana Pacers,SG,1998-10-02,79568
Marco Belinelli,San Antonio Spurs,SF,1986-03-25,5846154
Jarred Vanderbilt,Denver Nuggets,PF,1999-04-03,1416852


In [68]:
# iloc can also take a range, but it does not include the last, so like python
nba.iloc[400:404]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Louis King,Detroit Pistons,F,1999-04-06,79568
Kostas Antetokounmpo,Los Angeles Lakers,PF,1997-11-20,79568
Rodions Kurucs,Brooklyn Nets,PF,1998-02-05,1699236
Spencer Dinwiddie,Brooklyn Nets,PG,1993-04-06,10605600


In [69]:
nba.iloc[:2]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
Christian Wood,Detroit Pistons,PF,1995-09-27,1645357


In [70]:
nba.iloc[447:]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000


In [71]:
nba.iloc[-10:-6]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jared Dudley,Los Angeles Lakers,PF,1985-07-10,2564753
Max Strus,Chicago Bulls,SG,1996-03-28,79568
Kevon Looney,Golden State Warriors,C,1996-02-06,4464286
Willy Hernangomez,Charlotte Hornets,C,1994-05-27,1557250


In [72]:
nba.iloc[0:10:2]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
Kendrick Nunn,Miami Heat,SG,1995-08-03,1416852
Brook Lopez,Milwaukee Bucks,C,1988-04-01,12093024


In [76]:
nba.loc["James Harden", ["Position", "Birthday"]]

Position                     PG
Birthday    1989-08-26 00:00:00
Name: James Harden, dtype: object

In [77]:
nba.loc[["Russell Westbrook", "Anthony Davis"],["Team", "Salary"]]

Unnamed: 0_level_0,Team,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Russell Westbrook,Houston Rockets,38506482
Anthony Davis,Los Angeles Lakers,27093019


In [78]:
nba.loc["Joel Embiid", "Position":"Salary"]

Position                      C
Birthday    1994-03-16 00:00:00
Salary                 27504630
Name: Joel Embiid, dtype: object

In [81]:
nba.iloc[57,3]

796806

In [82]:
nba.iloc[100:104, :3]

Unnamed: 0_level_0,Team,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brian Bowen,Indiana Pacers,SG,1998-10-02
Aaron Holiday,Indiana Pacers,PG,1996-09-30
Troy Daniels,Los Angeles Lakers,SG,1991-07-15
Buddy Hield,Sacramento Kings,SG,1992-12-17


In [83]:
# at and iat are faster when you want a particular value
nba.at["Austin Rivers", "Birthday"]
nba.iat[263,1]

'PF'

## Extracting values from Series
loc, iloc, at, and iat also work for series

In [84]:
nba["Salary"].loc["Damian Lillard"]

29802321

In [85]:
nba["Salary"].iat[234]

2033160

## Renaming columns or rows

In [86]:
nba.columns

Index(['Team', 'Position', 'Birthday', 'Salary'], dtype='object')

In [87]:
nba.columns = ["Team", "Position", "DoB", "Pay"]
nba.head(1)

Unnamed: 0_level_0,Team,Position,DoB,Pay
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697


In [89]:
nba = nba.rename(columns = { "DoB": "Birthday"})

In [90]:
nba.loc["James Harden"]

Team            Houston Rockets
Position                     PG
Birthday    1989-08-26 00:00:00
Pay                    38199000
Name: James Harden, dtype: object

In [91]:
nba = nba.rename(
    index = {"James Harden": "The Beard"}
)

In [92]:
nba.loc["The Beard"]

Team            Houston Rockets
Position                     PG
Birthday    1989-08-26 00:00:00
Pay                    38199000
Name: The Beard, dtype: object

## Resetting an index

In [94]:
# If we just set a new index, the old one, and its data, disappears
nba.set_index("Team").head()

Unnamed: 0_level_0,Position,Birthday,Pay
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Philadelphia 76ers,SG,1996-09-26,1445697
Detroit Pistons,PF,1995-09-27,1645357
Charlotte Hornets,PF,1998-08-23,3831840
Detroit Pistons,PG,1988-10-04,7317074
Philadelphia 76ers,G,1995-07-26,79568


In [95]:
# reset_index moves the current index to a column
nba.reset_index().head()

Unnamed: 0,Name,Team,Position,Birthday,Pay
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568


In [96]:
nba.reset_index().set_index("Team").head()

Unnamed: 0_level_0,Name,Position,Birthday,Pay
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Philadelphia 76ers,Shake Milton,SG,1996-09-26,1445697
Detroit Pistons,Christian Wood,PF,1995-09-27,1645357
Charlotte Hornets,PJ Washington,PF,1998-08-23,3831840
Detroit Pistons,Derrick Rose,PG,1988-10-04,7317074
Philadelphia 76ers,Marial Shayok,G,1995-07-26,79568


## Coding challenge

In [97]:
data_dir = "../../pandas-in-action/chapter_04_the_dataframe_object/"
nba_file = data_dir + "nfl.csv"

In [108]:
nfl = pd.read_csv(nba_file, parse_dates=["Birthday"], index_col="Name")

In [109]:
nfl.head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
Adam Butler,New England Patriots,DT,1994-04-12,645000
Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000


AttributeError: 'DataFrame' object has no attribute 'Name'

In [115]:
nfl.Team.value_counts().sum()

1655

In [122]:
# 5 highest paid players
nfl.sort_values(["Salary"], ascending=False).head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kirk Cousins,Minnesota Vikings,QB,1988-08-19,27500000
Jameis Winston,Tampa Bay Buccaneers,QB,1994-01-06,20922000
Marcus Mariota,Tennessee Titans,QB,1993-10-30,20922000
Derek Carr,Oakland Raiders,QB,1991-03-28,19900000
Jimmy Garoppolo,San Francisco 49Ers,QB,1991-11-02,17200000


In [123]:
# Sort by team asc, then slaray desc
nfl.sort_values(["Team", "Salary"], ascending = [True, False])

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chandler Jones,Arizona Cardinals,OLB,1990-02-27,16500000
Patrick Peterson,Arizona Cardinals,CB,1990-07-11,11000000
Larry Fitzgerald,Arizona Cardinals,WR,1983-08-31,11000000
David Johnson,Arizona Cardinals,RB,1991-12-16,5700000
Justin Pugh,Arizona Cardinals,G,1990-08-15,5000000
...,...,...,...,...
Ross Pierschbacher,Washington Redskins,C,1995-05-05,495000
Kelvin Harmon,Washington Redskins,WR,1996-12-15,495000
Wes Martin,Washington Redskins,G,1996-05-09,495000
Jimmy Moreland,Washington Redskins,CB,1995-08-26,495000
