**Contents:**
* [.index, .values, .shape, .dtypes, .columns, .axes, .info(), .get_dtype_counts()](#0)
* [Difference between shared methods](#1)
* [Select One Column from a DataFrame](#2)
* [Select Two or More Columns from a DataFrame](#3)
* [Add New Column to DataFrame](#4)
* [Broadcasting Operations](#5)
* [A Review of the .value_counts() Method](#6)
* [Drop Rows with Null Values](#7)
* [Fill in Null Values with the .fillna() Method](#8)
* [The .astype() Method](#9)
* [Sort a DataFrame with the .sort_values() Method](#10)
* [Sort a DataFrame with the .sort_index() Method](#11)
* [Rank Values with the .rank() Method](#12)

In [1]:
import pandas as pd

In [3]:
# Load nba.csv into a DataFrame and preview its first five rows.
nba = pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


<a id='0'></a>
## .index, .values, .shape, .dtypes, .columns, .axes, .info(), .get_dtype_counts()

In [5]:
# Row labels of the DataFrame (the index object).
nba.index

RangeIndex(start=0, stop=458, step=1)

In [6]:
# The frame's data as a NumPy array, one inner array per row.
nba.values

array([['Avery Bradley', 'Boston Celtics', 0.0, ..., 180.0, 'Texas',
        7730337.0],
       ['Jae Crowder', 'Boston Celtics', 99.0, ..., 235.0, 'Marquette',
        6796117.0],
       ['John Holland', 'Boston Celtics', 30.0, ..., 205.0,
        'Boston University', nan],
       ..., 
       ['Tibor Pleiss', 'Utah Jazz', 21.0, ..., 256.0, nan, 2900000.0],
       ['Jeff Withey', 'Utah Jazz', 24.0, ..., 231.0, 'Kansas', 947276.0],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

In [7]:
nba.shape #tuple of (number of rows, number of columns)

(458, 9)

In [8]:
# Series mapping each column name to the dtype of that column.
nba.dtypes

Name         object
Team         object
Number      float64
Position     object
Age         float64
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [9]:
# Column labels of the DataFrame, as an Index object.
nba.columns

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [10]:
# Both axes in one list: the row index first, then the column index.
nba.axes

[RangeIndex(start=0, stop=458, step=1),
 Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
        'College', 'Salary'],
       dtype='object')]

In [11]:
# Print a summary: index range, per-column non-null counts and dtypes, memory usage.
nba.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     373 non-null object
Salary      446 non-null float64
dtypes: float64(4), object(5)
memory usage: 32.3+ KB


In [13]:
# Count how many columns have each dtype.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in 1.0;
# tallying the values of .dtypes is the documented replacement.
nba.dtypes.value_counts()

float64    4
object     5
dtype: int64

<a id='1'></a>
## Difference between shared methods

In [18]:
# Load revenue.csv, using its "Date" column as the row index instead of 0..n-1.
rev = pd.read_csv("revenue.csv", index_col="Date")
rev.head(3)

Unnamed: 0_level_0,New York,Los Angeles,Miami
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/16,985,122,499
1/2/16,738,788,534
1/3/16,14,20,933


In [21]:
# A Series has a single axis, so .sum() collapses it to one scalar
# and takes no axis argument.
s = pd.Series(data=[1, 2, 3])
s.sum()

6

In [24]:
# On a DataFrame, axis=0 and axis="index" are two spellings of the same thing:
# add down the rows, producing one total per column.
# Only the last expression of a cell is displayed.
rev.sum(axis=0)
rev.sum(axis="index")

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [25]:
# axis=1 and axis="columns" are equivalent: add across the columns,
# producing one total per row (per Date).
rev.sum(axis=1)
rev.sum(axis="columns")

Date
1/1/16     1606
1/2/16     2060
1/3/16      967
1/4/16     2519
1/5/16      438
1/6/16     1935
1/7/16     1234
1/8/16     2313
1/9/16     2623
1/10/16     555
dtype: int64

<a id='2'></a>
## Select One Column from a DataFrame

In [26]:
# Re-read nba.csv so this section starts from the data as stored on disk.
nba = pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [28]:
# Attribute-style column access. It only works when the column name is a valid
# Python identifier that does not clash with an existing DataFrame attribute.
nba.Name

Output = None

In [29]:
# Bracket-style column access; works for any column name, including ones with spaces.
nba["Name"]

0                Avery Bradley
1                  Jae Crowder
2                 John Holland
3                  R.J. Hunter
4                Jonas Jerebko
5                 Amir Johnson
6                Jordan Mickey
7                 Kelly Olynyk
8                 Terry Rozier
9                 Marcus Smart
10             Jared Sullinger
11               Isaiah Thomas
12                 Evan Turner
13                 James Young
14                Tyler Zeller
15            Bojan Bogdanovic
16                Markel Brown
17             Wayne Ellington
18     Rondae Hollis-Jefferson
19                Jarrett Jack
20              Sergey Karasev
21             Sean Kilpatrick
22                Shane Larkin
23                 Brook Lopez
24            Chris McCullough
25                 Willie Reed
26             Thomas Robinson
27                  Henry Sims
28                Donald Sloan
29              Thaddeus Young
                ...           
428            Al-Farouq Aminu
429     

In [30]:
# Selecting a single column yields a Series, not a DataFrame.
type(nba["Name"])

pandas.core.series.Series

In [31]:
# The selected column is a Series, so Series methods such as .head() apply to it.
nba["Name"].head()

0    Avery Bradley
1      Jae Crowder
2     John Holland
3      R.J. Hunter
4    Jonas Jerebko
Name: Name, dtype: object

<a id='3'></a>
## Select Two or More Columns from a DataFrame

In [32]:
# Re-read nba.csv so this section starts from the data as stored on disk.
nba = pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [35]:
# Passing a list of names selects several columns and returns a DataFrame
# whose columns follow the order of the list, not the order in nba.
nba[["Team","Name"]].head(3)

Unnamed: 0,Team,Name
0,Boston Celtics,Avery Bradley
1,Boston Celtics,Jae Crowder
2,Boston Celtics,John Holland
3,Boston Celtics,R.J. Hunter
4,Boston Celtics,Jonas Jerebko


In [37]:
# Three columns in a custom order; .tail(3) shows the last three rows.
nba[["Salary","Team","Name"]].tail(3)

Unnamed: 0,Salary,Team,Name
455,2900000.0,Utah Jazz,Tibor Pleiss
456,947276.0,Utah Jazz,Jeff Withey
457,,,


In [38]:
# Same selection as a literal list, but with the column list held in a variable first.
select = ["Salary","Team","Name"]
nba[select].tail(3)

Unnamed: 0,Salary,Team,Name
455,2900000.0,Utah Jazz,Tibor Pleiss
456,947276.0,Utah Jazz,Jeff Withey
457,,,


<a id='4'></a>
## Add New Column to DataFrame

In [39]:
# Re-read nba.csv so this section starts from the data as stored on disk.
nba = pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [40]:
# Assigning a scalar to a new column name creates the column as the last one
# and repeats the value on every row.
nba["Sport"] = "Basketball"

In [41]:
# Preview the frame to confirm the new "Sport" column.
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sport
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basketball
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,Basketball
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Basketball


In [42]:
# A second scalar assignment; the new column is again added at the far right.
nba["League"] = "National Basketball Association"
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sport,League
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball,National Basketball Association
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball,National Basketball Association
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basketball,National Basketball Association
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,Basketball,National Basketball Association
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Basketball,National Basketball Association


In [43]:
# Re-read nba.csv, discarding the "Sport" and "League" columns added above,
# so the same columns can be added again with .insert().
nba = pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [44]:
# .insert() places the new column at a chosen 0-based position (3 = fourth column).
# It modifies nba in place and returns None, so there is nothing to assign.
nba.insert(3, column= "Sport", value="Basketball")

In [45]:
# Preview the first three rows to see where "Sport" was inserted.
nba.head(3)

Unnamed: 0,Name,Team,Number,Sport,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,205.0,Boston University,


In [46]:
# Position 7 is counted after the earlier "Sport" insertion shifted later columns right.
nba.insert(7, column= "League", value="National Basketball Association")

In [47]:
# Preview the first three rows to see where "League" was inserted.
nba.head(3)

Unnamed: 0,Name,Team,Number,Sport,Position,Age,Height,League,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,National Basketball Association,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,National Basketball Association,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,National Basketball Association,205.0,Boston University,


<a id='5'></a>
## Broadcasting Operations

In [58]:
# Each arithmetic method has an equivalent operator form; the two lines of every
# pair compute the same Series. The scalar is applied to every row (broadcasting).
# Results that are neither assigned nor last in the cell are simply discarded.
nba["Age"].add(5)
nba["Age"]+5

# Same amount in both forms so the pair stays equivalent.
nba['Salary'].sub(5000000)
nba["Salary"] - 5000000

# Multiply Weight by .453592; the operator form is stored as a new column.
nba["Weight"].mul(.453592)
nba["Weight in Kilograms"] = nba["Weight"] * .453592
nba.head(3)

Unnamed: 0,Name,Team,Number,Sport,Position,Age,Height,League,Weight,College,Salary,Weight in Kilograms
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,National Basketball Association,180.0,Texas,7730337.0,81.64656
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,National Basketball Association,235.0,Marquette,6796117.0,106.59412
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,National Basketball Association,205.0,Boston University,,92.98636


In [60]:
# .div() and / are equivalent; the first result is discarded, the second is
# stored as a new column. Rows with a missing Salary stay missing.
nba["Salary"].div(1000000)
nba["Salary in Millions"] = nba["Salary"] / 1000000
nba.head(3)

Unnamed: 0,Name,Team,Number,Sport,Position,Age,Height,League,Weight,College,Salary,Weight in Kilograms,Salary in Millions
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,National Basketball Association,180.0,Texas,7730337.0,81.64656,7.730337
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,National Basketball Association,235.0,Marquette,6796117.0,106.59412,6.796117
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,National Basketball Association,205.0,Boston University,,92.98636,


<a id='6'></a>
## A Review of the .value_counts() Method

In [61]:
# Re-read nba.csv, discarding the columns added in the previous section.
nba = pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [63]:
# Number of rows per distinct Team, most frequent first. Missing values are not counted.
nba["Team"].value_counts()

New Orleans Pelicans      19
Memphis Grizzlies         18
Milwaukee Bucks           16
New York Knicks           16
Toronto Raptors           15
Utah Jazz                 15
Chicago Bulls             15
Denver Nuggets            15
Detroit Pistons           15
Charlotte Hornets         15
Sacramento Kings          15
Boston Celtics            15
Houston Rockets           15
Portland Trail Blazers    15
Indiana Pacers            15
Atlanta Hawks             15
Washington Wizards        15
Brooklyn Nets             15
Los Angeles Clippers      15
Los Angeles Lakers        15
Cleveland Cavaliers       15
Golden State Warriors     15
Oklahoma City Thunder     15
Philadelphia 76ers        15
San Antonio Spurs         15
Dallas Mavericks          15
Miami Heat                15
Phoenix Suns              15
Orlando Magic             14
Minnesota Timberwolves    14
Name: Team, dtype: int64

<a id='7'></a>
## Drop Rows with Null Values

In [64]:
# Re-read nba.csv so this section starts from the data as stored on disk.
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [65]:
# Look at the last rows of the file before dropping anything.
nba.tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [77]:
nba.dropna().head() #drop every row holding at least one NaN; returns a new frame, nba is unchanged

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0


In [69]:
nba.dropna(how="all", inplace=True) #drop only rows where every value is NaN; inplace=True modifies nba itself

In [70]:
# Check the last rows again after the in-place drop.
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [76]:
# axis=1 and axis="columns" are equivalent spellings; neither call modifies nba.
nba.dropna(axis=1) #drop every column that contains at least one NaN
nba.dropna(axis = "columns").head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0


In [74]:
# nba still has all of its columns: the column drops above returned new frames.
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [75]:
# subset limits the NaN check to the listed columns: a row is dropped when
# Salary or College is missing, whatever the other columns hold.
nba.dropna(subset=["Salary", "College"]).head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0


<a id='8'></a>
## Fill in Null Values with the .fillna() Method

In [None]:
# Re-read nba.csv so the rows dropped in the previous section are back.
nba = pd.read_csv("nba.csv")
nba.head(3)

In [78]:
nba.fillna(0) #fills NaN in every column with 0, text columns included, so avoid on mixed dtypes; returns a new frame

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,0,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,0,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


In [80]:
# Replace missing salaries with 0 and assign the result back to the column.
# An in-place fillna on a selected column acts on an intermediate object:
# pandas 2.x warns about it, and with Copy-on-Write enabled nba is left unchanged.
nba["Salary"] = nba["Salary"].fillna(0)

In [81]:
# Preview the first five rows to check the Salary column after filling.
nba.head(5)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [83]:
# Replace missing colleges with a placeholder string and assign the result back.
# Assignment is used instead of inplace=True on the selected column, which
# pandas 2.x warns about and which leaves nba unchanged under Copy-on-Write.
nba["College"] = nba["College"].fillna("No College")

In [84]:
# Preview the first five rows to check the College column after filling.
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,No College,5000000.0


<a id='9'></a>
## The .astype() Method

In [90]:
# Re-read the file and drop rows that are entirely NaN, then fill the remaining
# gaps so that later integer casts have no missing values to trip over.
nba = pd.read_csv("nba.csv").dropna(how="all")
# Assign the filled column back rather than calling fillna(..., inplace=True)
# on the selection: the in-place form warns in pandas 2.x and does not update
# nba when Copy-on-Write is enabled.
nba["Salary"] = nba["Salary"].fillna(0)
nba["College"] = nba["College"].fillna("None")
nba.head(6)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0


In [94]:
# Only nba.info() produces visible output here; the bare nba.dtypes expression
# is not the last line of the cell, so its value is discarded.
nba.dtypes #the "object" dtype is how pandas reports string columns here
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     457 non-null object
Salary      457 non-null float64
dtypes: float64(4), object(5)
memory usage: 35.7+ KB


In [97]:
# .astype() returns a converted copy, so assign it back to replace the column.
# Casting to an integer dtype raises if the column still contains NaN.
nba["Salary"] = nba["Salary"].astype("int")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000


In [102]:
# Convert the two remaining whole-number columns from float to integer.
nba["Age"] = nba["Age"].astype("int")
nba["Number"] = nba["Number"].astype("int")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0,PG,25,6-2,180.0,Texas,7730337
1,Jae Crowder,Boston Celtics,99,SF,25,6-6,235.0,Marquette,6796117
2,John Holland,Boston Celtics,30,SG,27,6-5,205.0,Boston University,0
3,R.J. Hunter,Boston Celtics,28,SG,22,6-5,185.0,Georgia State,1148640
4,Jonas Jerebko,Boston Celtics,8,PF,29,6-10,231.0,,5000000


In [101]:
# Disabled example: the reverse cast, turning Age back into a float column.
#nba["Age"] = nba["Age"].astype("float")

In [104]:
# Number of distinct values in Position; a small count relative to the number
# of rows makes the column a candidate for the "category" dtype.
nba["Position"].nunique()

5

In [107]:
# Store Position as a categorical column; the displayed values look the same.
nba["Position"] = nba["Position"].astype("category")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0,PG,25,6-2,180.0,Texas,7730337
1,Jae Crowder,Boston Celtics,99,SF,25,6-6,235.0,Marquette,6796117
2,John Holland,Boston Celtics,30,SG,27,6-5,205.0,Boston University,0
3,R.J. Hunter,Boston Celtics,28,SG,22,6-5,185.0,Georgia State,1148640
4,Jonas Jerebko,Boston Celtics,8,PF,29,6-10,231.0,,5000000


In [108]:
# Number of distinct values in Team.
nba["Team"].nunique()

30

In [109]:
# Store Team as a categorical column as well.
nba["Team"] = nba["Team"].astype("category")

In [110]:
# Print the summary again to see the dtypes after the conversions.
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null category
Number      457 non-null int32
Position    457 non-null category
Age         457 non-null int32
Height      457 non-null object
Weight      457 non-null float64
College     457 non-null object
Salary      457 non-null int32
dtypes: category(2), float64(1), int32(3), object(3)
memory usage: 45.8+ KB


<a id='10'></a>
## Sort a DataFrame with the .sort_values() Method

In [111]:
# Re-read nba.csv, discarding the dtype conversions from the previous section.
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [115]:
# Sort by Name in descending order; returns a new frame and leaves nba unsorted.
nba.sort_values("Name", ascending=False, na_position="last").head() # na_position puts NaN rows "last" or "first"

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
237,Zaza Pachulia,Dallas Mavericks,27.0,C,32.0,6-11,275.0,,5200000.0
271,Zach Randolph,Memphis Grizzlies,50.0,PF,34.0,6-9,260.0,Michigan State,9638555.0
402,Zach LaVine,Minnesota Timberwolves,8.0,PG,21.0,6-5,189.0,UCLA,2148360.0
270,Xavier Munford,Memphis Grizzlies,14.0,PG,24.0,6-3,180.0,Rhode Island,
386,Wilson Chandler,Denver Nuggets,21.0,SF,29.0,6-8,225.0,DePaul,10449438.0


In [119]:
# Multi-key sort: Team ascending, then Name descending within each team.
# The ascending list pairs up with the column list position by position.
# inplace=True reorders nba itself.
nba.sort_values(["Team","Name"], ascending = [True,False], inplace=True)
nba.head(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
322,Walter Tavares,Atlanta Hawks,22.0,C,24.0,7-3,260.0,,1000000.0
310,Tim Hardaway Jr.,Atlanta Hawks,10.0,SG,24.0,6-6,205.0,Michigan,1304520.0
321,Tiago Splitter,Atlanta Hawks,11.0,C,31.0,6-11,245.0,,9756250.0
320,Thabo Sefolosha,Atlanta Hawks,25.0,SF,32.0,6-7,220.0,,4000000.0
315,Paul Millsap,Atlanta Hawks,4.0,PF,31.0,6-8,246.0,Louisiana Tech,18671659.0
319,Mike Scott,Atlanta Hawks,32.0,PF,27.0,6-8,237.0,Virginia,3333333.0
316,Mike Muscala,Atlanta Hawks,31.0,PF,24.0,6-11,240.0,Bucknell,947276.0
317,Lamar Patterson,Atlanta Hawks,13.0,SG,24.0,6-5,225.0,Pittsburgh,525093.0
314,Kyle Korver,Atlanta Hawks,26.0,SG,35.0,6-7,212.0,Creighton,5746479.0
313,Kris Humphries,Atlanta Hawks,43.0,PF,31.0,6-9,235.0,Minnesota,1000000.0


<a id='11'></a>
## Sort a DataFrame with the .sort_index() Method

In [None]:
# Re-read nba.csv so the rows are back in file order.
nba = pd.read_csv("nba.csv")
nba.head(3)

In [123]:
# Reorder the rows in place so the index labels are no longer in order,
# giving .sort_index() something to undo.
nba.sort_values(["Number","Salary","Name"], inplace=True)
nba.tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
68,Lucas Nogueira,Toronto Raptors,92.0,C,23.0,7-0,220.0,,1842000.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
457,,,,,,,,,


In [126]:
# Sort by index label instead of by column values; ascending=False puts the
# highest label first. inplace=True reorders nba itself.
nba.sort_index(ascending=False,inplace=True)
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
457,,,,,,,,,
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


<a id='12'></a>
## Rank Values with the .rank() Method

In [131]:
# Re-read the file, drop rows that are entirely NaN, and treat a missing salary
# as 0 so every row has a value to rank.
nba = pd.read_csv("nba.csv").dropna(how="all")
nba["Salary"] = nba["Salary"].fillna(0)
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0


In [137]:
# ascending=False gives rank 1 to the highest salary.
# NOTE(review): .rank() is called with its default tie handling, which assigns
# tied salaries their averaged rank; if that average is fractional,
# astype("int") truncates it. Confirm this is the intended tie behaviour.
nba["Salary Rank"] = nba["Salary"].rank(ascending=False).astype("int")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Salary Rank
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,97
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,110
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0,452


In [140]:
# Show the five highest salaries next to their computed rank; nba is not reordered.
nba.sort_values(by="Salary", ascending=False).head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Salary Rank
109,Kobe Bryant,Los Angeles Lakers,24.0,SF,37.0,6-6,212.0,,25000000.0,1
169,LeBron James,Cleveland Cavaliers,23.0,SF,31.0,6-8,250.0,,22970500.0,2
33,Carmelo Anthony,New York Knicks,7.0,SF,32.0,6-8,240.0,Syracuse,22875000.0,3
251,Dwight Howard,Houston Rockets,12.0,C,30.0,6-11,265.0,,22359364.0,4
339,Chris Bosh,Miami Heat,1.0,PF,32.0,6-11,235.0,Georgia Tech,22192730.0,5
