In [3]:
import pandas as pd 

In [4]:
nba = pd.read_csv("nba.csv")
# NaN marks a missing value (the equivalent of a blank cell in Excel)
# the index is generated by pandas, it is not read from the csv
# an integer column that contains NaN is stored as float64, because NaN is a float
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


### Shared Methods and Attributes

In [11]:
nba = pd.read_csv("nba.csv")

In [14]:
nba.head()  # not displayed: a cell only shows the value of its last expression
nba.tail(3) # last 3 rows

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [15]:
nba.index

RangeIndex(start=0, stop=458, step=1)

In [16]:
nba.values

array([['Avery Bradley', 'Boston Celtics', 0.0, ..., 180.0, 'Texas',
        7730337.0],
       ['Jae Crowder', 'Boston Celtics', 99.0, ..., 235.0, 'Marquette',
        6796117.0],
       ['John Holland', 'Boston Celtics', 30.0, ..., 205.0,
        'Boston University', nan],
       ...,
       ['Tibor Pleiss', 'Utah Jazz', 21.0, ..., 256.0, nan, 2900000.0],
       ['Jeff Withey', 'Utah Jazz', 24.0, ..., 231.0, 'Kansas', 947276.0],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

In [18]:
nba.shape # (rows, columns)

(458, 9)

In [19]:
nba.dtypes

Name         object
Team         object
Number      float64
Position     object
Age         float64
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [20]:
nba.columns

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [21]:
nba.axes

[RangeIndex(start=0, stop=458, step=1),
 Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
        'College', 'Salary'],
       dtype='object')]

In [23]:
nba.info() # prints a summary: index range, non-null count and dtype per column, memory usage
           # NOTE(review): "exclusive to DataFrames" only holds for older pandas; newer releases also provide Series.info() -- confirm for the installed version

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     373 non-null object
Salary      446 non-null float64
dtypes: float64(4), object(5)
memory usage: 32.3+ KB


### Differences between shared methods

In [6]:
rev = pd.read_csv("revenue.csv",index_col = "Date")
rev.head(3)

Unnamed: 0_level_0,New York,Los Angeles,Miami
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/16,985,122,499
1/2/16,738,788,534
1/3/16,14,20,933


In [8]:
s = pd.Series([1,2,3])
s.sum()

6

In [15]:
rev.sum(axis="columns") # adds across the columns, giving one total per Date row; axis accepts 0/"index" or 1/"columns"

Date
1/1/16     1606
1/2/16     2060
1/3/16      967
1/4/16     2519
1/5/16      438
1/6/16     1935
1/7/16     1234
1/8/16     2313
1/9/16     2623
1/10/16     555
dtype: int64

In [17]:
type(rev.sum())

pandas.core.series.Series

### Select One Column from a DataFrame

In [21]:
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [29]:
# 1st approach: attribute access; it won't work if the column name contains spaces
nba.Name # case-sensitive, a Series is returned
nba.Number
nba.Salary

Output = None #no output

In [32]:
# 2nd Approach
nba["Name"] # same as above, returns new series
type(nba["Number"])

pandas.core.series.Series

In [33]:
nba["Name"].head()

0    Avery Bradley
1      Jae Crowder
2     John Holland
3      R.J. Hunter
4    Jonas Jerebko
Name: Name, dtype: object

### Select Two or More Columns from a DataFrame

In [34]:
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [36]:
nba[["Team","Name"]].head() # a list of column names returns a new DataFrame; columns appear in the order listed

Unnamed: 0,Team,Name
0,Boston Celtics,Avery Bradley
1,Boston Celtics,Jae Crowder
2,Boston Celtics,John Holland
3,Boston Celtics,R.J. Hunter
4,Boston Celtics,Jonas Jerebko


In [38]:
nba[["Number","College","Name"]].head()

Unnamed: 0,Number,College,Name
0,0.0,Texas,Avery Bradley
1,99.0,Marquette,Jae Crowder
2,30.0,Boston University,John Holland
3,28.0,Georgia State,R.J. Hunter
4,8.0,,Jonas Jerebko


In [40]:
extractList = ["Salary","Team","Name"]
nba[extractList].head()

Unnamed: 0,Salary,Team,Name
0,7730337.0,Boston Celtics,Avery Bradley
1,6796117.0,Boston Celtics,Jae Crowder
2,,Boston Celtics,John Holland
3,1148640.0,Boston Celtics,R.J. Hunter
4,5000000.0,Boston Celtics,Jonas Jerebko


In [43]:
extractOne = ["Salary"]
nba[extractOne].head()

Unnamed: 0,Salary
0,7730337.0
1,6796117.0
2,
3,1148640.0
4,5000000.0


### Add New Column to DataFrame

In [44]:
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [45]:
nba["Sports"] = "Basektball" # used to create a new column or overwrite the data 

In [47]:
# The above code makes all the rows equal to basketball -- Scalar value = "Basektball"
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sports
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basektball
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basektball
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basektball
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,Basektball
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Basektball


In [48]:
nba["League"] = "National Basektball Association" # the new column is appended after the last existing column
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sports,League
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basektball,National Basektball Association
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basektball,National Basektball Association
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basektball,National Basektball Association


In [52]:
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [53]:
# 2nd Approach
nba.insert(3,"Sports","Basketball") # inserts at column position 3 (zero-based); the columns from there on shift to the right
nba.head()

Unnamed: 0,Name,Team,Number,Sports,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,Basketball,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,Basketball,PF,29.0,6-10,231.0,,5000000.0


In [54]:
nba.insert(7, column="League", value="NBA") # insert at a specific column position (zero-based)
nba.head(3)

Unnamed: 0,Name,Team,Number,Sports,Position,Age,Height,League,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,NBA,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,NBA,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,NBA,205.0,Boston University,


## Broadcasting Operations 
(Rather than using the apply method)

In [55]:
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [65]:
# Broadcasting: an arithmetic method or operator applies the scalar to every value
# of the Series and returns a new Series.
nba["Age"].add(5) # adds 5 to every value of the Series
nba["Age"] + 5    # operator form of the same operation; this holds for every arithmetic operator

nba["Salary"].sub(5000000) # method form ...
nba["Salary"] - 5000000    # ... and its operator equivalent (same constant, so both lines agree)

nba["Weight"].mul(0.453592) # 0.453592 kg per pound (assumes Weight is in pounds)
nba["Weight in kg"] = nba["Weight"] * 0.453592 # stores the converted values as a new column
nba.head(2)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Weight in kg
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,81.64656
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,106.59412


In [68]:
nba["Salary"].div(1000000)
nba["Salary in millions"] = nba["Salary"] / 1000000

In [69]:
nba.head(2)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Weight in kg,Salary in millions
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,81.64656,7.730337
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,106.59412,6.796117


## A Review of the .value_counts() Method

In [4]:
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [5]:
# how many players are on a team?
nba["Team"].value_counts() # counts the occurrences of each unique value of the Series
                           # NOTE(review): "Series only" holds for older pandas; newer releases also offer DataFrame.value_counts() -- confirm for the installed version
nba["Position"].value_counts().head(1)
nba["Weight"].value_counts().tail(1)
nba["Salary"].value_counts().head(3) # only this last expression is displayed below

947276.0    31
845059.0    18
525093.0    13
Name: Salary, dtype: int64

## Drop Rows with Null Values (NaN) with the .dropna() Method

In [9]:
nba = pd.read_csv("nba.csv")
nba.head(3)
nba.tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [10]:
nba.dropna() # how="any" (the default): removes every row that has at least one NaN/null value
             # returns a new DataFrame; nba itself is unchanged
nba.dropna(how = "all")
             # how="all": removes only the rows where all of the values are NaN/null
             # also returns a new DataFrame
nba.dropna(how = "all", inplace = True)
             # inplace=True modifies nba itself instead of returning a new DataFrame

In [11]:
# the row with all NaN values is gone
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [13]:
nba.dropna(axis=1).head() # axis=0 drops rows, axis=1 drops the columns that contain a NaN
                   # the string forms are "index" and "columns"

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0


In [14]:
# subset limits the null check to the listed columns:
# rows with a null value in "Salary" or in "College" are dropped
nba.dropna(subset=["Salary", "College"]).head() 

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0


## Fill in Null Values with the .fillna() Method

In [40]:
nba = pd.read_csv("nba.csv")
nba.tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [15]:
nba.fillna(0).tail() # this doesn't work well
              # bc strings columns will be filled with 0's too

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,0,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,0,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [42]:
# Assign the filled column back instead of calling fillna(inplace=True) on nba["Salary"]:
# the in-place form acts on an intermediate Series (chained assignment), which newer
# pandas warns about and, under Copy-on-Write, does not propagate to nba.
nba["Salary"] = nba["Salary"].fillna(0)

In [43]:
# Assign the result back rather than using fillna(inplace=True) on the selected column;
# the chained in-place call is not guaranteed to update nba in newer pandas.
nba["College"] = nba["College"].fillna("No College")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,No College,5000000.0


## The .astype() Method

In [16]:
# Casting to an integer dtype with .astype() fails while the column still holds NaN,
# so the null values are removed or filled first.
# .astype() is called on Series objects here.
# The filled columns are assigned back (no chained fillna(inplace=True)), so nba is
# reliably updated regardless of the pandas version.
nba = pd.read_csv("nba.csv").dropna(how="all")
nba["Salary"] = nba["Salary"].fillna(0)
nba["College"] = nba["College"].fillna("None")
nba.head(6)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0


In [19]:
nba.dtypes # the dtype of each column (not displayed: it is not the cell's last expression)
           # what if I want ints instead of floats?
nba.info()
           # NOTE(review): the saved output below already lists Salary as an integer dtype;
           # the astype cell that follows (In [18]) was executed before this cell (In [19])

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     457 non-null object
Salary      457 non-null int32
dtypes: float64(3), int32(1), object(5)
memory usage: 33.9+ KB


In [18]:
nba["Salary"] = nba["Salary"].astype("int") # converts the Salary values to integers
                            # astype() has no inplace parameter, so the result is assigned back

In [22]:
nba["Age"] = nba["Age"].astype("int")
nba["Number"] = nba["Number"].astype("int")
nba.head()
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null int32
Position    457 non-null object
Age         457 non-null int32
Height      457 non-null object
Weight      457 non-null float64
College     457 non-null object
Salary      457 non-null int32
dtypes: float64(1), int32(3), object(5)
memory usage: 30.3+ KB


In [23]:
# if a column has the same strings repeated in each of its rows, convert the 
# column data type into a "category"
# .astype("category") used when i have duplicate values
#
# position column has 5 unique values
# converting a column into a category to reduce the memory usage
nba["Position"].nunique()

5

In [25]:
nba["Position"] = nba["Position"].astype("category")
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null int32
Position    457 non-null category
Age         457 non-null int32
Height      457 non-null object
Weight      457 non-null float64
College     457 non-null object
Salary      457 non-null int32
dtypes: category(1), float64(1), int32(3), object(4)
memory usage: 27.4+ KB


In [26]:
# notice the decrease in memory usage after calling the .info() method
nba["Team"] = nba["Team"].astype("category")
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null category
Number      457 non-null int32
Position    457 non-null category
Age         457 non-null int32
Height      457 non-null object
Weight      457 non-null float64
College     457 non-null object
Salary      457 non-null int32
dtypes: category(2), float64(1), int32(3), object(3)
memory usage: 25.8+ KB


## Sort a DataFrame with the .sort_values() Method, Part I

In [27]:
nba = pd.read_csv("nba.csv")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [34]:
# sort_values() is called on the DataFrame here
# "by" (the first parameter) names the column whose order decides how all rows are arranged
nba.sort_values("Name", ascending=False) # returns a new sorted DataFrame (discarded here)

nba.sort_values("Age", ascending = False) # likewise discarded

nba.sort_values("Salary", ascending = False, inplace = True) # inplace=True reorders nba itself
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
109,Kobe Bryant,Los Angeles Lakers,24.0,SF,37.0,6-6,212.0,,25000000.0
169,LeBron James,Cleveland Cavaliers,23.0,SF,31.0,6-8,250.0,,22970500.0
33,Carmelo Anthony,New York Knicks,7.0,SF,32.0,6-8,240.0,Syracuse,22875000.0


In [17]:
# by default sort_values() places the null values last (na_position="last");
# na_position="first" puts them at the top instead
nba.sort_values("Salary",ascending = False, inplace = True, na_position="first")
nba.head(2)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
46,Elton Brand,Philadelphia 76ers,42.0,PF,37.0,6-9,254.0,Duke,


## Sort a DataFrame with the .sort_values() Method, Part II

In [18]:
nba = pd.read_csv("nba.csv")
nba.tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [19]:
# sorts the Team column first and then the names

nba.sort_values(["Team","Name"]) 
nba.sort_values(["Team","Name"], ascending=[True, False], inplace = True) 

# Team is sorted in alphabetical order, while name is sorted in reverse alphabetical order
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
322,Walter Tavares,Atlanta Hawks,22.0,C,24.0,7-3,260.0,,1000000.0
310,Tim Hardaway Jr.,Atlanta Hawks,10.0,SG,24.0,6-6,205.0,Michigan,1304520.0
321,Tiago Splitter,Atlanta Hawks,11.0,C,31.0,6-11,245.0,,9756250.0
320,Thabo Sefolosha,Atlanta Hawks,25.0,SF,32.0,6-7,220.0,,4000000.0
315,Paul Millsap,Atlanta Hawks,4.0,PF,31.0,6-8,246.0,Louisiana Tech,18671659.0


## Sort DataFrame with the .sort_index() Method

In [20]:
nba = pd.read_csv("nba.csv")
nba.tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [12]:
# Sorts by Number first; rows with an identical Number are ordered by Salary,
# and rows that also share the Salary are ordered by Name

nba.sort_values(["Number","Salary","Name"], inplace=True)
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
291,Orlando Johnson,New Orleans Pelicans,0.0,SG,27.0,6-5,220.0,UC Santa Barbara,55722.0
248,Andrew Goudelock,Houston Rockets,0.0,PG,27.0,6-3,200.0,Charleston,200600.0
347,Josh Richardson,Miami Heat,0.0,SG,22.0,6-6,200.0,Tennessee,525093.0
266,JaMychal Green,Memphis Grizzlies,0.0,PF,25.0,6-9,227.0,Alabama,845059.0
47,Isaiah Canaan,Philadelphia 76ers,0.0,PG,25.0,6-0,201.0,Murray State,947276.0


In [14]:
nba.sort_index(ascending=False,  inplace=True)
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
457,,,,,,,,,
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0


## Rank Values with the .rank() Method

In [19]:
# Load the data, drop the fully empty row, and store Salary as integers
# (missing salaries become 0 so the integer cast is possible).
nba = (
    pd.read_csv("nba.csv")
    .dropna(how="all")
    .assign(Salary=lambda frame: frame["Salary"].fillna(0).astype("int"))
)
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0


In [25]:
# rank() is applied to the Salary Series; ascending=False gives the largest salary rank 1
# tied salaries share the average of their ranks (default method="average"), which can end in .5;
# astype("int") then truncates that fraction
nba["Salary Rank"] = nba["Salary"].rank(ascending=False).astype("int")
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Salary Rank
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337,97
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117,110
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0,452
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640,322
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000,147


In [21]:
# NOTE(review): the saved output below still contains NaN salaries, the all-NaN row 457 and
# no "Salary Rank" column, so it appears to come from a run (In [21]) made before the cells
# above -- re-run this cell to refresh it
nba.sort_values(by = "Salary", ascending = False).tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
397,Axel Toupane,Denver Nuggets,6.0,SG,23.0,6-7,210.0,,
409,Greg Smith,Minnesota Timberwolves,4.0,PF,25.0,6-10,250.0,Fresno State,
457,,,,,,,,,
