## Overview of a DataFrame

In [2]:
import pandas as pd
import numpy as np

###  Creating DataFrame from Dictionary

In [3]:
city_data = {
    "City": ["New York City", "Paris", "Barcelona", "Rome"],
    "Country": ["United States", "France", "Spain", "Italy"],
    "Population": pd.Series([8600000, 2141000, 5515000, 2873000])
}

cities = pd.DataFrame(city_data)
cities

Unnamed: 0,City,Country,Population
0,New York City,United States,8600000
1,Paris,France,2141000
2,Barcelona,Spain,5515000
3,Rome,Italy,2873000


In [None]:



# Question 4:
# How can you transpose the cities DataFrame?

In [3]:
# Question 1:
# What is the structure of the cities DataFrame, and what columns does it have?
cities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   City        4 non-null      object
 1   Country     4 non-null      object
 2   Population  4 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 224.0+ bytes


In [4]:
# Question 2:
# Can you print the first few rows of the cities DataFrame?
cities.head()

Unnamed: 0,City,Country,Population
0,New York City,United States,8600000
1,Paris,France,2141000
2,Barcelona,Spain,5515000
3,Rome,Italy,2873000


In [12]:
# Question 3:
# What is the population of Barcelona?
cities.loc[cities["City"] == "Barcelona", "Population"].values[0]

5515000

In [20]:
cities.loc[cities['City']=='Barcelona','Population'].values[0]

5515000

###  Creating a DataFrame from a NumPy ndarray

In [21]:
random_data = np.random.randint(1, 101, [3, 5])
random_data

array([[42, 87, 70, 17, 91],
       [55, 99, 18, 32, 24],
       [42,  6, 41, 35, 54]])

In [22]:
row_labels = ["Morning", "Afternoon", "Evening"]

In [24]:
pd.DataFrame(random_data.T,columns=row_labels)

Unnamed: 0,Morning,Afternoon,Evening
0,42,55,42
1,87,99,6
2,70,18,41
3,17,32,35
4,91,24,54


##  Similarities between Series and DataFrames

###  Importing a DataFrame with the read_csv Function "nba.csv"

In [52]:
nba = pd.read_csv('nba.csv')
nba.head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697
1,Christian Wood,Detroit Pistons,PF,9/27/95,1645357
2,PJ Washington,Charlotte Hornets,PF,8/23/98,3831840
3,Derrick Rose,Detroit Pistons,PG,10/4/88,7317074
4,Marial Shayok,Philadelphia 76ers,G,7/26/95,79568


###  create dataframe named nba use parse_dates

In [53]:
nba = pd.read_csv('nba.csv', parse_dates=['Birthday'])
nba.head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568


###  Shared and Exclusive Attributes between Series and DataFrames

In [54]:
# Question 1: What are the first two rows of the 'nba' DataFrame?
nba.head(2)


Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357


In [31]:
# Question 1: What are the data types of each column in the 'nba' DataFrame?
nba.dtypes

Name                object
Team                object
Position            object
Birthday    datetime64[ns]
Salary               int64
dtype: object

In [35]:
# Question 2: How many columns have the data type 'object' in the 'nba' DataFrame?
(nba.dtypes=='object').sum()

3

In [38]:
# Question 3: How many columns have the data type 'datetime64[ns]' in the 'nba' DataFrame?
(nba.dtypes=='datetime64[ns]').sum()

1

In [39]:
# Question 4: How many columns have the data type 'int64' in the 'nba' DataFrame?
(nba.dtypes =='int64').sum()

1

In [42]:
# Question 5: What type of index does the 'nba' DataFrame have?
nba.index.dtype

dtype('int64')

In [44]:
# Question 6: What are the column names of the 'nba' DataFrame?
nba.columns

Index(['Name', 'Team', 'Position', 'Birthday', 'Salary'], dtype='object')

In [46]:
# Question 7: How many dimensions does the 'nba' DataFrame have?
nba.ndim

2

In [48]:
# Question 8: What is the shape of the 'nba' DataFrame (number of rows and columns)?
nba.shape

(450, 5)

In [50]:
# Question 9: What is the total number of elements in the 'nba' DataFrame?
nba.count()

Name        450
Team        450
Position    450
Birthday    450
Salary      450
dtype: int64

In [53]:
# Question 10: How many non-null values are there in each column of the 'nba' DataFrame?
nba.notnull().count()

Name        450
Team        450
Position    450
Birthday    450
Salary      450
dtype: int64

In [8]:
# Question 11: What is the total number of non-null values in the 'nba' DataFrame?
nba.notnull()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,True,True,True,True,True
1,True,True,True,True,True
2,True,True,True,True,True
3,True,True,True,True,True
4,True,True,True,True,True
...,...,...,...,...,...
445,True,True,True,True,True
446,True,True,True,True,True
447,True,True,True,True,True
448,True,True,True,True,True


###  Shared Methods between Series and DataFrames

In [56]:
# Question 2: What are the last three rows of the 'nba' DataFrame?
nba.tail(3)

Unnamed: 0,Name,Team,Position,Birthday,Salary
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
449,Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000


In [57]:
# Question 3: What are the last five rows of the 'nba' DataFrame?
nba.tail()

Unnamed: 0,Name,Team,Position,Birthday,Salary
445,Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
446,Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
449,Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000


In [59]:
# Question 4: Provide a random sample of three rows from the 'nba' DataFrame.
nba.sample(3)

Unnamed: 0,Name,Team,Position,Birthday,Salary
275,Trevor Ariza,Sacramento Kings,SF,1985-06-30,12200000
97,Chris Silva,Miami Heat,PF,1996-09-19,79568
385,Kobi Simmons,Charlotte Hornets,PG,1997-07-04,79568


In [61]:
# Question 5: How many unique values are there in each column of the 'nba' DataFrame?
nba.nunique()

Name        450
Team         30
Position      9
Birthday    430
Salary      269
dtype: int64

In [62]:
# Question 6: What is the maximum value in each column of the 'nba' DataFrame?
nba.max()

Name             Zylan Cheatham
Team         Washington Wizards
Position                     SG
Birthday    2000-12-23 00:00:00
Salary                 40231758
dtype: object

In [63]:
# Question 7: What is the minimum value in each column of the 'nba' DataFrame?
nba.min()

Name               Aaron Gordon
Team              Atlanta Hawks
Position                      C
Birthday    1977-01-26 00:00:00
Salary                    79568
dtype: object

In [81]:
# Question 8: What are the top four rows with the highest values in the 'Salary' column of the 'nba' DataFrame?
nba.nlargest(4, 'Salary')

Unnamed: 0,Name,Team,Position,Birthday,Salary
205,Stephen Curry,Golden State Warriors,PG,1988-03-14,40231758
38,Chris Paul,Oklahoma City Thunder,PG,1985-05-06,38506482
219,Russell Westbrook,Houston Rockets,PG,1988-11-12,38506482
251,John Wall,Washington Wizards,PG,1990-09-06,38199000


In [82]:
# Question 9: What are the three rows with the smallest values in the 'Birthday' column of the 'nba' DataFrame?
nba.nsmallest(3,'Salary')

Unnamed: 0,Name,Team,Position,Birthday,Salary
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
12,Norvel Pelle,Philadelphia 76ers,FC,1993-02-03,79568
24,Jaylen Hoard,Portland Trail Blazers,SF,1999-03-30,79568


In [83]:
# Question 10: What is the sum of all values in the 'nba' DataFrame?
nba.sum()

  nba.sum()


Name        Shake MiltonChristian WoodPJ WashingtonDerrick...
Team        Philadelphia 76ersDetroit PistonsCharlotte Hor...
Position    SGPFPFPGGPFSGSFCSFPGPGFCPGSGPFCCPFPFSGPFPGSGSF...
Salary                                             3444112694
dtype: object

In [84]:
# Question 11: What is the total sum of the 'Salary' column in the 'nba' DataFrame?
nba['Salary'].sum()

3444112694

In [85]:
# Question 13: What is the median of the 'Salary' column in the 'nba' DataFrame?
nba.Salary.median()

3303074.5

In [86]:
# Question 14: What is the mode of the 'Salary' column in the 'nba' DataFrame?
nba.Salary.mode()

0    79568
Name: Salary, dtype: int64

In [89]:
# Question 15: What is the standard deviation of the 'Salary' column in the 'nba' 
nba.Salary.std()

9288810.298497694

In [90]:
# Question 16: How do you sort the 'nba' DataFrame based on the 'Name' column in ascending order?
nba.sort_values(by = 'Name')

Unnamed: 0,Name,Team,Position,Birthday,Salary
52,Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
101,Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
437,Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
81,Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
399,Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
...,...,...,...,...,...
159,Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000
302,Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
312,Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440


In [94]:
# Question 17: Provide the top five rows of the 'nba' DataFrame after sorting by 'Name' in descending order.
nba.sort_values(by = 'Name',ascending= False).head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
248,Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
312,Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
302,Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
159,Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000


In [95]:
# Question 18: Provide the top five rows of the 'nba' DataFrame after sorting by 'Birthday' in descending order.
nba.sort_values(by = 'Birthday',ascending=False).head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
136,Sekou Doumbouya,Detroit Pistons,SF,2000-12-23,3285120
432,Talen Horton-Tucker,Los Angeles Lakers,GF,2000-11-25,898310
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
313,RJ Barrett,New York Knicks,SG,2000-06-14,7839960
392,Jalen Lecque,Phoenix Suns,G,2000-06-13,898310


In [98]:
# Question 19: How do you sort the 'nba' DataFrame first by 'Team' in ascending order and then by 'Name' in ascending order?
nba.sort_values(by = ['Team','Name'])

Unnamed: 0,Name,Team,Position,Birthday,Salary
359,Alex Len,Atlanta Hawks,C,1993-06-16,4160000
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000
276,Brandon Goodwin,Atlanta Hawks,PG,1995-10-02,79568
438,Bruno Fernando,Atlanta Hawks,C,1998-08-15,1400000
194,Cam Reddish,Atlanta Hawks,SF,1999-09-01,4245720
...,...,...,...,...,...
418,Jordan McRae,Washington Wizards,PG,1991-03-28,1645357
273,Justin Robinson,Washington Wizards,PG,1997-10-12,898310
428,Moritz Wagner,Washington Wizards,C,1997-04-26,2063520
21,Rui Hachimura,Washington Wizards,PF,1998-02-08,4469160


In [97]:
# Question 20: How do you sort the 'nba' DataFrame first by 'Team' in descending order and then by 'Name' in ascending order?
nba.sort_values(by = ['Team','Name'],ascending=[False,True])

Unnamed: 0,Name,Team,Position,Birthday,Salary
399,Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
35,Bradley Beal,Washington Wizards,SG,1993-06-28,27093018
353,Chris Chiozza,Washington Wizards,PG,1995-11-21,79568
226,Davis Bertans,Washington Wizards,PF,1992-11-12,7000000
283,Garrison Mathews,Washington Wizards,SG,1996-10-24,79568
...,...,...,...,...,...
339,Jabari Parker,Atlanta Hawks,PF,1995-03-15,6500000
84,John Collins,Atlanta Hawks,PF,1997-09-23,2686560
20,Kevin Huerter,Atlanta Hawks,SG,1998-08-27,2636280
290,Tyrone Wallace,Atlanta Hawks,PG,1994-06-10,1620564


In [99]:
# Question 21: How do you sort the 'nba' DataFrame first by 'Team' in ascending order and then by 'Salary' in descending order?
nba.sort_values(by = ['Team',"Salary"],ascending=[True,False])

Unnamed: 0,Name,Team,Position,Birthday,Salary
111,Chandler Parsons,Atlanta Hawks,SF,1988-10-25,25102512
28,Evan Turner,Atlanta Hawks,PG,1988-10-27,18606556
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000
213,De'Andre Hunter,Atlanta Hawks,SF,1997-12-02,7068360
339,Jabari Parker,Atlanta Hawks,PF,1995-03-15,6500000
...,...,...,...,...,...
80,Isaac Bonga,Washington Wizards,PG,1999-11-08,1416852
399,Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
273,Justin Robinson,Washington Wizards,PG,1997-10-12,898310
283,Garrison Mathews,Washington Wizards,SG,1996-10-24,79568


In [100]:
# Question 22: After sorting the 'nba' DataFrame by 'Team' in ascending order and 'Salary' in descending order, what is the top row?
nba.sort_values(by = ['Team',"Salary"],ascending=[True,False]).head(1)

Unnamed: 0,Name,Team,Position,Birthday,Salary
111,Chandler Parsons,Atlanta Hawks,SF,1988-10-25,25102512


In [101]:
# Question 23: How do you sort the 'nba' DataFrame by the row index in ascending order?
nba.sort_index()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
...,...,...,...,...,...
445,Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
446,Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960


In [103]:
# Question 24: How do you sort the 'nba' DataFrame by the row index in descending order?
nba.sort_index(ascending=False)

Unnamed: 0,Name,Team,Position,Birthday,Salary
449,Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000
448,Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
447,Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
446,Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
445,Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
...,...,...,...,...,...
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357


In [104]:
# Question 25: How do you sort the columns of the 'nba' DataFrame in ascending order by label?
nba.sort_index(axis=1)

Unnamed: 0,Birthday,Name,Position,Salary,Team
0,1996-09-26,Shake Milton,SG,1445697,Philadelphia 76ers
1,1995-09-27,Christian Wood,PF,1645357,Detroit Pistons
2,1998-08-23,PJ Washington,PF,3831840,Charlotte Hornets
3,1988-10-04,Derrick Rose,PG,7317074,Detroit Pistons
4,1995-07-26,Marial Shayok,G,79568,Philadelphia 76ers
...,...,...,...,...,...
445,1992-08-01,Austin Rivers,PG,2174310,Houston Rockets
446,1998-04-22,Harry Giles,PF,2578800,Sacramento Kings
447,1988-04-01,Robin Lopez,C,4767000,Milwaukee Bucks
448,1999-01-04,Collin Sexton,PG,4764960,Cleveland Cavaliers


In [118]:

# Question 26: How do you sort the columns of the 'nba' DataFrame in descending order by label?
nba.sort_index(axis=1, ascending=False)

Unnamed: 0,Team,Salary,Position,Name,Birthday
0,Philadelphia 76ers,1445697,SG,Shake Milton,1996-09-26
1,Detroit Pistons,1645357,PF,Christian Wood,1995-09-27
2,Charlotte Hornets,3831840,PF,PJ Washington,1998-08-23
3,Detroit Pistons,7317074,PG,Derrick Rose,1988-10-04
4,Philadelphia 76ers,79568,G,Marial Shayok,1995-07-26
...,...,...,...,...,...
445,Houston Rockets,2174310,PG,Austin Rivers,1992-08-01
446,Sacramento Kings,2578800,PF,Harry Giles,1998-04-22
447,Milwaukee Bucks,4767000,C,Robin Lopez,1988-04-01
448,Cleveland Cavaliers,4764960,PG,Collin Sexton,1999-01-04


In [115]:
nba

Unnamed: 0,Team,Salary,Position,Name,Birthday
0,Philadelphia 76ers,1445697,SG,Shake Milton,1996-09-26
1,Detroit Pistons,1645357,PF,Christian Wood,1995-09-27
2,Charlotte Hornets,3831840,PF,PJ Washington,1998-08-23
3,Detroit Pistons,7317074,PG,Derrick Rose,1988-10-04
4,Philadelphia 76ers,79568,G,Marial Shayok,1995-07-26
...,...,...,...,...,...
445,Houston Rockets,2174310,PG,Austin Rivers,1992-08-01
446,Sacramento Kings,2578800,PF,Harry Giles,1998-04-22
447,Milwaukee Bucks,4767000,C,Robin Lopez,1988-04-01
448,Cleveland Cavaliers,4764960,PG,Collin Sexton,1999-01-04


In [119]:
# Question 27: After sorting the columns of the 'nba' DataFrame in ascending order by label, what is the top row?
nba.sort_index(axis=1,ascending=True).head(1)

Unnamed: 0,Birthday,Name,Position,Salary,Team
0,1996-09-26,Shake Milton,SG,1445697,Philadelphia 76ers


In [112]:
nba.sort_index(axis=1, ascending=True).iloc[0]

Birthday    1996-09-26 00:00:00
Name               Shake Milton
Position                     SG
Salary                  1445697
Team         Philadelphia 76ers
Name: 0, dtype: object

In [120]:
# Question 28: After sorting the columns of the 'nba' DataFrame in descending order by label, what is the top row?
nba.sort_index(axis=1, ascending=False).head(1)

Unnamed: 0,Team,Salary,Position,Name,Birthday
0,Philadelphia 76ers,1445697,SG,Shake Milton,1996-09-26


In [121]:
# Question 29: How do you set the 'Name' column as the index of the 'nba' DataFrame?
nba.set_index('Name')

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,1445697,SG,1996-09-26
Christian Wood,Detroit Pistons,1645357,PF,1995-09-27
PJ Washington,Charlotte Hornets,3831840,PF,1998-08-23
Derrick Rose,Detroit Pistons,7317074,PG,1988-10-04
Marial Shayok,Philadelphia 76ers,79568,G,1995-07-26
...,...,...,...,...
Austin Rivers,Houston Rockets,2174310,PG,1992-08-01
Harry Giles,Sacramento Kings,2578800,PF,1998-04-22
Robin Lopez,Milwaukee Bucks,4767000,C,1988-04-01
Collin Sexton,Cleveland Cavaliers,4764960,PG,1999-01-04


In [124]:
# Question 30: What is the 'Salary' column of the 'nba' DataFrame after setting the 'Name' column as the index?
nba.set_index('Name')['Salary']

Name
Shake Milton       1445697
Christian Wood     1645357
PJ Washington      3831840
Derrick Rose       7317074
Marial Shayok        79568
                    ...   
Austin Rivers      2174310
Harry Giles        2578800
Robin Lopez        4767000
Collin Sexton      4764960
Ricky Rubio       16200000
Name: Salary, Length: 450, dtype: int64

In [127]:
# Question 31: What is the 'Position' column of the 'nba' DataFrame after setting the 'Name' column as the index?
nba.set_index('Name')['Position']

Name
Shake Milton      SG
Christian Wood    PF
PJ Washington     PF
Derrick Rose      PG
Marial Shayok      G
                  ..
Austin Rivers     PG
Harry Giles       PF
Robin Lopez        C
Collin Sexton     PG
Ricky Rubio       PG
Name: Position, Length: 450, dtype: object

In [132]:
# Question 32: Provide the top five rows of the 'nba' DataFrame, 
#including only the 'Salary' and 'Birthday' columns, after setting the 'Name' column as the index.
nba.set_index('Name')[['Salary','Birthday']]

Unnamed: 0_level_0,Salary,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Shake Milton,1445697,1996-09-26
Christian Wood,1645357,1995-09-27
PJ Washington,3831840,1998-08-23
Derrick Rose,7317074,1988-10-04
Marial Shayok,79568,1995-07-26
...,...,...
Austin Rivers,2174310,1992-08-01
Harry Giles,2578800,1998-04-22
Robin Lopez,4767000,1988-04-01
Collin Sexton,4764960,1999-01-04


In [133]:
# Question 33: Provide the top five rows of the 'nba' DataFrame, including only the 'Birthday' and 'Salary' columns, 
#after setting the 'Name' column as the index.
nba.set_index('Name')[['Birthday','Salary']]

Unnamed: 0_level_0,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Shake Milton,1996-09-26,1445697
Christian Wood,1995-09-27,1645357
PJ Washington,1998-08-23,3831840
Derrick Rose,1988-10-04,7317074
Marial Shayok,1995-07-26,79568
...,...,...
Austin Rivers,1992-08-01,2174310
Harry Giles,1998-04-22,2578800
Robin Lopez,1988-04-01,4767000
Collin Sexton,1999-01-04,4764960


In [140]:

# Question 34: What are the columns of the 'nba' DataFrame that have the data type 'object'
#after setting the 'Name' column as the index?
nba.set_index('Name').select_dtypes(include='object').columns

Index(['Team', 'Position'], dtype='object')

In [149]:
# Question 35: What is the 'Birthday' column of the 'nba' DataFrame after setting the 'Name' column as the index,
#excluding columns with the data type 'object' and ''?
nba.set_index('Name').select_dtypes(exclude=['object','int'])['Birthday']

Name
Shake Milton     1996-09-26
Christian Wood   1995-09-27
PJ Washington    1998-08-23
Derrick Rose     1988-10-04
Marial Shayok    1995-07-26
                    ...    
Austin Rivers    1992-08-01
Harry Giles      1998-04-22
Robin Lopez      1988-04-01
Collin Sexton    1999-01-04
Ricky Rubio      1990-10-21
Name: Birthday, Length: 450, dtype: datetime64[ns]

In [None]:
new_nba = nba.set_index('Name')

In [184]:
# Question 36: What is the information for the player "LeBron James" in the 'nba' DataFrame after setting the 'Name' column as the index?
new_nba.loc[['LeBron James']]

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LeBron James,Los Angeles Lakers,37436858,PF,1984-12-30


In [183]:
# Question 37: Provide the information for the players "Kawhi Leonard" and "Paul George" in the 'nba' DataFrame after setting the 'Name' column as the index.
new_nba.loc[['Paul George','Kawhi Leonard']]

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Paul George,Los Angeles Clippers,33005556,SF,1990-05-02
Kawhi Leonard,Los Angeles Clippers,32742000,SF,1991-06-29


In [191]:
# Question 38: Provide the information for the players "Paul George" and "Kawhi Leonard" in the 'nba' DataFrame after setting the 'Name' column as the index.
new_nba.loc[['Paul George','Kawhi Leonard']]

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Paul George,Los Angeles Clippers,33005556,SF,1990-05-02
Kawhi Leonard,Los Angeles Clippers,32742000,SF,1991-06-29


In [196]:
# Question 39: Provide the information for players whose names range from "Otto Porter" to "Patrick Beverley" in the 'nba' DataFrame after setting the 'Name' column as the index.
new_nba.loc["Otto Porter":"Patrick Beverley"]

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Otto Porter,Chicago Bulls,27250576,SF,1993-06-03
Langston Galloway,Detroit Pistons,7333333,PG,1991-12-09
Evan Turner,Atlanta Hawks,18606556,PG,1988-10-27
Norman Powell,Toronto Raptors,10116576,SG,1993-05-25
Nicolas Claxton,Brooklyn Nets,898310,PF,1999-04-17
Michael Frazier,Houston Rockets,79568,G,1994-03-08
Paul Millsap,Denver Nuggets,30000000,PF,1985-02-10
Furkan Korkmaz,Philadelphia 76ers,1620564,SG,1997-07-24
Trey Burke,Philadelphia 76ers,2028594,PG,1992-11-12
Bradley Beal,Washington Wizards,27093018,SG,1993-06-28


In [207]:
# Question 40: What are the players in the 'nba' DataFrame whose names range from "Zach Collins" to the end of the index, after setting the 'Name' column as the index?
new_nba.loc["Zach Collins":]

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Zach Collins,Portland Trail Blazers,4240200,C,1997-11-19
Stanley Johnson,Toronto Raptors,3623000,PF,1996-05-29
Boban Marjanovic,Dallas Mavericks,3500000,C,1988-08-15
Josh Magette,Orlando Magic,79568,PG,1989-11-28
Kyle Lowry,Toronto Raptors,33296296,PG,1986-03-25
...,...,...,...,...
Austin Rivers,Houston Rockets,2174310,PG,1992-08-01
Harry Giles,Sacramento Kings,2578800,PF,1998-04-22
Robin Lopez,Milwaukee Bucks,4767000,C,1988-04-01
Collin Sexton,Cleveland Cavaliers,4764960,PG,1999-01-04


In [208]:
# Question 41: What are the players in the 'nba' DataFrame whose names range from the beginning of the index to "Al Horford," after setting the 'Name' column as the index?
new_nba.loc[:'Al Horford']

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,1445697,SG,1996-09-26
Christian Wood,Detroit Pistons,1645357,PF,1995-09-27
PJ Washington,Charlotte Hornets,3831840,PF,1998-08-23
Derrick Rose,Detroit Pistons,7317074,PG,1988-10-04
Marial Shayok,Philadelphia 76ers,79568,G,1995-07-26
...,...,...,...,...
Kevin Knox,New York Knicks,4380120,PF,1999-08-11
Jeff Green,Utah Jazz,2564753,PF,1986-08-28
Ersan Ilyasova,Milwaukee Bucks,7000000,PF,1987-05-15
Caleb Swanigan,Sacramento Kings,2033160,PF,1997-04-18


In [214]:
# Question 42: What is the information for the player at index 300 in the 'nba' DataFrame?
nba[nba.index ==300]

Unnamed: 0,Team,Salary,Position,Name,Birthday
300,Denver Nuggets,1416852,PF,Jarred Vanderbilt,1999-04-03


In [225]:
nba.loc[[300]]

Unnamed: 0,Team,Salary,Position,Name,Birthday
300,Denver Nuggets,1416852,PF,Jarred Vanderbilt,1999-04-03


In [228]:
# Question 43: Provide the information for players at indices 100, 200, 300, and 400 in the 'nba' DataFrame.
nba.loc[[100,200,300,400]]

Unnamed: 0,Team,Salary,Position,Name,Birthday
100,Indiana Pacers,79568,SG,Brian Bowen,1998-10-02
200,San Antonio Spurs,5846154,SF,Marco Belinelli,1986-03-25
300,Denver Nuggets,1416852,PF,Jarred Vanderbilt,1999-04-03
400,Detroit Pistons,79568,F,Louis King,1999-04-06


In [230]:
# Question 44: Provide the information for players at indices 400 to 403 in the 'nba' DataFrame.
nba.loc[400:403]


Unnamed: 0,Team,Salary,Position,Name,Birthday
400,Detroit Pistons,79568,F,Louis King,1999-04-06
401,Los Angeles Lakers,79568,PF,Kostas Antetokounmpo,1997-11-20
402,Brooklyn Nets,1699236,PF,Rodions Kurucs,1998-02-05
403,Brooklyn Nets,10605600,PG,Spencer Dinwiddie,1993-04-06


In [233]:
# Question 45: Provide the information for the first two players in the 'nba' DataFrame.
nba.loc[:1]

Unnamed: 0,Team,Salary,Position,Name,Birthday
0,Philadelphia 76ers,1445697,SG,Shake Milton,1996-09-26
1,Detroit Pistons,1645357,PF,Christian Wood,1995-09-27


In [234]:
# Question 46: Provide the information for players at index 447 and beyond in the 'nba' DataFrame.
nba.loc[447:]

Unnamed: 0,Team,Salary,Position,Name,Birthday
447,Milwaukee Bucks,4767000,C,Robin Lopez,1988-04-01
448,Cleveland Cavaliers,4764960,PG,Collin Sexton,1999-01-04
449,Phoenix Suns,16200000,PG,Ricky Rubio,1990-10-21


In [248]:
# Question 47: Provide the information for players at indices -10 to -6 in the 'nba' DataFrame.
nba.iloc[-10:-6]

Unnamed: 0,Team,Salary,Position,Name,Birthday
440,Los Angeles Lakers,2564753,PF,Jared Dudley,1985-07-10
441,Chicago Bulls,79568,SG,Max Strus,1996-03-28
442,Golden State Warriors,4464286,C,Kevon Looney,1996-02-06
443,Charlotte Hornets,1557250,C,Willy Hernangomez,1994-05-27


In [253]:
# Question 48: Provide the information for players at indices 0 to 9 with a step size of 2 in the 'nba' DataFrame.
nba.iloc[slice(0,9,2)]

Unnamed: 0,Team,Salary,Position,Name,Birthday
0,Philadelphia 76ers,1445697,SG,Shake Milton,1996-09-26
2,Charlotte Hornets,3831840,PF,PJ Washington,1998-08-23
4,Philadelphia 76ers,79568,G,Marial Shayok,1995-07-26
6,Miami Heat,1416852,SG,Kendrick Nunn,1995-08-03
8,Milwaukee Bucks,12093024,C,Brook Lopez,1988-04-01


In [256]:
# Question 49: What is the team of the player at index 57 in the 'nba' DataFrame?
nba.loc[[57]][['Team']]

Unnamed: 0,Team
57,Utah Jazz


In [30]:
# Question 50: Provide the information for players at indices 100 to 103 with columns up to the third column (exclusive) in the 'nba' DataFrame.
nba.iloc[100:104,:3]

Unnamed: 0,Name,Team,Position
100,Brian Bowen,Indiana Pacers,SG
101,Aaron Holiday,Indiana Pacers,PG
102,Troy Daniels,Los Angeles Lakers,SG
103,Buddy Hield,Sacramento Kings,SG


In [61]:
# Question 51: What is the birthday of the player "Austin Rivers" in the 'nba' DataFrame?
nba.loc[nba['Name']=='Austin Rivers', 'Birthday']

445   1992-08-01
Name: Birthday, dtype: datetime64[ns]

In [76]:
# Question 52: What is the position of the player at index 263 in the 'nba' DataFrame?
nba.loc[263, 'Position']

'PF'

In [88]:
# Question 53: Using the %timeit magic command, what is the execution time of accessing the birthday of the player "Austin Rivers" using the .at accessor?
%timeit nba.at[nba['Name'].eq('Austin Rivers').idxmax(), 'Birthday']

85.4 µs ± 4.27 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [94]:
nba['Name'].eq('Austin Rivers').idxmax(), 'Birthday'

(445, 'Birthday')

In [99]:
%timeit nba[nba['Name']== "Austin Rivers"]

152 µs ± 3.38 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [103]:
# Question 54: Using the %timeit magic command, what is the execution time of 
# accessing the birthday of the player "Austin Rivers" using the .loc accessor?
%timeit nba.loc[nba['Name']=="Austin Rivers"]

175 µs ± 16.8 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [106]:
# Question 55: Using the %timeit magic command, what is the execution time of accessing the position of the player at index 263 using the .iat accessor?
%timeit nba.iat[263,nba.columns.get_loc('Position')]

9.94 µs ± 1.23 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [110]:
# Question 56: Using the %timeit magic command, what is the execution time of accessing the position of the player at index 263 using the .iloc accessor?
%timeit nba.iloc[263,nba.columns.get_loc('Position')]

12.1 µs ± 998 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [128]:
# Question 57: What is the salary of the player "Damian Lillard" in the 'nba' DataFrame?
nba.loc[nba['Name']=="Damian Lillard",'Salary']

257    29802321
Name: Salary, dtype: int64

In [122]:
nba[nba['Name']=="Damian Lillard"]['Salary']

257    29802321
Name: Salary, dtype: int64

In [136]:
# Question 58: What is the salary of the player "Damian Lillard" using the .at accessor?
nba.at[nba['Name'].eq("Damian Lillard").idxmax(),'Salary']

29802321

In [145]:
# Question 59: What is the salary of the player at index 234 in the 'nba' DataFrame?
nba.at[234,'Salary']

2033160

In [147]:
# Question 60: What is the salary of the player at index 234 using the .iat accessor?
nba.iat[234,nba.columns.get_loc('Salary')]

2033160

In [148]:
# Question 61: What are the original column names of the 'nba' DataFrame?
nba.columns

Index(['Name', 'Team', 'Position', 'Birthday', 'Salary'], dtype='object')

In [158]:
# Question 62: After renaming the columns to ["Team", "Position", "Date of Birth", "Pay"], what are the column names of the 'nba' DataFrame?
nba.rename(columns={'Birthday': "Date of Birth",'Salary':"Pay"},inplace=True)

In [159]:
nba.columns

Index(['Name', 'Team', 'Position', 'Date of Birth', 'Pay'], dtype='object')

In [161]:
# Question 63: After renaming the "Date of Birth" column to "Birthday," what are the column names of the 'nba' DataFrame?
nba.rename(columns={"Date of Birth":"Birthday"},inplace=True)
nba.columns

Index(['Name', 'Team', 'Position', 'Birthday', 'Pay'], dtype='object')

In [164]:
# Question 64: What is the information for the player "Giannis Antetokounmpo" in the 'nba' DataFrame?
nba[nba['Name']=="Giannis Antetokounmpo"]

Unnamed: 0,Name,Team,Position,Birthday,Pay
393,Giannis Antetokounmpo,Milwaukee Bucks,PF,1994-12-06,25842697


In [170]:
# Question 65: After renaming the player "Giannis Antetokounmpo" to "Greek Freak,
#" what is the information for the player "Greek Freak" in the 'nba' DataFrame?
nba.loc[nba['Name'] == "Giannis Antetokounmpo",'Name']=="Greek Freak"
nba[nba['Name'] == "Greek Freak"]

Unnamed: 0,Name,Team,Position,Birthday,Pay
393,Greek Freak,Milwaukee Bucks,PF,1994-12-06,25842697


In [172]:
# Question 66: What does the 'nba' DataFrame look like after setting the index to "Team"?
nba.set_index('Team')

Unnamed: 0_level_0,Name,Position,Birthday,Pay
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Philadelphia 76ers,Shake Milton,SG,1996-09-26,1445697
Detroit Pistons,Christian Wood,PF,1995-09-27,1645357
Charlotte Hornets,PJ Washington,PF,1998-08-23,3831840
Detroit Pistons,Derrick Rose,PG,1988-10-04,7317074
Philadelphia 76ers,Marial Shayok,G,1995-07-26,79568
...,...,...,...,...
Houston Rockets,Austin Rivers,PG,1992-08-01,2174310
Sacramento Kings,Harry Giles,PF,1998-04-22,2578800
Milwaukee Bucks,Robin Lopez,C,1988-04-01,4767000
Cleveland Cavaliers,Collin Sexton,PG,1999-01-04,4764960


In [175]:
# Question 67: What does the 'nba' DataFrame look like after resetting the index?
nba.reset_index(drop=True,inplace = True)

In [179]:
# Question 68: What does the 'nba' DataFrame look like after resetting the index and setting the index to "Team"?
nba.reset_index(drop=True,inplace=True)
nba.set_index('Team',inplace=True)
nba

Unnamed: 0_level_0,Name,Position,Birthday,Pay
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Philadelphia 76ers,Shake Milton,SG,1996-09-26,1445697
Detroit Pistons,Christian Wood,PF,1995-09-27,1645357
Charlotte Hornets,PJ Washington,PF,1998-08-23,3831840
Detroit Pistons,Derrick Rose,PG,1988-10-04,7317074
Philadelphia 76ers,Marial Shayok,G,1995-07-26,79568
...,...,...,...,...
Houston Rockets,Austin Rivers,PG,1992-08-01,2174310
Sacramento Kings,Harry Giles,PF,1998-04-22,2578800
Milwaukee Bucks,Robin Lopez,C,1988-04-01,4767000
Cleveland Cavaliers,Collin Sexton,PG,1999-01-04,4764960


In [3]:
import pandas as pd

In [19]:
nfl = pd.read_csv('nba.csv')
nfl.head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,9/26/96,1445697
1,Christian Wood,Detroit Pistons,PF,9/27/95,1645357
2,PJ Washington,Charlotte Hornets,PF,8/23/98,3831840
3,Derrick Rose,Detroit Pistons,PG,10/4/88,7317074
4,Marial Shayok,Philadelphia 76ers,G,7/26/95,79568


In [20]:
# Question 69: What is the shape of the 'nfl' DataFrame?
nfl.shape

(450, 5)

In [10]:
# Question 70: What are the top 5 teams with the most players in the 'nfl' DataFrame?
nlf['Team'].value_counts().head(5)

Philadelphia 76ers        17
Minnesota Timberwolves    17
Utah Jazz                 17
Brooklyn Nets             17
Toronto Raptors           16
Name: Team, dtype: int64

In [22]:
# Question 71: What does the 'nfl' DataFrame look like after sorting it by salary in descending order and displaying the top 5 rows?
nfl.sort_values(by='Salary', ascending=False).head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
205,Stephen Curry,Golden State Warriors,PG,3/14/88,40231758
219,Russell Westbrook,Houston Rockets,PG,11/12/88,38506482
38,Chris Paul,Oklahoma City Thunder,PG,5/6/85,38506482
251,John Wall,Washington Wizards,PG,9/6/90,38199000
264,James Harden,Houston Rockets,PG,8/26/89,38199000


In [24]:
# Question 72: What does the 'nfl' DataFrame look like after sorting it first by "Team" in ascending order and then by "Salary" in descending order?
nfl.sort_values(by = ['Team', 'Salary'],ascending=[True,False])

Unnamed: 0,Name,Team,Position,Birthday,Salary
111,Chandler Parsons,Atlanta Hawks,SF,10/25/88,25102512
28,Evan Turner,Atlanta Hawks,PG,10/27/88,18606556
167,Allen Crabbe,Atlanta Hawks,SG,4/9/92,18500000
213,De'Andre Hunter,Atlanta Hawks,SF,12/2/97,7068360
339,Jabari Parker,Atlanta Hawks,PF,3/15/95,6500000
...,...,...,...,...,...
80,Isaac Bonga,Washington Wizards,PG,11/8/99,1416852
399,Admiral Schofield,Washington Wizards,SF,3/30/97,1000000
273,Justin Robinson,Washington Wizards,PG,10/12/97,898310
283,Garrison Mathews,Washington Wizards,SG,10/24/96,79568


In [33]:
# Question 73: What does the 'nfl' DataFrame look like after resetting the index and setting the index to "Team"?nfl.reindex()
nfl.reset_index(inplace=True)
nfl.set_index('Team', inplace=True)
nfl

Unnamed: 0_level_0,Name,Position,Birthday,Salary
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Philadelphia 76ers,Shake Milton,SG,9/26/96,1445697
Detroit Pistons,Christian Wood,PF,9/27/95,1645357
Charlotte Hornets,PJ Washington,PF,8/23/98,3831840
Detroit Pistons,Derrick Rose,PG,10/4/88,7317074
Philadelphia 76ers,Marial Shayok,G,7/26/95,79568
...,...,...,...,...
Houston Rockets,Austin Rivers,PG,8/1/92,2174310
Sacramento Kings,Harry Giles,PF,4/22/98,2578800
Milwaukee Bucks,Robin Lopez,C,4/1/88,4767000
Cleveland Cavaliers,Collin Sexton,PG,1/4/99,4764960


In [49]:
# Question 74: What are the players from the "New York Jets" in the 'nfl' DataFrame?
nfl.loc["New York Jets"]

KeyError: 'New York Jets'

In [44]:
# Question 75: What is the earliest birthday among the players from the "New York Jets" in the 'nfl' DataFrame?


In [50]:
# Question 76: What is the player with the earliest birthday among all teams in the 'nfl' DataFrame?
nfl['Birthday'].idxmin()

TypeError: reduction operation 'argmin' not allowed for this dtype