# INTRODUCTION TO DATAFRAME OBJECTS

In [1]:
# Creating a dictionary
cities = {
    "knr": ["smart", 32.5, 7, "biryani"],
    "hyd": ["adv", 28.0, 3, "shahi murgi"],
    "trp": ["nice", 35.0, 4, "laddu"]
}

In [2]:
# imports
import numpy as np
import pandas as pd

In [3]:
# Converting a dicationary to a DataFrame
city_data = pd.DataFrame(cities, index=["type", "temp", "rating", "famous_food"])
city_data

Unnamed: 0,knr,hyd,trp
type,smart,adv,nice
temp,32.5,28.0,35.0
rating,7,3,4
famous_food,biryani,shahi murgi,laddu


In [4]:
# The following two lines give the same result
city_data.T
city_data.transpose()

Unnamed: 0,type,temp,rating,famous_food
knr,smart,32.5,7,biryani
hyd,adv,28.0,3,shahi murgi
trp,nice,35.0,4,laddu


In [5]:
random_data = np.random.randint(1, 100, (3, 5))
random_data

array([[42, 22,  8, 93, 12],
       [35, 42,  6,  4, 92],
       [64, 32, 14, 53, 96]], dtype=int32)

In [6]:
row_index = ["Morning", "Noon", "Evening"]
column_index = ["Mon", "Tue", "Wed", "Thu", "Fri"]
df = pd.DataFrame(data=random_data, index=row_index, columns=column_index)
df

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Morning,42,22,8,93,12
Noon,35,42,6,4,92
Evening,64,32,14,53,96


In [7]:
type(df)

pandas.core.frame.DataFrame

In [8]:
df.columns

Index(['Mon', 'Tue', 'Wed', 'Thu', 'Fri'], dtype='object')

In [9]:
df.index

Index(['Morning', 'Noon', 'Evening'], dtype='object')

In [10]:
# referencing a column
df.Mon  # referring a column as an attribute

Morning    42
Noon       35
Evening    64
Name: Mon, dtype: int32

In [11]:
# referencing column using indexing
df["Tue"]

Morning    22
Noon       42
Evening    32
Name: Tue, dtype: int32

In [12]:
type(df.Mon)

pandas.core.series.Series

In [13]:
type(df["Tue"])

pandas.core.series.Series

In [14]:
# extracting two columns from a DataFrame
df[["Wed", "Fri"]]

Unnamed: 0,Wed,Fri
Morning,8,12
Noon,6,92
Evening,14,96


In [15]:
type(df[["Mon", "Thu"]])

pandas.core.frame.DataFrame

In [16]:
type(df[["Fri"]])

pandas.core.frame.DataFrame

### Attributes and Methods of a DataFrame

In [17]:
df.dtypes

Mon    int32
Tue    int32
Wed    int32
Thu    int32
Fri    int32
dtype: object

In [18]:
df.ndim

2

In [19]:
df.shape

(3, 5)

In [20]:
df.size

15

In [21]:
print(df.count().sum())

15


In [22]:
# sampling rows
df.sample(1)

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Evening,64,32,14,53,96


In [23]:
df.nunique()

Mon    3
Tue    3
Wed    3
Thu    3
Fri    3
dtype: int64

#### Reading nba dataset

In [24]:
nba = pd.read_csv("./data/nba.csv", parse_dates=["Birthday"])
nba.head()

  nba = pd.read_csv("./data/nba.csv", parse_dates=["Birthday"])


Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
1,Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
2,PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
3,Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
4,Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568


In [25]:
nba.sum(numeric_only=True)
nba.mean(numeric_only=True)
nba.median(numeric_only=True)
nba.mode(numeric_only=True)
nba.std(numeric_only=True)

Salary    9.288810e+06
dtype: float64

In [26]:
nba.sort_values("Name").head()      # sort by Name column in ascending order

Unnamed: 0,Name,Team,Position,Birthday,Salary
52,Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
101,Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
437,Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
81,Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
399,Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000


In [27]:
nba.sort_values("Name", ascending=False).head()      # sort by Name column in descending order

Unnamed: 0,Name,Team,Position,Birthday,Salary
248,Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568
137,Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
312,Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
302,Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
159,Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000


In [28]:
nba.sort_values(by="Team").head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
84,John Collins,Atlanta Hawks,PF,1997-09-23,2686560
438,Bruno Fernando,Atlanta Hawks,C,1998-08-15,1400000
20,Kevin Huerter,Atlanta Hawks,SG,1998-08-27,2636280
276,Brandon Goodwin,Atlanta Hawks,PG,1995-10-02,79568
130,Damian Jones,Atlanta Hawks,C,1995-06-30,2305057


In [29]:
nba.sort_values(by=["Team", "Name"]).head(30)

Unnamed: 0,Name,Team,Position,Birthday,Salary
359,Alex Len,Atlanta Hawks,C,1993-06-16,4160000
167,Allen Crabbe,Atlanta Hawks,SG,1992-04-09,18500000
276,Brandon Goodwin,Atlanta Hawks,PG,1995-10-02,79568
438,Bruno Fernando,Atlanta Hawks,C,1998-08-15,1400000
194,Cam Reddish,Atlanta Hawks,SF,1999-09-01,4245720
111,Chandler Parsons,Atlanta Hawks,SF,1988-10-25,25102512
197,Charlie Brown,Atlanta Hawks,SG,1997-02-02,79568
130,Damian Jones,Atlanta Hawks,C,1995-06-30,2305057
213,De'Andre Hunter,Atlanta Hawks,SF,1997-12-02,7068360
28,Evan Turner,Atlanta Hawks,PG,1988-10-27,18606556


In [30]:
nba.sort_values(by=["Team", "Name"], ascending=[True, False]).head(30)

Unnamed: 0,Name,Team,Position,Birthday,Salary
98,Vince Carter,Atlanta Hawks,PF,1977-01-26,2564753
290,Tyrone Wallace,Atlanta Hawks,PG,1994-06-10,1620564
20,Kevin Huerter,Atlanta Hawks,SG,1998-08-27,2636280
84,John Collins,Atlanta Hawks,PF,1997-09-23,2686560
339,Jabari Parker,Atlanta Hawks,PF,1995-03-15,6500000
28,Evan Turner,Atlanta Hawks,PG,1988-10-27,18606556
213,De'Andre Hunter,Atlanta Hawks,SF,1997-12-02,7068360
130,Damian Jones,Atlanta Hawks,C,1995-06-30,2305057
197,Charlie Brown,Atlanta Hawks,SG,1997-02-02,79568
111,Chandler Parsons,Atlanta Hawks,SF,1988-10-25,25102512


In [34]:
# Setting the index
nba = nba.set_index("Name")   # nba.set_index(keys="Name")
nba

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
...,...,...,...,...
Austin Rivers,Houston Rockets,PG,1992-08-01,2174310
Harry Giles,Sacramento Kings,PF,1998-04-22,2578800
Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960


### Sort Values using Index: `sort_index()`

In [36]:
nba.sort_index().head()    # sorts by index in ascending order
nba.sort_index(axis=0).head()   # axis=0 means sort by rows

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000


In [37]:
# both the folloiwng lines give the same result
nba.sort_index(ascending=False).head()
nba.sort_index(axis=0, ascending=False).head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568
Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000


In [38]:
# sorting by columns
nba.sort_index(axis=1).head()
nba.sort_index(axis="columns").head()

Unnamed: 0_level_0,Birthday,Position,Salary,Team
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,1996-09-26,SG,1445697,Philadelphia 76ers
Christian Wood,1995-09-27,PF,1645357,Detroit Pistons
PJ Washington,1998-08-23,PF,3831840,Charlotte Hornets
Derrick Rose,1988-10-04,PG,7317074,Detroit Pistons
Marial Shayok,1995-07-26,G,79568,Philadelphia 76ers


In [39]:
# sorting by columns
nba.sort_index(axis=1, ascending=False).head()
nba.sort_index(axis="columns", ascending=False).head()

Unnamed: 0_level_0,Team,Salary,Position,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,1445697,SG,1996-09-26
Christian Wood,Detroit Pistons,1645357,PF,1995-09-27
PJ Washington,Charlotte Hornets,3831840,PF,1998-08-23
Derrick Rose,Detroit Pistons,7317074,PG,1988-10-04
Marial Shayok,Philadelphia 76ers,79568,G,1995-07-26


In [40]:
nba.head(15)

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
Christian Wood,Detroit Pistons,PF,1995-09-27,1645357
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Derrick Rose,Detroit Pistons,PG,1988-10-04,7317074
Marial Shayok,Philadelphia 76ers,G,1995-07-26,79568
Draymond Green,Golden State Warriors,PF,1990-03-04,18539130
Kendrick Nunn,Miami Heat,SG,1995-08-03,1416852
Cedi Osman,Cleveland Cavaliers,SF,1995-04-08,2907143
Brook Lopez,Milwaukee Bucks,C,1988-04-01,12093024
Torrey Craig,Denver Nuggets,SF,1990-12-19,2000000


### Selecting Rows from a DataFrame

- `loc[]` accessor used to access rows with **text lables**
- `iloc[]` accessor used to access rows with **index numbers (integers)**

In [41]:
nba.iloc[2]

Team          Charlotte Hornets
Position                     PF
Birthday    1998-08-23 00:00:00
Salary                  3831840
Name: PJ Washington, dtype: object

In [42]:
nba.loc["PJ Washington"]

Team          Charlotte Hornets
Position                     PF
Birthday    1998-08-23 00:00:00
Salary                  3831840
Name: PJ Washington, dtype: object

In [43]:
# Two separate indvidual rows
nba.loc[["PJ Washington", "Cedi Osman"]]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PJ Washington,Charlotte Hornets,PF,1998-08-23,3831840
Cedi Osman,Cleveland Cavaliers,SF,1995-04-08,2907143


In [44]:
nba.sort_index().head(12)

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
Al Horford,Philadelphia 76ers,C,1986-06-03,28000000
Al-Farouq Aminu,Orlando Magic,PF,1990-09-21,9258000
Alec Burks,Golden State Warriors,SG,1991-07-20,2320044
Alex Caruso,Los Angeles Lakers,PG,1994-02-28,2750000
Alex Len,Atlanta Hawks,C,1993-06-16,4160000


In [46]:
# sorting is requried to use range (:) operator
nba.sort_index().loc["Al Horford":"Alex Len"] # use : operator

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Al Horford,Philadelphia 76ers,C,1986-06-03,28000000
Al-Farouq Aminu,Orlando Magic,PF,1990-09-21,9258000
Alec Burks,Golden State Warriors,SG,1991-07-20,2320044
Alex Caruso,Los Angeles Lakers,PG,1994-02-28,2750000
Alex Len,Atlanta Hawks,C,1993-06-16,4160000


In [47]:
# from first row till "Al Horford"
nba.sort_index().loc[:"Al Horford"]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aaron Gordon,Orlando Magic,PF,1995-09-16,19863636
Aaron Holiday,Indiana Pacers,PG,1996-09-30,2239200
Abdel Nader,Oklahoma City Thunder,SF,1993-09-25,1618520
Adam Mokoka,Chicago Bulls,G,1998-07-18,79568
Admiral Schofield,Washington Wizards,SF,1997-03-30,1000000
Al Horford,Philadelphia 76ers,C,1986-06-03,28000000


In [51]:
nba.sort_index().loc["Yogi Ferrell":] # from Yogi Ferrell to end

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Yogi Ferrell,Sacramento Kings,PG,1993-05-09,3150000
Yuta Watanabe,Memphis Grizzlies,SF,1994-10-13,79568
Zach Collins,Portland Trail Blazers,C,1997-11-19,4240200
Zach LaVine,Chicago Bulls,PG,1995-03-10,19500000
Zach Norvell,Los Angeles Lakers,SG,1997-12-09,79568
Zhaire Smith,Philadelphia 76ers,SG,1999-06-04,3058800
Zion Williamson,New Orleans Pelicans,F,2000-07-06,9757440
Zylan Cheatham,New Orleans Pelicans,SF,1995-11-17,79568


In [52]:
# Using the iloc[] accessor to get rows by integer location

In [53]:
nba.iloc[300]

Team             Denver Nuggets
Position                     PF
Birthday    1999-04-03 00:00:00
Salary                  1416852
Name: Jarred Vanderbilt, dtype: object

In [54]:
nba.iloc[[100, 200, 300, 400]]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brian Bowen,Indiana Pacers,SG,1998-10-02,79568
Marco Belinelli,San Antonio Spurs,SF,1986-03-25,5846154
Jarred Vanderbilt,Denver Nuggets,PF,1999-04-03,1416852
Louis King,Detroit Pistons,F,1999-04-06,79568


In [55]:
nba.iloc[:2]        # from index 0 to index 2

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shake Milton,Philadelphia 76ers,SG,1996-09-26,1445697
Christian Wood,Detroit Pistons,PF,1995-09-27,1645357


In [56]:
nba.iloc[447:]

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Robin Lopez,Milwaukee Bucks,C,1988-04-01,4767000
Collin Sexton,Cleveland Cavaliers,PG,1999-01-04,4764960
Ricky Rubio,Phoenix Suns,PG,1990-10-21,16200000
