## 4.1 Overview of a DataFrame

In [None]:
import pandas as pd
import numpy as np

### 4.1.1 Creating a DataFrame from a Dictionary

In [None]:
# Column-oriented input: each key becomes a column label and each value
# supplies that column's entries. "Population" is deliberately a Series while
# the other two are plain lists, showing that both kinds of value can be mixed.
city_data = dict(
    City = ["New York City", "Paris", "Barcelona", "Rome"],
    Country = ["United States", "France", "Spain", "Italy"],
    Population = pd.Series([8600000, 2141000, 5515000, 2873000]),
)

cities = pd.DataFrame(data = city_data)
cities

In [None]:
# The two lines below are equivalent: `T` is the attribute shorthand for the
# `transpose()` method. Both swap rows and columns without modifying `cities`.
# Only the last expression of a cell is rendered as its output.
cities.transpose()
cities.T

### 4.1.2 Creating a DataFrame from a NumPy ndarray

In [None]:
# Draw from a seeded Generator so that a fresh "Restart & Run All" reproduces
# the same table in every later cell that displays `random_data`.
rng = np.random.default_rng(seed = 42)

# 3 rows x 5 columns of integers from 1 to 100 (the upper bound 101 is exclusive).
random_data = rng.integers(1, 101, size = (3, 5))
random_data

In [None]:
# With no labels supplied, both axes receive default integer labels.
pd.DataFrame(random_data)

In [None]:
# Label the three rows with parts of the day; the columns keep their default
# integer labels because no `columns` argument is given.
row_labels = ["Morning", "Afternoon", "Evening"]
temperatures = pd.DataFrame(random_data, index = row_labels)
temperatures

In [None]:
# Label both axes: a list for the rows and a tuple for the columns, showing
# that either kind of sequence is accepted.
row_labels = ["Morning", "Afternoon", "Evening"]
column_labels = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")

pd.DataFrame(random_data, index = row_labels, columns = column_labels)

In [None]:
# Index labels do not have to be unique: "Morning" is repeated among the row
# labels and "Tuesday" among the column labels.
row_labels = ["Morning", "Afternoon", "Morning"]
# Fixed: this list was previously bound to the misspelled name
# `column_lables`, so the DataFrame below silently reused the unique labels
# from the previous cell and never showed a duplicated column.
column_labels = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Tuesday",
    "Friday"
]

pd.DataFrame(
    data = random_data,
    index = row_labels,
    columns = column_labels,
)


## 4.2 Similarities between Series and DataFrames

### 4.2.1 Importing a DataFrame with the read_csv Function

In [None]:
# Read nba.csv and display it; the result is not stored anywhere.
pd.read_csv("nba.csv")

In [None]:
# Same import, but ask pandas to parse the "Birthday" column as dates.
pd.read_csv("nba.csv", parse_dates = ["Birthday"])

In [None]:
# Keep the date-parsed DataFrame in `nba` for the cells that follow.
nba = pd.read_csv("nba.csv", parse_dates = ["Birthday"])

### 4.2.2 Shared and Exclusive Attributes between Series and DataFrames

In [None]:
# A Series has a single dtype for all of its values.
pd.Series([1, 2, 3]).dtype

In [None]:
# A DataFrame has `dtypes` (plural): one entry per column.
nba.dtypes

In [None]:
# Tally how many columns share each dtype.
nba.dtypes.value_counts()

In [None]:
# The row labels.
nba.index

In [None]:
# The column labels.
nba.columns

In [None]:
# Number of dimensions.
nba.ndim

In [None]:
# (number of rows, number of columns).
nba.shape

In [None]:
# Total number of cells, missing values included.
nba.size

In [None]:
# Number of non-missing values in each column.
nba.count()

In [None]:
# Total number of non-missing values across all columns.
nba.count().sum()

In [None]:
# Small two-column frame with one missing value (second row of "A"), used in
# the next cells to contrast `size` with `count`.
data = dict(A = [1, np.nan], B = [2, 3])

df = pd.DataFrame(data = data)
df

In [None]:
# `size` counts every cell, including the NaN in column "A".
df.size

In [None]:
# `count` skips missing values, so "A" reports one fewer value than "B".
df.count()

In [None]:
# Adding the per-column counts gives the number of non-missing cells.
df.count().sum()

### 4.2.3 Shared Methods between Series and DataFrames

In [None]:
# First 2 rows.
nba.head(2)

In [None]:
# Last 3 rows.
nba.tail(n = 3)

In [None]:
# Without an argument, `tail` falls back to its default row count.
nba.tail()

In [None]:
# 3 randomly chosen rows; no random_state is passed, so the selection can
# differ from run to run.
nba.sample(3)

In [None]:
# Number of distinct values in each column.
nba.nunique()

In [None]:
# Largest value in each column.
nba.max()

In [None]:
# Smallest value in each column.
nba.min()

In [None]:
# The 4 rows with the largest "Salary" values.
nba.nlargest(n = 4, columns = "Salary")

In [None]:
# The 3 rows with the smallest (i.e. earliest) "Birthday" values.
nba.nsmallest(n = 3, columns = ["Birthday"])

In [None]:
# Sum of every column, with no restriction to numeric columns.
# NOTE(review): recent pandas versions may raise TypeError here because the
# datetime "Birthday" column does not support summation — confirm against the
# pandas version this notebook targets.
nba.sum()

In [None]:
# Restrict the aggregation to numeric columns.
nba.sum(numeric_only = True)

In [None]:
# Average of each numeric column.
nba.mean(numeric_only = True)

In [None]:
# Median of each numeric column.
nba.median(numeric_only = True)

In [None]:
# Most frequent value(s) of each numeric column.
nba.mode(numeric_only = True)

In [None]:
# Standard deviation of each numeric column.
nba.std(numeric_only = True)

## 4.3 Sorting a DataFrame

### 4.3.1 Sorting by Single Column

In [None]:
# The two lines below are equivalent: `by` is the first parameter of
# `sort_values`, so it can be passed positionally or by keyword.
# Neither call modifies `nba`; each returns a new sorted DataFrame.
nba.sort_values("Name")
nba.sort_values(by = "Name")

In [None]:
# Descending order by name; show only the first rows of the result.
nba.sort_values("Name", ascending = False).head()

In [None]:
# Descending order on a date column puts the latest birthdays first.
nba.sort_values("Birthday", ascending = False).head()

### 4.3.2 Sorting by Multiple Columns

In [None]:
# Sort by "Team" first, then by "Name" within each team.
nba.sort_values(by = ["Team", "Name"])

In [None]:
# A single boolean applies the same direction to every sort column.
nba.sort_values(["Team", "Name"], ascending = False)

In [None]:
# One boolean per column: "Team" ascending, "Salary" descending within a team.
nba.sort_values(
    by = ["Team", "Salary"], ascending = [True, False]
)

In [None]:
# Same sort as the previous cell, but the result replaces `nba`, so the later
# cells see the rows in this order.
nba = nba.sort_values(
    by = ["Team", "Salary"],
    ascending = [True, False]
)

## 4.4 Sorting by Index

In [None]:
# `nba` was last re-bound to a frame sorted by "Team" and "Salary", so the row
# index labels shown here are no longer in ascending order.
nba.head()

### 4.4.1 Sorting by Row Index

In [None]:
# The two lines below are equivalent: ascending order is the default.
nba.sort_index().head()
nba.sort_index(ascending = True).head()

In [None]:
# Row labels from highest to lowest.
nba.sort_index(ascending = False).head()

In [None]:
# Re-bind `nba` to the frame ordered by row label.
nba = nba.sort_index()

### 4.4.2 Sorting by Column Index

In [None]:
# The two lines below are equivalent: the column axis can be named
# ("columns") or numbered (1). Sorting here reorders the columns by label.
nba.sort_index(axis = "columns").head()
nba.sort_index(axis = 1).head()

In [None]:
# Column labels in reverse order.
nba.sort_index(axis = "columns", ascending = False).head()

## 4.5 Setting a New Index

In [None]:
# The two lines below are equivalent: `keys` is the first parameter of
# `set_index`. Each returns a new DataFrame indexed by "Name"; `nba` itself
# is unchanged.
nba.set_index(keys = "Name")
nba.set_index("Name")

In [None]:
# Make the change permanent by re-binding `nba`.
nba = nba.set_index(keys = "Name")

In [None]:
# Alternative: choose the index column while importing. This re-reads the file
# and replaces the frame built in the previous cell.
nba = pd.read_csv(
    "nba.csv", parse_dates = ["Birthday"], index_col = "Name"
)

## 4.6 Selecting Columns and Rows from a DataFrame

### 4.6.1 Selecting a Single Column from a DataFrame

In [None]:
# Attribute-style access to a column; only possible when the column name is a
# valid Python identifier.
nba.Salary

In [None]:
# Bracket access works for any column name.
nba["Position"]

### 4.6.2 Selecting Multiple Columns from a DataFrame

In [None]:
# A list inside the brackets selects several columns and returns a DataFrame.
nba[["Salary", "Birthday"]].head()

In [None]:
# Columns come back in the order they are listed.
nba[["Birthday", "Salary"]].head()

In [None]:
# Keep only the columns whose dtype is object.
nba.select_dtypes(include = "object")

In [None]:
# Drop the object and integer columns and keep the rest.
nba.select_dtypes(exclude = ["object", "int"])

## 4.7 Selecting Rows from a DataFrame

### 4.7.1 Extracting Rows by Index Label

In [None]:
# A single label returns that row.
nba.loc["LeBron James"]

In [None]:
# A list of labels returns those rows as a DataFrame.
nba.loc[["Kawhi Leonard", "Paul George"]]

In [None]:
# Rows come back in the order the labels are listed.
nba.loc[["Paul George", "Kawhi Leonard"]]

In [None]:
# Label slice on a sorted index; unlike Python slices, the end label is
# included in the result.
nba.sort_index().loc["Otto Porter":"Patrick Beverley"]

In [None]:
# For contrast: a plain Python list slice excludes its upper bound, so this
# returns only the first two names.
players = ["Otto Porter", "PJ Dozier", "PJ Washington"]
players[0:2]

In [None]:
# From the given label through the end of the sorted index.
nba.sort_index().loc["Zach Collins":]

In [None]:
# From the start of the sorted index through the given label (inclusive).
nba.sort_index().loc[:"Al Horford"]

**NOTE**: I've commented out the code below so that the Notebook can run without raising an error.

In [None]:
# Deliberately left commented out: looking up a label that is not in the index
# raises an error (presumably KeyError — confirm), which would stop "Run All".
# nba.loc["Bugs Bunny"]

### 4.7.2 Extracting Rows by Index Position

In [None]:
# Row at integer position 300 (positions are zero-based).
nba.iloc[300]

In [None]:
# A list of positions returns those rows as a DataFrame.
nba.iloc[[100, 200, 300, 400]]

In [None]:
# Position slice: like Python slices, the end position (404) is excluded.
nba.iloc[400:404]

In [None]:
# The first two rows.
nba.iloc[:2]

In [None]:
# From position 447 through the last row.
nba.iloc[447:]

In [None]:
# Negative positions count back from the end of the frame.
nba.iloc[-10:-6]

In [None]:
# A third slice value is the step: every second row among positions 0-9.
nba.iloc[0:10:2]

### 4.7.3 Extracting Values from Specific Columns

In [None]:
# Second argument to `loc` picks the column: one row label, one column label.
nba.loc["Giannis Antetokounmpo", "Team"]

In [None]:
# One row label, a list of column labels.
nba.loc["James Harden", ["Position", "Birthday"]]

In [None]:
# Lists on both axes: the selected rows crossed with the selected columns.
nba.loc[
    ["Russell Westbrook", "Anthony Davis"],
    ["Team", "Salary"]
]

In [None]:
# Label slice over the columns, from "Position" through "Salary".
nba.loc["Joel Embiid", "Position":"Salary"]

In [None]:
# Same slice with the endpoints swapped.
# NOTE(review): "Salary" appears to come after "Position" in the column order,
# so this presumably returns an empty result — confirm.
nba.loc["Joel Embiid", "Salary":"Position"]

In [None]:
# Positional equivalent: row position 57, column position 3.
nba.iloc[57, 3]

In [None]:
# Position slices on both axes: rows 100-103, first three columns.
nba.iloc[100:104, :3]

In [None]:
# `at` fetches a single value by row label and column label.
nba.at["Austin Rivers", "Birthday"]

In [None]:
# `iat` fetches a single value by row position and column position.
nba.iat[263, 1]

In [None]:
%%timeit
# (Comments must stay below the cell magic, which has to be the first line.)
# Time the single-value label accessor ...
nba.at["Austin Rivers", "Birthday"]

In [None]:
%%timeit
# ... against the general-purpose label accessor for the same lookup.
nba.loc["Austin Rivers", "Birthday"]

In [None]:
%%timeit
# Time the single-value positional accessor ...
nba.iat[263, 1]

In [None]:
%%timeit
# ... against the general-purpose positional accessor for the same lookup.
nba.iloc[263, 1]

## 4.8 Extracting Values from Series

In [None]:
# The same four accessors exist on a Series; a Series has a single axis, so
# each takes only one argument. First, lookup by label with `loc`.
nba["Salary"].loc["Damian Lillard"]

In [None]:
# Lookup by label with `at`.
nba["Salary"].at["Damian Lillard"]

In [None]:
# Lookup by integer position with `iloc`.
nba["Salary"].iloc[234]

In [None]:
# Lookup by integer position with `iat`.
nba["Salary"].iat[234]

## 4.9 Renaming Columns or Rows

In [None]:
# Current column labels, before any renaming.
nba.columns

In [None]:
# Assigning to `columns` replaces every label at once, by position; the list
# must therefore name all columns, in order. This modifies `nba` in place.
nba.columns = ["Team", "Position", "Date of Birth", "Pay"]
nba.head(1)

In [None]:
# `rename` takes an {old label: new label} mapping and only touches the labels
# listed. It returns a new DataFrame; `nba` is unchanged by this cell.
nba.rename(columns = { "Date of Birth": "Birthday" })

In [None]:
# Re-bind `nba` to keep the renamed column. Note that "Pay" is not renamed
# back, so the column is called "Pay" from here on.
nba = nba.rename(columns = { "Date of Birth": "Birthday" })

In [None]:
# The row exists under its original index label ...
nba.loc["Giannis Antetokounmpo"]

In [None]:
# ... rename that index label ...
nba = nba.rename(
    index = { "Giannis Antetokounmpo": "Greek Freak" }    
)

In [None]:
# ... and the same row is now reachable under the new label.
nba.loc["Greek Freak"]

## 4.10 Resetting an Index

In [None]:
# Setting a new index directly replaces the current one, so the existing
# index labels (the player names) do not appear in this result.
nba.set_index("Team").head()

In [None]:
# `reset_index` moves the current index back into a regular column and
# restores default integer row labels.
nba.reset_index().head()

In [None]:
# Chaining the two keeps the old index as a column while "Team" becomes the
# new index.
nba.reset_index().set_index("Team").head()

In [None]:
# Re-bind `nba` to keep that result.
nba = nba.reset_index().set_index("Team")

## 4.11 Coding Challenge

### 4.11.1 Problems

### 4.11.2 Solutions

In [None]:
# Import nfl.csv, parsing the "Birthday" column as dates.
nfl = pd.read_csv("nfl.csv", parse_dates = ["Birthday"])
nfl

In [None]:
# First way to index by player name: set the index after importing.
nfl = nfl.set_index("Name")

In [None]:
# Second way: choose the index column during the import. This re-reads the
# file and replaces the frame from the previous cell.
nfl = pd.read_csv("nfl.csv", index_col = "Name", parse_dates = ["Birthday"])

In [None]:
nfl.head()

In [None]:
# The two lines below are equivalent
# (attribute access vs. bracket access to the "Team" column). `value_counts`
# tallies the rows per team; `head` shows the first few entries of the tally.
nfl.Team.value_counts().head()
nfl["Team"].value_counts().head()

In [None]:
# Rows with the largest "Salary" values first.
nfl.sort_values("Salary", ascending = False).head()

In [None]:
# Teams in ascending order; within each team, salaries from high to low.
nfl.sort_values(
    by = ["Team", "Salary"],
    ascending = [True, False]
)

In [None]:
# Move the player names back into a column and index the frame by "Team".
nfl = nfl.reset_index().set_index(keys = "Team")
nfl.head(3)

In [None]:
# With "Team" as the index, a team name selects all rows for that team.
nfl.loc["New York Jets"].head()

In [None]:
# Sort that team's rows by birthday, earliest first, and keep the first row.
nfl.loc["New York Jets"].sort_values("Birthday").head(1)

## 4.12 Summary