**Prerequisites**

In [None]:
!python -m pip install pandas pyarrow

**Possible Solution - Retaining An Old Index**

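**Note:** The first cell below parses the `date_of_birth` column into dates instead of leaving the values as strings. The second cell is the tutorial version, which keeps them as strings.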

In [18]:
# Variant that parses "date_of_birth" while loading.
# The dates are therefore displayed differently from the tutorial's strings.

import pandas as pd

beach_boys = (
    pd.read_csv(
        "band_members.csv",
        parse_dates=["date_of_birth"],
        dayfirst=True,
    )
    .convert_dtypes(dtype_backend="pyarrow")
    # Keep only the calendar date of each parsed timestamp.
    .assign(date_of_birth=lambda members: members["date_of_birth"].dt.date)
)

In [19]:
# Tutorial version of the code.

import pandas as pd

beach_boys = pd.read_csv("band_members.csv").convert_dtypes(
    dtype_backend="pyarrow"
)

# Give the rows 1-based labels. The range is sized from the frame, so a
# CSV with a different number of rows does not raise a length-mismatch error.
beach_boys.index = range(1, len(beach_boys) + 1)

# Move those labels into a regular column called "old_index"; the frame
# gets a fresh default index in their place.
beach_boys = beach_boys.reset_index(names="old_index")
beach_boys

Unnamed: 0,old_index,first_name,last_name,instrument,date_of_birth
0,1,Brian,Wilson,Bass,20-Jun-1942
1,2,Mike,Love,Saxophone,15-Mar-1941
2,3,Al,Jardine,Guitar,03-Sep-1942
3,4,Bruce,Johnston,Bass,27-Jun-1942
4,5,Carl,Wilson,Guitar,21-Dec-1946
5,6,Dennis,Wilson,Drums,04-Dec-1944
6,7,David,Marks,Guitar,22-Aug-1948
7,8,Ricky,Fataar,Drums,05-Sep-1952
8,9,Blondie,Chaplin,Guitar,07-Jul-1951


**Possible Solution - Using `.index` and row selection**

In [20]:
import pandas as pd

beach_boys = pd.read_csv("band_members.csv").convert_dtypes(
    dtype_backend="pyarrow"
)

# Label the rows 2, 4, 6, ... using a stepped range sized from the frame.
# This replaces filtering a hard-coded range(1, 20) for even numbers, which
# only produced the right number of labels for exactly nine rows.
beach_boys.index = range(2, 2 * len(beach_boys) + 2, 2)

beach_boys

Unnamed: 0,first_name,last_name,instrument,date_of_birth
2,Brian,Wilson,Bass,20-Jun-1942
4,Mike,Love,Saxophone,15-Mar-1941
6,Al,Jardine,Guitar,03-Sep-1942
8,Bruce,Johnston,Bass,27-Jun-1942
10,Carl,Wilson,Guitar,21-Dec-1946
12,Dennis,Wilson,Drums,04-Dec-1944
14,David,Marks,Guitar,22-Aug-1948
16,Ricky,Fataar,Drums,05-Sep-1952
18,Blondie,Chaplin,Guitar,07-Jul-1951


In [21]:
# Label-based slice: .loc keeps both end labels, 16 and 18.
beach_boys.loc[16:18, :]

Unnamed: 0,first_name,last_name,instrument,date_of_birth
16,Ricky,Fataar,Drums,05-Sep-1952
18,Blondie,Chaplin,Guitar,07-Jul-1951


In [22]:
# Position-based slice: the last two rows, whatever their labels are.
beach_boys.iloc[-2:, :]

Unnamed: 0,first_name,last_name,instrument,date_of_birth
16,Ricky,Fataar,Drums,05-Sep-1952
18,Blondie,Chaplin,Guitar,07-Jul-1951


**Possible Solution - Using `.set_axis()`**

In [23]:
# Relabel the rows with square numbers (0, 1, 4, 9, ...). One label is
# generated per row instead of assuming the frame has exactly nine rows.
beach_boys = beach_boys.set_axis(
    labels=[n**2 for n in range(len(beach_boys))]
)
beach_boys

Unnamed: 0,first_name,last_name,instrument,date_of_birth
0,Brian,Wilson,Bass,20-Jun-1942
1,Mike,Love,Saxophone,15-Mar-1941
4,Al,Jardine,Guitar,03-Sep-1942
9,Bruce,Johnston,Bass,27-Jun-1942
16,Carl,Wilson,Guitar,21-Dec-1946
25,Dennis,Wilson,Drums,04-Dec-1944
36,David,Marks,Guitar,22-Aug-1948
49,Ricky,Fataar,Drums,05-Sep-1952
64,Blondie,Chaplin,Guitar,07-Jul-1951


**Possible Solutions - Index Restoration**

In [25]:
import pandas as pd

# Reload the data, then remove the rows labelled 3 and 5, which leaves
# gaps in the index.
beach_boys = (
    pd.read_csv("band_members.csv")
    .convert_dtypes(dtype_backend="pyarrow")
    .drop(index=[3, 5])
)
beach_boys

Unnamed: 0,first_name,last_name,instrument,date_of_birth
0,Brian,Wilson,Bass,20-Jun-1942
1,Mike,Love,Saxophone,15-Mar-1941
2,Al,Jardine,Guitar,03-Sep-1942
4,Carl,Wilson,Guitar,21-Dec-1946
6,David,Marks,Guitar,22-Aug-1948
7,Ricky,Fataar,Drums,05-Sep-1952
8,Blondie,Chaplin,Guitar,07-Jul-1951


In [26]:
# (i) Using .reset_index()
# Display the restored frame without rebinding `beach_boys`, so that
# alternative (ii) still starts from the gapped index left by .drop()
# instead of from an index that has already been restored.
beach_boys.reset_index(drop=True)

Unnamed: 0,first_name,last_name,instrument,date_of_birth
0,Brian,Wilson,Bass,20-Jun-1942
1,Mike,Love,Saxophone,15-Mar-1941
2,Al,Jardine,Guitar,03-Sep-1942
3,Carl,Wilson,Guitar,21-Dec-1946
4,David,Marks,Guitar,22-Aug-1948
5,Ricky,Fataar,Drums,05-Sep-1952
6,Blondie,Chaplin,Guitar,07-Jul-1951


In [27]:
# (ii) Using .index
# Assigning a range makes pandas build a RangeIndex, the same kind of index
# a freshly loaded DataFrame has. Wrapping the range in a list comprehension
# would instead store every label in a list-backed integer index.
beach_boys.index = range(len(beach_boys))
beach_boys

Unnamed: 0,first_name,last_name,instrument,date_of_birth
0,Brian,Wilson,Bass,20-Jun-1942
1,Mike,Love,Saxophone,15-Mar-1941
2,Al,Jardine,Guitar,03-Sep-1942
3,Carl,Wilson,Guitar,21-Dec-1946
4,David,Marks,Guitar,22-Aug-1948
5,Ricky,Fataar,Drums,05-Sep-1952
6,Blondie,Chaplin,Guitar,07-Jul-1951


**Possible Solution - Dealing With Duplicates**

In [28]:
beach_boys = (
    pd.read_csv("band_members.csv")
    .convert_dtypes(dtype_backend="pyarrow")
)

# Split the band into guitarists and everyone else; each part is
# renumbered from zero.
guitar_players, others = (
    beach_boys.query(condition).reset_index(drop=True)
    for condition in ("instrument == 'Guitar'", "instrument != 'Guitar'")
)

# Both parts start at 0, so stacking them repeats labels; renumber the
# combined frame to make every label unique again.
all_beach_boys = (
    pd.concat([guitar_players, others])
    .reset_index(drop=True)
)

In [29]:
# Row whose label is 3; the list selector returns a one-row DataFrame.
all_beach_boys.loc[[3], :]

Unnamed: 0,first_name,last_name,instrument,date_of_birth
3,Blondie,Chaplin,Guitar,07-Jul-1951


In [30]:
# Row at position 3; the list selector returns a one-row DataFrame.
all_beach_boys.iloc[[3], :]

Unnamed: 0,first_name,last_name,instrument,date_of_birth
3,Blondie,Chaplin,Guitar,07-Jul-1951


In [31]:
# Keep only the rows labelled 1 and 3.
all_beach_boys.filter(items=[1, 3], axis=0)

Unnamed: 0,first_name,last_name,instrument,date_of_birth
1,Carl,Wilson,Guitar,21-Dec-1946
3,Blondie,Chaplin,Guitar,07-Jul-1951


**Possible Solution - Customising Existing Columns**

In [32]:
import pandas as pd

# Start again from an unmodified copy of the band data.
beach_boys = (
    pd.read_csv("band_members.csv")
    .convert_dtypes(dtype_backend="pyarrow")
)

In [33]:
def calculate_user_ID(row):
    """Build a user ID from a band member's surname and first initial.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys "last_name" and "first_name".

    Returns
    -------
    str
        The last name followed by the first character of the first name,
        for example "WilsonB".
    """
    # Single quotes inside the double-quoted f-string: reusing the outer
    # quote character is a SyntaxError on Python versions before 3.12.
    return f"{row['last_name']}{row['first_name'][0]}"

# Compute one user ID per row and use those IDs as the row labels.
beach_boys.index = beach_boys.apply(func=calculate_user_ID, axis="columns")

beach_boys

Unnamed: 0,first_name,last_name,instrument,date_of_birth
WilsonB,Brian,Wilson,Bass,20-Jun-1942
LoveM,Mike,Love,Saxophone,15-Mar-1941
JardineA,Al,Jardine,Guitar,03-Sep-1942
JohnstonB,Bruce,Johnston,Bass,27-Jun-1942
WilsonC,Carl,Wilson,Guitar,21-Dec-1946
WilsonD,Dennis,Wilson,Drums,04-Dec-1944
MarksD,David,Marks,Guitar,22-Aug-1948
FataarR,Ricky,Fataar,Drums,05-Sep-1952
ChaplinB,Blondie,Chaplin,Guitar,07-Jul-1951


**Possible Solution - Index Alignment I**

In [34]:
# Load week 1 with the CSV's "index" column as the row labels.
week1_sales = pd.read_csv("week1_record_sales.csv", index_col="index")

In [35]:
# Load week 2 with the CSV's "index" column as the row labels.
week2_sales = pd.read_csv("week2_record_sales.csv", index_col="index")

In [36]:
# Week 1 as loaded: its row labels run from 0 to 4.
week1_sales

Unnamed: 0_level_0,day,sales
index,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Mon,100
1,Tue,150
2,Wed,200
3,Thu,250
4,Fri,300


In [37]:
# Week 2 as loaded: its row labels run from 1 to 5, one higher than week 1's.
week2_sales

Unnamed: 0_level_0,day,sales
index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Mon,100
2,Tue,150
3,Wed,200
4,Thu,250
5,Fri,300


In [38]:
# Give week 1 the same row labels as week 2 so the two frames line up.
week1_sales = week1_sales.set_axis(week2_sales.index, axis="index")

In [39]:
# Week 1 after taking week 2's labels: the rows are now labelled 1 to 5.
week1_sales

Unnamed: 0_level_0,day,sales
index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Mon,100
2,Tue,150
3,Wed,200
4,Thu,250
5,Fri,300


In [40]:
# Week 2 for comparison: its labels are still 1 to 5.
week2_sales

Unnamed: 0_level_0,day,sales
index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Mon,100
2,Tue,150
3,Wed,200
4,Thu,250
5,Fri,300


In [41]:
# Add the two weeks' sales; values are paired by matching row label.
week1_sales["sales"].add(week2_sales["sales"])

index
1    200
2    300
3    400
4    500
5    600
Name: sales, dtype: int64

In [42]:
# Join the two weeks side by side on their shared row labels.
pd.merge(week1_sales, week2_sales, left_index=True, right_index=True)

Unnamed: 0_level_0,day_x,sales_x,day_y,sales_y
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Mon,100,Mon,100
2,Tue,150,Tue,150
3,Wed,200,Wed,200
4,Thu,250,Thu,250
5,Fri,300,Fri,300


**Possible Solution - Index Alignment II**

In [43]:
# Re-key both weekly tables by the "day" column, replacing the numeric labels.
week1_sales, week2_sales = (
    weekly.set_index("day") for weekly in (week1_sales, week2_sales)
)

week1_sales

Unnamed: 0_level_0,sales
day,Unnamed: 1_level_1
Mon,100
Tue,150
Wed,200
Thu,250
Fri,300


In [44]:
# Week 2 is now labelled by day name as well.
week2_sales

Unnamed: 0_level_0,sales
day,Unnamed: 1_level_1
Mon,100
Tue,150
Wed,200
Thu,250
Fri,300


In [45]:
# Add the two weeks' sales; values are paired by day name.
week1_sales["sales"].add(week2_sales["sales"])

day
Mon    200
Tue    300
Wed    400
Thu    500
Fri    600
Name: sales, dtype: int64

In [46]:
# Join the two weeks side by side on the shared day labels.
pd.merge(week1_sales, week2_sales, left_index=True, right_index=True)

Unnamed: 0_level_0,sales_x,sales_y
day,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,100,100
Tue,150,150
Wed,200,200
Thu,250,250
Fri,300,300


**Possible Solution - Dealing With A `MultiIndex`**

In [55]:
# Mean fiber for each (type, manufacturer) pair. The "manufacturer" level is
# then moved back into a column, leaving "type" as the only index level.
cereals = (
    pd.read_csv("cereals.csv")
    .convert_dtypes(dtype_backend="pyarrow")
    .pivot_table(
        values="fiber",
        index=["type", "manufacturer"],
        aggfunc="mean",
    )
    .reset_index(level="manufacturer")
)

cereals

Unnamed: 0_level_0,manufacturer,fiber
type,Unnamed: 1_level_1,Unnamed: 2_level_1
Cold,General Mills,1.272727
Cold,Kelloggs,2.73913
Cold,Nabisco,4.6
Cold,Post,2.777778
Cold,Quaker Oats,1.142857
Cold,Ralston Purina,1.875
Hot,American Home Food Products,0.0
Hot,Nabisco,1.0
Hot,Quaker Oats,2.7


**Possible Solution - Creating A Meaningful Index**

In [56]:
# Mean fiber for each (manufacturer, type) pair.
cereals = (
    pd.read_csv("cereals.csv")
    .convert_dtypes(dtype_backend="pyarrow")
    .pivot_table(
        values="fiber",
        index=["manufacturer", "type"],
        aggfunc="mean",
    )
)

# Collapse the two index levels into a single level of tuples.
cereals.index = cereals.index.to_flat_index()
cereals

Unnamed: 0,fiber
"(American Home Food Products, Hot)",0.0
"(General Mills, Cold)",1.272727
"(Kelloggs, Cold)",2.73913
"(Nabisco, Cold)",4.6
"(Nabisco, Hot)",1.0
"(Post, Cold)",2.777778
"(Quaker Oats, Cold)",1.142857
"(Quaker Oats, Hot)",2.7
"(Ralston Purina, Cold)",1.875
