## Axis in Pandas

<img src="../img/axis.png">

In [1]:
## run this data cell
## raw input for the notebook: firm names alongside their fee_2020 values
data = {
    "firm": [
        "Bilk Inc.",
        "Vine & Co.",
        "Kiln Inc.",
        "Y & Y Consulting",
        "Trending Inc.",
    ],
    "fee_2020": [1150, 2300, 3400, 1500, 6500],
}

In [2]:
## import pandas
import pandas as pd

In [3]:
## build a data frame from the dict above
## (each dict key becomes a column, each list the values of that column)
df = pd.DataFrame(data=data)
df

Unnamed: 0,firm,fee_2020
0,Bilk Inc.,1150
1,Vine & Co.,2300
2,Kiln Inc.,3400
3,Y & Y Consulting,1500
4,Trending Inc.,6500


In [4]:
## create a new column that increases fees by 10 percent
## multiplying the column directly is vectorized: every value in fee_2020
## is scaled at once, so no apply/lambda is needed for simple arithmetic
df["fee_2021"] = df["fee_2020"] * 1.1
df

Unnamed: 0,firm,fee_2020,fee_2021
0,Bilk Inc.,1150,1265.0
1,Vine & Co.,2300,2530.0
2,Kiln Inc.,3400,3740.0
3,Y & Y Consulting,1500,1650.0
4,Trending Inc.,6500,7150.0


<img src="../img/axis.png">

In [5]:
## make an independent (deep) copy of the above df
## so we can manipulate it without touching the original
df_ax1 = df.copy(deep=True)


In [6]:
## For every row we want to add the value in fee_2020
## to the value in fee_2021 of that same row.
## The sum travels ACROSS the columns of each row, so we use axis = 1

fee_columns = ["fee_2020", "fee_2021"]
df_ax1["total_sum"] = df_ax1[fee_columns].sum(axis=1)
df_ax1

Unnamed: 0,firm,fee_2020,fee_2021,total_sum
0,Bilk Inc.,1150,1265.0,2415.0
1,Vine & Co.,2300,2530.0,4830.0
2,Kiln Inc.,3400,3740.0,7140.0
3,Y & Y Consulting,1500,1650.0,3150.0
4,Trending Inc.,6500,7150.0,13650.0


In [7]:
## We can do the same by using axis = "columns"
## "columns" is just the named form of axis = 1: still one sum per row

df_ax1["total_col"] = (
    df_ax1[["fee_2020", "fee_2021"]]
    .sum(axis="columns")
)
df_ax1

Unnamed: 0,firm,fee_2020,fee_2021,total_sum,total_col
0,Bilk Inc.,1150,1265.0,2415.0,2415.0
1,Vine & Co.,2300,2530.0,4830.0,4830.0
2,Kiln Inc.,3400,3740.0,7140.0,7140.0
3,Y & Y Consulting,1500,1650.0,3150.0,3150.0
4,Trending Inc.,6500,7150.0,13650.0,13650.0


In [8]:
## we drop the total_col
## axis = "columns" tells drop that "total_col" is a column label, not a row label
## drop returns a new data frame; we reassign it rather than using inplace = True
df_ax1 = df_ax1.drop(["total_col"], axis="columns")
df_ax1

Unnamed: 0,firm,fee_2020,fee_2021,total_sum
0,Bilk Inc.,1150,1265.0,2415.0
1,Vine & Co.,2300,2530.0,4830.0
2,Kiln Inc.,3400,3740.0,7140.0
3,Y & Y Consulting,1500,1650.0,3150.0
4,Trending Inc.,6500,7150.0,13650.0


In [9]:
## We use lambda to do a calculation across axis 1
## with axis = 1, apply passes each row to the lambda, one row at a time
df_ax1["total_lambda"] = df_ax1.apply(
    lambda row: row["fee_2020"] + row["fee_2021"],
    axis=1,
)
df_ax1

Unnamed: 0,firm,fee_2020,fee_2021,total_sum,total_lambda
0,Bilk Inc.,1150,1265.0,2415.0,2415.0
1,Vine & Co.,2300,2530.0,4830.0,4830.0
2,Kiln Inc.,3400,3740.0,7140.0,7140.0
3,Y & Y Consulting,1500,1650.0,3150.0,3150.0
4,Trending Inc.,6500,7150.0,13650.0,13650.0


Frankly, I rarely use axis = 0 and don't have an example.

This is because functions like `.mean()` and `.sum()`, when told which column(s) to operate on, give you axis = 0 functionality automatically, as shown below.

In [10]:
## make a separate (deep) copy of df to use for the axis = 0 example
df_ax0 = df.copy(deep=True)
df_ax0

Unnamed: 0,firm,fee_2020,fee_2021
0,Bilk Inc.,1150,1265.0
1,Vine & Co.,2300,2530.0
2,Kiln Inc.,3400,3740.0
3,Y & Y Consulting,1500,1650.0
4,Trending Inc.,6500,7150.0


In [11]:
## note how the calculation is done vertically (down the rows):
## all the items in fee_2020 are summed into one total, and likewise for fee_2021
## no axis argument is passed here, so this is the default behaviour of .sum()
df_ax0[["fee_2020", "fee_2021"]].sum()




fee_2020    14850.0
fee_2021    16335.0
dtype: float64

<img src="../img/axis.png">