### loc

In [2]:
import pandas as pd
# Small demo frame with string row labels, used for the first .loc examples.
data = pd.DataFrame(
    {"name": ["fire", "air", "water"], "numbers": [1, 2, 3]},
    index=list("abc"),
)


In [3]:
# Display the frame; "a", "b", "c" are the row labels that .loc looks up below.
data

Unnamed: 0,name,numbers
a,fire,1
b,air,2
c,water,3


In [4]:
# A single row label returns that row as a Series (columns become the Series index).
data.loc["b"]

name       air
numbers      2
Name: b, dtype: object

In [5]:
# A list of row labels returns a DataFrame containing just those rows.
data.loc[['a', 'c']]

Unnamed: 0,name,numbers
a,fire,1
c,water,3


In [6]:
# Build a slightly richer employee frame to better understand .loc / .iloc.
# It contains one missing salary (E3) and one missing bonus (E5), which later
# cells fill in.
import numpy as np

df = pd.DataFrame(
    dict(
        employee=["Keshav", "Afroze", "Nico", "Sravya", "Ethan", "Fiona"],
        department=["HR", "Finance", "IT", "Finance", "IT", "HR"],
        salary=[500000, 600000, np.nan, 720000, 550000, 580000],
        joining_date=pd.to_datetime(
            ["2020-01-15", "2019-03-10", "2021-06-25",
             "2018-07-01", "2022-02-14", "2021-11-11"]
        ),
        bonus=[50000, 7000000, 40000, 80000, np.nan, 60000],
    ),
    index=[f"E{n}" for n in range(1, 7)],
)

In [7]:
# Plain-text rendering of the whole frame (a bare `df` would use the rich table display).
print(df)

   employee department    salary joining_date      bonus
E1   Keshav         HR  500000.0   2020-01-15    50000.0
E2   Afroze    Finance  600000.0   2019-03-10  7000000.0
E3     Nico         IT       NaN   2021-06-25    40000.0
E4   Sravya    Finance  720000.0   2018-07-01    80000.0
E5    Ethan         IT  550000.0   2022-02-14        NaN
E6    Fiona         HR  580000.0   2021-11-11    60000.0


In [14]:
# Rich preview of the first five rows.
df.head()

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020-01-15,50000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E3,Nico,IT,,2021-06-25,40000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0
E5,Ethan,IT,550000.0,2022-02-14,


In [16]:
# Look up one employee by row label; the result is a Series named after the label.
df.loc["E5"]

employee                      Ethan
department                       IT
salary                     550000.0
joining_date    2022-02-14 00:00:00
bonus                           NaN
Name: E5, dtype: object

In [18]:
# Label slices with .loc include the end label, so this returns E1 through E3.
df.loc[:"E3"]

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020-01-15,50000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E3,Nico,IT,,2021-06-25,40000.0


In [20]:
# Slice rows and columns by label; both ends are included on each axis
# (rows E1..E4, columns department..salary).
df.loc[:'E4', 'department':'salary']

Unnamed: 0,department,salary
E1,HR,500000.0
E2,Finance,600000.0
E3,IT,
E4,Finance,720000.0


In [22]:
# Same row slice, with a wider column slice: every column from department through bonus.
df.loc[:'E4','department':'bonus']

Unnamed: 0,department,salary,joining_date,bonus
E1,HR,500000.0,2020-01-15,50000.0
E2,Finance,600000.0,2019-03-10,7000000.0
E3,IT,,2021-06-25,40000.0
E4,Finance,720000.0,2018-07-01,80000.0


In [24]:
# Boolean mask: keep only the rows where the comparison is True.
df.loc[df["department"]=="Finance"] 

Unnamed: 0,employee,department,salary,joining_date,bonus
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0


In [26]:
# Combine two conditions with & (each wrapped in parentheses); `:` keeps all columns.
# NOTE(review): both Finance salaries are far above 65000, so the salary condition
# does not narrow the result here; 650000 may have been intended, confirm.
df.loc[(df["department"]=="Finance")&(df["salary"] > 65000),:]

Unnamed: 0,employee,department,salary,joining_date,bonus
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0


In [29]:
# Row mask plus a list of column labels: IT employees, only employee and bonus.
df.loc[df["department"]=="IT",["employee","bonus"]]

Unnamed: 0,employee,bonus
E3,Nico,40000.0
E5,Ethan,


In [31]:
# Fill the missing IT salary with the mean salary of the IT department, using .loc
# both to read the department's salaries and to write the fill value.
is_it = df["department"] == "IT"
avg_sal_it = df.loc[is_it, "salary"].mean()
df.loc[is_it & df["salary"].isna(), "salary"] = avg_sal_it

In [33]:
# Check the fill: E3's salary should now be the IT mean (550000.0).
df.head()

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020-01-15,50000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E3,Nico,IT,550000.0,2021-06-25,40000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0
E5,Ethan,IT,550000.0,2022-02-14,


In [35]:
# Fill value for missing IT bonuses. Despite the name, this is a fixed
# placeholder constant and is not computed from the data.
avg_bonus = 1000
missing_it_bonus = df["department"].eq("IT") & df["bonus"].isna()
df.loc[missing_it_bonus, "bonus"] = avg_bonus

In [39]:
# Check the fill: E5's bonus should now be 1000.0.
df.head()

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020-01-15,50000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E3,Nico,IT,550000.0,2021-06-25,40000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0
E5,Ethan,IT,550000.0,2022-02-14,1000.0


In [57]:
# Check the salary column's dtype; the next cell selects columns by this dtype.
df.salary.dtype

dtype('float64')

In [61]:
# Use a list comprehension to collect the float64 columns, then show those
# columns for the Finance rows only.
numeric_cols = [name for name, dtype in df.dtypes.items() if dtype == "float64"]
finance_rows = df["department"] == "Finance"
df.loc[finance_rows, numeric_cols]

Unnamed: 0,salary,bonus
E2,600000.0,7000000.0
E4,720000.0,80000.0


In [69]:
### Getting the employee and salary columns for the even-numbered rows (E2, E4, E6)

In [63]:
# First step: look at every row label. At this point the list still holds
# all labels; the even-only filter is applied in the next cell.
even_rows = list(df.index)
even_rows

['E1', 'E2', 'E3', 'E4', 'E5', 'E6']

In [65]:
# Keep the labels whose numeric suffix is even: "E2" -> 2 -> kept, "E3" -> 3 -> dropped.
even_rows = [
    label
    for label in df.index
    if int(label[1:]) % 2 == 0
]

In [67]:
# Select the even-labelled rows and just the employee and salary columns.
df.loc[even_rows, ["employee", "salary"]]

Unnamed: 0,employee,salary
E2,Afroze,600000.0
E4,Sravya,720000.0
E6,Fiona,580000.0


In [71]:
#using list comp and loc now

In [73]:
#adding a new column

In [89]:
# Total compensation = salary + bonus.
# A missing (NaN) bonus is treated as 0 so that it does not turn the total into
# NaN. The check has to be made per value: testing the column object itself
# against None is always true and therefore never catches a missing bonus.
# A missing salary still yields a missing total.
df["total_comp"] = df["salary"] + df["bonus"].fillna(0)

In [91]:
# Preview the frame with the new total_comp column.
df.head()

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E1,Keshav,HR,500000.0,2020-01-15,50000.0,550000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0,7600000.0
E3,Nico,IT,550000.0,2021-06-25,40000.0,590000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0


### iloc

In [41]:
# .iloc selects by integer position: position 2 is the third row (E3), returned as a Series.
df.iloc[2]

employee                       Nico
department                       IT
salary                     550000.0
joining_date    2021-06-25 00:00:00
bonus                       40000.0
Name: E3, dtype: object

In [96]:
# A list of positions returns those rows as a DataFrame (rows 0, 2 and 4).
df.iloc[[0, 2, 4]]

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E1,Keshav,HR,500000.0,2020-01-15,50000.0,550000.0
E3,Nico,IT,550000.0,2021-06-25,40000.0,590000.0
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0


In [98]:
# Positional slices exclude the end, unlike .loc label slices: this returns positions 1, 2, 3.
df.iloc[1:4]

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0,7600000.0
E3,Nico,IT,550000.0,2021-06-25,40000.0,590000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0


In [100]:
# Rows at positions 0 and 2, columns at positions 0 and 2 (employee, salary).
df.iloc[[0, 2], [0, 2]]

Unnamed: 0,employee,salary
E1,Keshav,500000.0
E3,Nico,550000.0


In [102]:
# All rows of the column at position 2 (salary), returned as a Series.
df.iloc[:, 2]

E1    500000.0
E2    600000.0
E3    550000.0
E4    720000.0
E5    550000.0
E6    580000.0
Name: salary, dtype: float64

In [104]:
# Scalar lookup: row position 3 (E4), column position 2 (salary).
df.iloc[3, 2]

720000.0

In [106]:
# Scalar lookup: row position 0 (E1), column position 4 (bonus).
df.iloc[0,4]

50000.0

In [108]:
# First four rows (positions 0 through 3; the end position 4 is excluded).
df.iloc[0:4]

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E1,Keshav,HR,500000.0,2020-01-15,50000.0,550000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0,7600000.0
E3,Nico,IT,550000.0,2021-06-25,40000.0,590000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0


In [110]:
# Rows at positions 0, 2, 4 and columns at positions 1, 3 (department, joining_date).
df.iloc[[0, 2,4],[1, 3]]

Unnamed: 0,department,joining_date
E1,HR,2020-01-15
E3,IT,2021-06-25
E5,IT,2022-02-14


In [112]:
# Even-row selection again, this time by position with a boolean mask.
# Note: this keeps even *positions* (0, 2, 4 -> E1, E3, E5), whereas the earlier
# .loc example kept even-numbered *labels* (E2, E4, E6).
even = [position % 2 == 0 for position, _ in enumerate(df.index)]
df.iloc[even]

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E1,Keshav,HR,500000.0,2020-01-15,50000.0,550000.0
E3,Nico,IT,550000.0,2021-06-25,40000.0,590000.0
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0


In [118]:
# Add 1000 to the salary of the first three employees (row positions 0-2).
# Column position 2 is "salary".
# NOTE: this changes df in place, so every re-run of this cell adds another 1000.
df.iloc[:3, 2] = df.iloc[:3, 2] + 1000

In [116]:
# Check that the first three salaries increased.
df.head()

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E1,Keshav,HR,501000.0,2020-01-15,50000.0,550000.0
E2,Afroze,Finance,601000.0,2019-03-10,7000000.0,7600000.0
E3,Nico,IT,551000.0,2021-06-25,40000.0,590000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0


In [120]:
# Double the salary (column position 2) of the first three employees.
# NOTE: this changes df in place, so re-running this cell doubles the values again.
df.iloc[:3, 2] *= 2

In [122]:
# Check that the first three salaries doubled.
df.head()

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E1,Keshav,HR,1004000.0,2020-01-15,50000.0,550000.0
E2,Afroze,Finance,1204000.0,2019-03-10,7000000.0,7600000.0
E3,Nico,IT,1104000.0,2021-06-25,40000.0,590000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0


In [124]:
# Show the full frame; head() stops at five rows and omits E6.
df

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp
E1,Keshav,HR,1004000.0,2020-01-15,50000.0,550000.0
E2,Afroze,Finance,1204000.0,2019-03-10,7000000.0,7600000.0
E3,Nico,IT,1104000.0,2021-06-25,40000.0,590000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0
E6,Fiona,HR,580000.0,2021-11-11,60000.0,640000.0


In [126]:
# Flag high earners with a boolean column. Comparing the salary column
# (position 2) directly yields one True/False per row, so no Python-level loop
# or `True if ... else False` is needed.
# NOTE(review): every salary in this frame is above 100000, so all rows are
# flagged True; a higher threshold may have been intended, confirm.
df["high_earner"] = df.iloc[:, 2] > 100000


In [128]:
# Inspect the new boolean column.
df.high_earner

E1    True
E2    True
E3    True
E4    True
E5    True
E6    True
Name: high_earner, dtype: bool

In [130]:
# Full frame including the high_earner flag.
df

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp,high_earner
E1,Keshav,HR,1004000.0,2020-01-15,50000.0,550000.0,True
E2,Afroze,Finance,1204000.0,2019-03-10,7000000.0,7600000.0,True
E3,Nico,IT,1104000.0,2021-06-25,40000.0,590000.0,True
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0,True
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0,True
E6,Fiona,HR,580000.0,2021-11-11,60000.0,640000.0,True


In [136]:
# Names of the employees whose salary is above 700000.
# Column position 2 is salary and position 0 is employee. .iloc takes a boolean
# NumPy array as a row mask (a boolean Series is rejected), hence to_numpy().
is_high_salary = (df.iloc[:, 2] > 700000).to_numpy()
high_salary_names = df.iloc[is_high_salary, 0].tolist()


In [138]:
# Inspect the resulting list of names.
high_salary_names

['Keshav', 'Afroze', 'Nico', 'Sravya']

In [140]:
# Final state of the frame after all the edits above.
df

Unnamed: 0,employee,department,salary,joining_date,bonus,total_comp,high_earner
E1,Keshav,HR,1004000.0,2020-01-15,50000.0,550000.0,True
E2,Afroze,Finance,1204000.0,2019-03-10,7000000.0,7600000.0,True
E3,Nico,IT,1104000.0,2021-06-25,40000.0,590000.0,True
E4,Sravya,Finance,720000.0,2018-07-01,80000.0,800000.0,True
E5,Ethan,IT,550000.0,2022-02-14,1000.0,551000.0,True
E6,Fiona,HR,580000.0,2021-11-11,60000.0,640000.0,True
