## Join and Concatenate

##### In Pandas, join and concatenate are two important operations used to combine multiple DataFrames into one.
##### They are commonly used in data preprocessing and data analysis, especially when working with data from different sources.

In [1]:
# join() is used to combine two DataFrames based on the index or a key column (like SQL joins).

# You can perform:

# inner join → keeps only matching rows. (Intersection)

# outer join → keeps all rows from both DataFrames, fills missing values with NaN. (Union)

# left join → keeps all rows from the left DataFrame. (All from left)

# right join → keeps all rows from the right DataFrame. (All from right)
#  Why use join()?
# To combine two datasets that share a common key (like "user_id", "student_id", etc.)
# 2. concat() (concatenate) in Pandas
# concat() is used to combine DataFrames vertically (stack rows) or horizontally (stack columns).

# Syntax: pd.concat([df1, df2], axis=0) for rows, axis=1 for columns.

# Why use concat()?
# To append data (like daily logs or monthly sales) or merge columns with the same or related index.

In [2]:
import pandas as pd

In [3]:
# Employee table used as the left side of the join examples.
# dept_id is stored as integers so that it has the same dtype as the
# dept_id key of the department table; merging a string key against an
# integer key raises a ValueError instead of matching rows.
df_employees = pd.DataFrame({
  'emp-id': [1, 2, 3, 4],
  'name': ['Alice', 'Bob', 'Charlie', 'David'],
  'dept_id': [101, 102, 103, 104],
})

In [4]:
# Show the employee table (columns: emp-id, name, dept_id).
df_employees

Unnamed: 0,emp-id,name,dept_id
0,1,Alice,101
1,2,Bob,102
2,3,Charlie,103
3,4,David,104


In [5]:
##Create Department Dataframe 

In [6]:
# Department lookup table used as the right side of the join examples.
# It contains a department (105) that no employee belongs to.
df_departments = pd.DataFrame(
    {
        "dept_id": [101, 102, 105],
        "dept_name": ["HR", "Finance", "IT"],
    }
)

In [7]:
# Show the department table (columns: dept_id, dept_name).
df_departments

Unnamed: 0,dept_id,dept_name
0,101,HR
1,102,Finance
2,105,IT


In [14]:
## Inner join
# Keeps only the rows whose dept_id is present in both tables.
df_employees.merge(df_departments, how='inner', on='dept_id')

Unnamed: 0,emp-id,name,dept_id,dept_name
0,1,Alice,101,HR
1,2,Bob,102,Finance


In [15]:
## Right Join

In [None]:
# Right join: the right table (df_departments) takes priority, so every
# dept_id / dept_name row is kept; employee columns are NaN where no
# employee matches.
df_employees.merge(df_departments, how='right', on='dept_id')

Unnamed: 0,emp-id,name,dept_id,dept_name
0,1.0,Alice,101,HR
1,2.0,Bob,102,Finance
2,,,105,IT


In [20]:
# Left join: the left table (df_employees) takes priority, so every
# emp-id / name row is kept; dept_name is NaN where no department matches.
df_employees.merge(df_departments, how='left', on='dept_id')

Unnamed: 0,emp-id,name,dept_id,dept_name
0,1,Alice,101,HR
1,2,Bob,102,Finance
2,3,Charlie,103,
3,4,David,104,


In [None]:
## Full outer join
# Union of both tables: every row from either side is kept and the
# columns without a match are filled with NaN.
df_employees.merge(df_departments, how='outer', on='dept_id')

Unnamed: 0,emp-id,name,dept_id,dept_name
0,1.0,Alice,101,HR
1,2.0,Bob,102,Finance
2,3.0,Charlie,103,
3,4.0,David,104,
4,,,105,IT


In [25]:
## Different column names:
# Rename the department key so the two tables no longer share a column name.
# The result is reassigned instead of using inplace=True: rename() then
# returns a new frame, which keeps the step explicit and chainable.
# NOTE: from this cell onward df_departments has 'id' rather than 'dept_id',
# so the merges on 'dept_id' shown earlier must be run before this cell.
df_departments = df_departments.rename(columns={'dept_id': 'id'})

In [26]:
# Show the department table after the rename (columns: id, dept_name).
df_departments

Unnamed: 0,id,dept_name
0,101,HR
1,102,Finance
2,105,IT


In [27]:
## When the key columns are named differently in the two tables, use
## left_on / right_on to say which column to match on each side.
df_employees.merge(
    df_departments,
    how='inner',
    left_on='dept_id',
    right_on='id',
)

Unnamed: 0,emp-id,name,dept_id,id,dept_name
0,1,Alice,101,101,HR
1,2,Bob,102,102,Finance


In [33]:
# Same inner join again: employees.dept_id matched against departments.id.
# Both key columns (dept_id and id) appear in the result.
df_employees.merge(df_departments, left_on='dept_id', right_on='id', how='inner')

Unnamed: 0,emp-id,name,dept_id,id,dept_name
0,1,Alice,101,101,HR
1,2,Bob,102,102,Finance


#### loc and iloc

Used for selecting rows and columns of a DataFrame: `loc` selects by label, `iloc` selects by integer position.

In [None]:
#Sample DataFrame
# Small sample frame for the loc / iloc examples.
# Passing index= replaces the default 0, 1, 2 row labels with 'a', 'b', 'c'.
row_labels = ['a', 'b', 'c']
df = pd.DataFrame(
    {
        'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
    },
    index=row_labels,
)

In [35]:
# Show the sample frame; the row labels are 'a', 'b', 'c'.
df

Unnamed: 0,Name,Age
a,Alice,25
b,Bob,30
c,Charlie,35


### loc: Label-based indexing
Uses labels (names of rows or columns).

- "l" for label.
- Takes both a row label and a column label, e.g. `df.loc['a', 'Age']`.

In [37]:
# Select the row labelled 'a'; the result is a Series of that row's values.
df.loc['a']

Name    Alice
Age        25
Name: a, dtype: object

In [None]:
# Row label 'a', column label 'Age' -> a single scalar value.
df.loc['a','Age']

np.int64(25)

In [39]:
# Row label 'a', column label 'Name' -> a single scalar value.
df.loc['a','Name']

'Alice'

### iloc: Index Location (Integer-based indexing)
Uses integer positions (like Python lists).

- "i" for integer.
- Zero-based indexing.

In [43]:
# A notebook cell only renders the value of its last expression, so the
# first lookup is passed to display() explicitly; as a bare expression its
# result would be computed and then silently discarded.
display(df.iloc[0])  # First row
df.iloc[2, 1]        # 3rd row, 2nd column (by position)

np.int64(35)

In [40]:
# Row at integer position 0 (the first row), returned as a Series.
df.iloc[0]

Name    Alice
Age        25
Name: a, dtype: object

In [None]:
# Row position 0, column position 0 -> a single scalar value.
df.iloc[0,0]

'Alice'

In [42]:
# Row position 2, column position 1 (3rd row, 2nd column) -> a single scalar value.
df.iloc[2,1]

np.int64(35)