In [1]:
import pandas as pd

# Customers DataFrame
customers = pd.DataFrame({
    'CustomerID': [1, 2, 3, 4],
    'Name': ['Alice', 'Bob', 'Charlie', 'David']
})

# Orders DataFrame
orders = pd.DataFrame({
    'OrderID': [101, 102, 103, 104],
    'CustomerID': [1, 1, 3, 5],
    'Product': ['Laptop', 'Mouse', 'Keyboard', 'Monitor']
})


In [2]:
inner_join = pd.merge(customers, orders, on='CustomerID', how='inner')
print(inner_join)


   CustomerID     Name  OrderID   Product
0           1    Alice      101    Laptop
1           1    Alice      102     Mouse
2           3  Charlie      103  Keyboard


In [None]:
left_join = pd.merge(customers, orders, on='CustomerID', how='left')
print(left_join)


   CustomerID     Name  OrderID   Product
0           1    Alice    101.0    Laptop
1           1    Alice    102.0     Mouse
2           2      Bob      NaN       NaN
3           3  Charlie    103.0  Keyboard
4           4    David      NaN       NaN


In [4]:
right_join = pd.merge(customers, orders, on='CustomerID', how='right')
print(right_join)


   CustomerID     Name  OrderID   Product
0           1    Alice      101    Laptop
1           1    Alice      102     Mouse
2           3  Charlie      103  Keyboard
3           5      NaN      104   Monitor


In [5]:
full_join = pd.merge(customers, orders, on='CustomerID', how='outer')
print(full_join)


   CustomerID     Name  OrderID   Product
0           1    Alice    101.0    Laptop
1           1    Alice    102.0     Mouse
2           2      Bob      NaN       NaN
3           3  Charlie    103.0  Keyboard
4           4    David      NaN       NaN
5           5      NaN    104.0   Monitor


In [11]:
import pandas as pd

# Employee data (internal ID system)
employees = pd.DataFrame({
    'emp_id': [101, 102, 103],
    'name': ['Alice', 'Bob', 'Charlie']
})

# Payroll system (external ID system)
payroll = pd.DataFrame({
    'employee_number': [101, 102, 104],
    'salary': [70000, 80000, 60000]
})


In [12]:
result = pd.merge(employees, payroll,
                  left_on='emp_id',
                  right_on='employee_number',
                  how='left')
print(result)


   emp_id     name  employee_number   salary
0     101    Alice            101.0  70000.0
1     102      Bob            102.0  80000.0
2     103  Charlie              NaN      NaN


In [13]:
inner_result = pd.merge(employees, payroll,
                        left_on='emp_id',
                        right_on='employee_number',
                        how='inner')
print(inner_result)


   emp_id   name  employee_number  salary
0     101  Alice              101   70000
1     102    Bob              102   80000


In [14]:
result = result.drop(columns='employee_number')
print(result)

   emp_id     name   salary
0     101    Alice  70000.0
1     102      Bob  80000.0
2     103  Charlie      NaN


In [20]:
import pandas as pd
import numpy as np

data = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', np.nan, 'Eve'],
    'Age': [25, np.nan, 30, 22, np.nan],
    'Salary': [50000, 60000, np.nan, 45000, 52000]
})
print(data)


      Name   Age   Salary
0    Alice  25.0  50000.0
1      Bob   NaN  60000.0
2  Charlie  30.0      NaN
3      NaN  22.0  45000.0
4      Eve   NaN  52000.0


In [16]:
data.isnull()
data.isnull().sum()
print(data)

      Name   Age   Salary
0    Alice  25.0  50000.0
1      Bob   NaN  60000.0
2  Charlie  30.0      NaN
3      NaN  22.0  45000.0
4      Eve   NaN  52000.0


In [17]:
data.dropna(axis=1)
print(data)


      Name   Age   Salary
0    Alice  25.0  50000.0
1      Bob   NaN  60000.0
2  Charlie  30.0      NaN
3      NaN  22.0  45000.0
4      Eve   NaN  52000.0


In [18]:
data['Age'].fillna(data['Age'].mean(), inplace=True)
data['Salary'].fillna(data['Salary'].median(), inplace=True)
print(data)

      Name        Age   Salary
0    Alice  25.000000  50000.0
1      Bob  25.666667  60000.0
2  Charlie  30.000000  51000.0
3      NaN  22.000000  45000.0
4      Eve  25.666667  52000.0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Age'].fillna(data['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Salary'].fillna(data['Salary'].median(), inplace=True)


In [25]:
data.replace(to_replace=np.nan, value='Missing', inplace=True)
print(data)

      Name      Age   Salary
0    Alice     25.0  50000.0
1      Bob  Missing  60000.0
2  Charlie     30.0  Missing
3  Missing     22.0  45000.0
4      Eve  Missing  52000.0


In [22]:
filtered = data[data['Salary'].notnull()]
print(filtered)

    Name   Age   Salary
0  Alice  25.0  50000.0
1    Bob   NaN  60000.0
3    NaN  22.0  45000.0
4    Eve   NaN  52000.0
