## Useful functionalities

### Lecture agenda

- Head and tail
- Handling missing values
- Handling duplicates
- Dropping rows/columns
- Sorting
- Renaming rows/columns
- Mapping

In [None]:
import pandas as pd
import numpy as np

#### Head and tail

In [None]:
# Small example frame (numeric columns with NaN, a string column and a
# complete numeric column) used to demonstrate head() and tail()
df = pd.DataFrame({
    'A': [1, 2, np.nan, 4, 5],
    'B': [np.nan, 7, 8, 9, 10],
    'C': ['a', 'b', 'c', 'd', 'e'],
    'D': [100, 200, 300, 400, 500]
})

# A bare expression on the last line of a cell is rendered as the cell output
df

In [None]:
# First 2 rows of the DataFrame
df.head(2)

In [None]:
# Last 2 rows of the DataFrame
df.tail(2)

#### Missing values in dataframes

In [None]:
# None can represent a missing value
var = None

# Can be detected with == (this works, but PEP 8 recommends against
# comparing to None with ==)
print(var == None)

# Can be detected with `is None` -- the preferred, identity-based check
print(var is None)

In [None]:
# np.nan is also used to represent missing values
var = np.nan

# False: NaN never compares equal to anything, not even to itself
print(var==np.nan)
# True, but only because var is bound to the very same object as np.nan;
# an identity check is not a reliable NaN test in general
print(var is np.nan)
# True: np.isnan is the reliable way to test a float for NaN
print(np.isnan(var))

#### Handling missing values

In [None]:
# Create a DataFrame with missing values
# (column 'C' mixes np.nan and None; pandas treats both as missing)
df = pd.DataFrame({
    'A': [1, 2, np.nan, 4, 5],
    'B': [np.nan, 7, 8, 9, 10],
    'C': [np.nan, 12, None, 14, 15]
})

df

In [None]:
# Check for missing values: returns a boolean DataFrame of the same shape,
# True where a value is missing
df.isna()

In [None]:
# Count the number of missing values in each column
# (axis=0 sums the boolean mask down the rows, giving one total per column)
df.isna().sum(axis=0)

In [None]:
# Remove rows containing at least one missing value.
# The surviving rows keep their original index labels, and df itself is
# not modified because the result is not assigned back.
df.dropna(axis=0)

In [None]:
# Remove rows containing missing values and renumber the remaining rows
# 0..n-1 instead of keeping the original index labels
# NOTE: the ignore_index argument of dropna needs pandas >= 2.0
df.dropna(ignore_index=True)

In [None]:
# Remove only rows where certain columns
# have missing values
df = pd.DataFrame({
    'A': [1, 2, 3, 4, np.nan],
    'B': [np.nan, 7, 8, 9, 10],
    'C': [11, 12, np.nan, 14, 15]
})

print('Original : ')
# display() renders the frame mid-cell (only the last expression of a cell
# is rendered automatically)
display(df)

print('After dropna : ')
# Only 'A' and 'B' are inspected: the row whose sole missing value is in
# 'C' is kept
df = df.dropna(subset=['A', 'B'])
df

In [None]:
# Remove columns containing missing values
df = pd.DataFrame({
    'A': [1, 2, np.nan, 4, 5],
    'B': [np.nan, 7, 8, 9, 10],
    'C': [11, 12, np.nan, 14, 15],
    'D': [100, 200, 300, 400, 500]
})

print('Original : ')
display(df)

print('After dropna : ')
# axis=1 drops columns instead of rows; 'D' is the only column without a
# missing value, so it is the only one left
df = df.dropna(axis=1)
df

#### Handling duplicate values

In [None]:
# pandas was already imported in the first cell; importing it again is harmless
import pandas as pd

# Rows 0 and 3 are identical in every column; row 7 shares 'Name' and 'Age'
# with them but has a different 'Subject'
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Alice', 'Eva', 'Frank', 'Grace', 'Alice'],
    'Age': [20, 19, 21, 20, 22, 21, 19, 20],
    'Subject': ['Math', 'Physics', 'Chemistry', 'Math', 'Physics', 'Chemistry', 'Math', 'Physics']
}

df = pd.DataFrame(data)
df

In [None]:
# Boolean Series: True for rows that repeat an earlier row in every column
# (the first occurrence itself is marked False)
df.duplicated()

In [None]:
# keep=False marks every member of a duplicate group as True,
# including the first occurrence
df.duplicated(keep=False)

In [None]:
# Display the DataFrame again (duplicated() does not modify it)
df

In [None]:
# Compare rows on the 'Name' column only when looking for duplicates
df.duplicated(subset=['Name'])

In [None]:
# Drop fully duplicated rows, keeping the first occurrence of each.
# The result is a new DataFrame; df is unchanged since it is not reassigned.
df.drop_duplicates()

In [None]:
# keep=False drops every row that has a duplicate, keeping none of them
df.drop_duplicates(keep=False)

In [None]:
# Keep only the first row for each distinct 'Name'
df.drop_duplicates(subset=['Name'])

In [None]:
# Drop duplicated rows and renumber the remaining rows 0..n-1
df.drop_duplicates(ignore_index=True)

#### Drop columns / rows

In [None]:
# Example data for the drop / filter demonstrations: five columns of
# mixed types, five rows with the default integer index 0..4
data = {
    'P': [1, 2, 3, 4, 5],
    'b': ['a', 'b', 'c', 'd', 'e'],
    'C': [1.1, 2.2, 3.3, 4.4, 5.5],
    'D': [10, 20, 30, 40, 50],
    'E': ['x', 'y', 'z', 'xx', 'yy'],
}

# Create a pandas DataFrame from the dictionary
df = pd.DataFrame(data)

df

In [None]:
# Drop columns by name; drop() returns a new DataFrame, so the result is
# assigned back to df to make the change stick for the following cells
df = df.drop(columns=['P'])
df

In [None]:
# Drop rows by index label (the row labelled 2, not necessarily the third row)
df = df.drop(index=[2])
df

In [None]:
# Select columns to keep: every column not listed is discarded
df = df.filter(['b', 'C'], axis=1)
df

In [None]:
# Select rows to keep by index label: every row not listed is discarded
df = df.filter([0, 3], axis=0)
df

#### Sorting

In [None]:
# Example data for sorting: 'Age' contains repeated values, which makes
# the effect of a secondary sort key visible later on
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Harry'],
    'Age': [22, 19, 21, 20, 22, 21, 19, 20],
    'Grade': [85, 95, 77, 88, 92, 76, 99, 89],
    'Subject': ['Math', 'Physics', 'Chemistry', 'Math', 'Physics', 'Chemistry', 'Math', 'Physics']
}

df = pd.DataFrame(data)
df

In [None]:
# Sort by index (row labels), ascending by default
df.sort_index()


In [None]:
# Reverse order: sort by index labels, descending
df.sort_index(ascending=False)

In [None]:
# Sort by the values of the 'Age' column (rows keep their original index labels)
df.sort_values(by='Age')


In [None]:
# Sort by the values of 'Age' and renumber the rows 0..n-1 in the new order
df.sort_values(by='Age', ignore_index=True)


In [None]:
# Sort by the values of 'Age', descending
df.sort_values(by='Age', ascending=False)


In [None]:
# Sort by 'Age' first; rows with equal 'Age' are then ordered by 'Grade'
df.sort_values(by=['Age', 'Grade'])

#### Rename columns / rows

In [None]:
# Example frame for the rename demonstrations (default integer index 0..4)
df = pd.DataFrame({
    'A': [1, 2, np.nan, 4, 5],
    'B': [np.nan, 7, 8, 9, 10],
    'C': ['a', 'b', 'c', 'd', 'e'],
    'D': [100, 200, 300, 400, 500]
})

df

In [None]:
# Rename columns: the mapper is {old name: new name} and axis=1 applies it
# to the column labels; columns not in the mapper keep their names
df = df.rename(mapper={'A': 'P', 'B':'b'}, axis=1)
df

In [None]:
# Rename rows: axis=0 applies the {old label: new label} mapper to the
# index; rows not in the mapper keep their labels
df = df.rename(mapper={0: 'zero', 1: 'one'}, axis=0)
df

#### Mapping

In [None]:
# Example data for mapping: 'Grade' contains values on both sides of the
# pass threshold (50) used by the status cells further down
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Harry'],
    'Age': [22, 19, 21, 20, 22, 21, 19, 20],
    'Grade': [20, 95, 77, 88, 30, 76, 99, 89],
    'Subject': ['Math', 'Physics', 'Chemistry', 'Math', 'Physics', 'Chemistry', 'Math', 'Physics']
}

df = pd.DataFrame(data)
df

In [None]:
# Lookup table: subject name -> code that will replace it in 'Subject'
map_dict = {
    'Math' : 'M102',
    'Physics': 'P102',
    'Chemistry': 'C102'
}

In [None]:
# Replace each subject name by its entry in map_dict (overwrites the column).
# Series.map yields NaN for any value that is not a key of the dict.
df['Subject'] = df['Subject'].map(map_dict)
df

In [None]:
def check_grade(grade):
    """Classify a grade as ``'passed'`` or ``'failed'``.

    A grade strictly greater than 50 is ``'passed'``; a grade of exactly 50
    or anything lower is ``'failed'``.
    """
    return 'passed' if grade > 50 else 'failed'

In [None]:
# Call check_grade on every value of 'Grade' and store the results in a
# new column
df['Status_1'] = df['Grade'].map(check_grade)
df

In [None]:
# The same pass/fail rule (> 50 passes) written inline as a lambda instead
# of a named function
df['Status'] = df['Grade'].map(lambda x: 'passed' if x > 50 else 'failed')
df