In [1]:
import pandas as pd

## 1. Read a CSV file and filter rows based on a specific column

In [2]:
# Read CSV file
file_path = 'input.csv'

try:    
    df = pd.read_csv(file_path)

except FileNotFoundError:
    print('File not found')

In [3]:
df.dtypes

timestamp      object
id              int64
name           object
age           float64
salary        float64
department     object
location       object
status         object
score         float64
remarks        object
dtype: object

In [4]:
df.head()

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
3,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average


In [5]:
# target column name
col_name = 'score'

# keep only the rows where column_name is not null
filtered_df = df[df[col_name].notna()]
    
filtered_df

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
10,2024-02-23 10:23:50,109,William,32.0,58000.0,Marketing,Chicago,Inactive,80.0,Good


## 2. Merge two dataframes using a common column

In [6]:
# Example DataFrame

df1 = pd.DataFrame(
    {
        'id' : [1, 2, 3],
        'name' : ['Alice', 'Bob', 'Suzy']
    }
)

df2 = pd.DataFrame(
    {
        'id' : [1, 2, 4],
        'age' : [25, 20, 28]
    }
)

### Joins in Pandas

In [7]:
# INNER JOIN : Keeps only matching rows
inner_join = pd.merge(df1, df2, on='id', how='inner')

# LEFT JOIN : Keeps all rows from df1, fills unmatched with NaN
left_join = pd.merge(df1, df2, on='id', how='left')

# RIGHT JOIN : Keeps all rows from df2, fills unmatched with NaN
right_join = pd.merge(df1, df2, on='id', how='right')

# OUTER JOIN : Keeps all rows from both, fills unmatched with NaN
outer_join = pd.merge(df1, df2, on='id', how='outer')


# print results
print("Inner Join:\n", inner_join)
print("\nLeft Join:\n", left_join)
print("\nRight Join:\n", right_join)
print("\nOuter Join:\n", outer_join)

Inner Join:
    id   name  age
0   1  Alice   25
1   2    Bob   20

Left Join:
    id   name   age
0   1  Alice  25.0
1   2    Bob  20.0
2   3   Suzy   NaN

Right Join:
    id   name  age
0   1  Alice   25
1   2    Bob   20
2   4    NaN   28

Outer Join:
    id   name   age
0   1  Alice  25.0
1   2    Bob  20.0
2   3   Suzy   NaN
3   4    NaN  28.0


## 3. Write a DataFrame to a CSV file without the index

In [8]:
df1.to_csv('Output.csv', index=False)

## 4. Handle missing data by replacing NaN values

### 1. Replace NaN with 0 or string

In [9]:
df.fillna(0)


Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,0.0,55000.0,IT,Los Angeles,Inactive,0.0,0
3,2024-02-23 10:17:00,103,James,0.0,55000.0,IT,Los Angeles,Inactive,0.0,0
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,0,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,0.0,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,0.0,0
8,2024-02-23 10:22:10,108,Emma,0.0,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,0.0,65000.0,Sales,Los Angeles,Active,92.0,Very Good


In [10]:
df.fillna('Missing')

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,Missing,55000.0,IT,Los Angeles,Inactive,Missing,Missing
3,2024-02-23 10:17:00,103,James,Missing,55000.0,IT,Los Angeles,Inactive,Missing,Missing
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,Missing,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,Missing,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,Missing,Missing
8,2024-02-23 10:22:10,108,Emma,Missing,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,Missing,65000.0,Sales,Los Angeles,Active,92.0,Very Good


In [11]:
col = 'score'

### 2. Fill NaN with Column Mean, Median or Mode

#### a. Fill NaN with Mean

In [12]:
df_copy = df.copy()
df_copy[col] = df_copy[col].fillna(df_copy[col].mean())

df_copy

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,87.125,
3,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,87.125,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,87.125,
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good


#### b. Fill NaN with Median


In [13]:
df_copy = df.copy()
df_copy[col] = df_copy[col].fillna(df_copy[col].median())

df_copy

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,89.0,
3,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,89.0,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,89.0,
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good


#### c. Fill NaN with Mode


In [14]:
df_copy = df.copy()
df_copy[col] = df_copy[col].fillna(df_copy[col].mode()[0])

df_copy

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,92.0,
3,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,92.0,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,92.0,
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good


### 3. Fill using Forward/Backward fill


In [15]:
df.fillna(method='ffill') # forward fill (copy previous value)

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,30.0,55000.0,IT,Los Angeles,Inactive,90.0,Excellent
3,2024-02-23 10:17:00,103,James,30.0,55000.0,IT,Los Angeles,Inactive,90.0,Excellent
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,Los Angeles,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,52000.0,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,95.0,Excellent
8,2024-02-23 10:22:10,108,Emma,27.0,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,27.0,65000.0,Sales,Los Angeles,Active,92.0,Very Good


In [16]:
df.fillna(method='bfill') # backward fill (copy next value)


Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,28.0,55000.0,IT,Los Angeles,Inactive,75.0,Average
3,2024-02-23 10:17:00,103,James,28.0,55000.0,IT,Los Angeles,Inactive,75.0,Average
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,Miami,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,70000.0,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,92.0,Very Good
8,2024-02-23 10:22:10,108,Emma,32.0,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,32.0,65000.0,Sales,Los Angeles,Active,92.0,Very Good


### 4. Fill NaN with Interpolation


In [17]:
df.interpolate() # Linear interpolation
df.interpolate(method='polynomial', order=2) # Polynomial interpolation


Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,30.059128,55000.0,IT,Los Angeles,Inactive,85.341367,
3,2024-02-23 10:17:00,103,James,26.582411,55000.0,IT,Los Angeles,Inactive,74.129855,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,65697.836167,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,94.235327,
8,2024-02-23 10:22:10,108,Emma,19.13224,65000.0,Sales,Los Angeles,Active,92.0,Very Good
9,2024-02-23 10:22:10,108,Emma,20.798907,65000.0,Sales,Los Angeles,Active,92.0,Very Good


### 5. Fill NaN with different values for each column


In [18]:
df.fillna(
    {
        'score' : 0,
        'remarks' : 'Missing',
        'age' : df['age'].mean()
    }
)

df[['age', 'score', 'remarks']]

Unnamed: 0,age,score,remarks
0,25.0,85.0,Good
1,30.0,90.0,Excellent
2,,,
3,,,
4,28.0,75.0,Average
5,35.0,88.0,Good
6,40.0,95.0,Excellent
7,27.0,,
8,,92.0,Very Good
9,,92.0,Very Good


### 6. Fill NaN using a function


In [19]:
df_copy = df.copy()
df_copy['age'] = df_copy['age'].apply(lambda x : df['age'].median() if pd.isna(x) else x)
df_copy[['id', 'name', 'age', 'salary']]

Unnamed: 0,id,name,age,salary
0,101,John,25.0,50000.0
1,102,Anna,30.0,60000.0
2,103,James,30.0,55000.0
3,103,James,30.0,55000.0
4,104,Maria,28.0,52000.0
5,105,Michael,35.0,
6,106,Sophia,40.0,70000.0
7,107,Liam,27.0,48000.0
8,108,Emma,30.0,65000.0
9,108,Emma,30.0,65000.0


### 7. Replace NaN in specific rows



In [20]:
doc = """
        1. df[col].isna()
                a. Checks which values in column col are NaN (Missing).
                b. Returns a Boolean Series (True for NaN, False otherwise).

        2. df.loc[df[col].isna(), col]
                a. df.loc[] selects rows where the condition is True (i.e., where column in NaN).
                b. Targets only the col column

        3. = 'custom value'
                a. assigns custom value to all selected NaN entries in col
            
"""



df_copy = df.copy()
df_copy.loc[df_copy['remarks'].isna(), 'remarks'] = 'Outstanding'
df_copy[['remarks']]

Unnamed: 0,remarks
0,Good
1,Excellent
2,Outstanding
3,Outstanding
4,Average
5,Good
6,Excellent
7,Outstanding
8,Very Good
9,Very Good


## 5. Convert a DataFrame column string to datetime format

In [21]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

print(df.dtypes) # check data type

timestamp     datetime64[ns]
id                     int64
name                  object
age                  float64
salary               float64
department            object
location              object
status                object
score                float64
remarks               object
dtype: object


## 6. Handling Duplicates

### 1. Remove duplicate rows 

In [22]:
df = df.drop_duplicates()
doc = """
    1. Remove duplicate rows, keeping the first occurrence.
    2. Equivalent to df.drop_duplicates(keep='first')
"""
df

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,Good
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,Excellent
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,,
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
10,2024-02-23 10:23:50,109,William,32.0,58000.0,Marketing,Chicago,Inactive,80.0,Good


### 2. Remove duplicate rows based on specific column(s)

In [23]:
# Removes duplicate values based on a specific column (col_name).
df_copy = df.copy()
df_copy = df_copy.drop_duplicates(subset=['remarks'])
df_copy

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good


In [24]:
# Can specify multiple columns:
df_copy = df_copy.drop_duplicates(subset=['status', 'location'])
df_copy

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good


### 3. Remove duplicates but keep the last occurrence

In [25]:
# Keeps the most recent occurrence instead of first
df = df.drop_duplicates(keep='last')

### 4. Remove duplicates but keep none (Drop all duplicates)

In [26]:
# Drops all rows that have duplicates (no first ot last kept)
df = df[df.duplicated(keep=False) == False]

### 5. Remove duplicates from a specific row (Row-wise unique values)

In [27]:
# index of the row to modify
row_idx = 2

# removes duplicates within the row while keeping the order
df.iloc[row_idx] = list(dict.fromkeys(df.iloc[row_idx]))

### 6. Remove Duplicate values in a specific column

In [28]:
# Removes duplicates from a single column, but keeps the original DataFrame shape (NaN replaces removes values).
df['remarks'] = df['remarks'].drop_duplicates()

### 7. Remove duplicate rows with a condition (keep highest ot lowest value)

In [29]:
# keeps only the row where value is highest for each id.
# if you have a DataFrame with duplicate IDs but want to keep the row with the highest value in another column:
# df = df.sort_values('value', ascending=False).drop_duplicates(subset=['id'], keep='first') 

### 8. Remove duplicates using .duplicated()

In [30]:
# find duplicates without dropping:
df[df.duplicated()]

# find non-duplicate rows:
df[~df.duplicated()]

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
4,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
5,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,
6,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,
7,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,,
8,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
10,2024-02-23 10:23:50,109,William,32.0,58000.0,Marketing,Chicago,Inactive,80.0,


### 9. Remove duplicates while resetting index

In [31]:
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
3,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
4,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,
5,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,
6,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,,
7,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
8,2024-02-23 10:23:50,109,William,32.0,58000.0,Marketing,Chicago,Inactive,80.0,


### 10. Given a DataFrame with duplicate timestamps, keep only the latest entry

In [32]:
df = df.sort_values(by='timestamp').drop_duplicates(subset='id', keep='last')

doc = """
        1. df.sort_values(by='timestamp')
                a. Sorts the DataFrame by the 'timestamp' column in ascending order (oldest to newest).
                b. Ensures that the latest timestamp appears last for each id.

        2. .drop_duplicates(subset='id', keep='last')
                a. Removes duplicate rows based on the 'id' column.
                b. keep='last' -> keeps the last occurrence (i.e., the most recent timestamp for each id).
    """

df

Unnamed: 0,timestamp,id,name,age,salary,department,location,status,score,remarks
0,2024-02-23 10:15:30,101,John,25.0,50000.0,Sales,New York,Active,85.0,Good
1,2024-02-23 10:16:45,102,Anna,30.0,60000.0,HR,Chicago,Active,90.0,Excellent
2,2024-02-23 10:17:00,103,James,,55000.0,IT,Los Angeles,Inactive,,
3,2024-02-23 10:18:20,104,Maria,28.0,52000.0,Marketing,,Active,75.0,Average
4,2024-02-23 10:19:10,105,Michael,35.0,,Finance,Miami,Inactive,88.0,
5,2024-02-23 10:20:05,106,Sophia,40.0,70000.0,HR,Chicago,Active,95.0,
6,2024-02-23 10:21:30,107,Liam,27.0,48000.0,IT,New York,Active,,
7,2024-02-23 10:22:10,108,Emma,,65000.0,Sales,Los Angeles,Active,92.0,Very Good
8,2024-02-23 10:23:50,109,William,32.0,58000.0,Marketing,Chicago,Inactive,80.0,
