In [1]:
import pandas as pd
import numpy as np

data = {
    'Name': [' Alice ', 'bob', 'Charlie', 'bob', 'D@niel', 'Eva', np.nan],
    'Age': ['25', 'thirty', '35', '29', '40', '', '22'],
    'Salary': ['5000$', '6000$', 'NaN', '5500$', None, '5200$', '4800$'],
    'Join_Date': ['2021/01/05', '05-02-2021', '2021.03.07', '2021-04-10', '10-05-21', 'June 15, 2021', ''],
    'Department': ['Sales', 'sales', 'HR', 'Hr', 'IT', None, 'SALES']
}

df = pd.DataFrame(data)
print(df)

      Name     Age Salary      Join_Date Department
0   Alice       25  5000$     2021/01/05      Sales
1      bob  thirty  6000$     05-02-2021      sales
2  Charlie      35    NaN     2021.03.07         HR
3      bob      29  5500$     2021-04-10         Hr
4   D@niel      40   None       10-05-21         IT
5      Eva          5200$  June 15, 2021       None
6      NaN      22  4800$                     SALES


In [2]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000$,2021/01/05,Sales
1,bob,thirty,6000$,05-02-2021,sales
2,Charlie,35,,2021.03.07,HR
3,bob,29,5500$,2021-04-10,Hr
4,D@niel,40,,10-05-21,IT
5,Eva,,5200$,"June 15, 2021",
6,,22,4800$,,SALES


In [3]:
df['Name'] = df['Name'].str.strip().str.title().str.replace('@',"a",regex = True)

In [4]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000$,2021/01/05,Sales
1,Bob,thirty,6000$,05-02-2021,sales
2,Charlie,35,,2021.03.07,HR
3,Bob,29,5500$,2021-04-10,Hr
4,DaNiel,40,,10-05-21,IT
5,Eva,,5200$,"June 15, 2021",
6,,22,4800$,,SALES


In [5]:
def convert_age(val):
    val = str(val).strip().lower()
    if val.isdigit():
        return int(val)
    elif val in data:
        return data[val]
    else:
        return np.nan

df['Age'] = df['Age'].apply(convert_age)

df['Age'] = df['Age'].fillna(df['Age'].mean()).astype(int)

print(df['Age'])

0    25
1    30
2    35
3    29
4    40
5    30
6    22
Name: Age, dtype: int64


In [6]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000$,2021/01/05,Sales
1,Bob,30,6000$,05-02-2021,sales
2,Charlie,35,,2021.03.07,HR
3,Bob,29,5500$,2021-04-10,Hr
4,DaNiel,40,,10-05-21,IT
5,Eva,30,5200$,"June 15, 2021",
6,,22,4800$,,SALES


In [7]:
df['Age'] = df['Age'].fillna(df['Age'].mean()).astype(int)

In [8]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000$,2021/01/05,Sales
1,Bob,30,6000$,05-02-2021,sales
2,Charlie,35,,2021.03.07,HR
3,Bob,29,5500$,2021-04-10,Hr
4,DaNiel,40,,10-05-21,IT
5,Eva,30,5200$,"June 15, 2021",
6,,22,4800$,,SALES


In [9]:
df['Salary'] = df['Salary'].str.replace('$', '', regex=False)
df['Salary'] = pd.to_numeric(df['Salary'], errors='coerce')

In [10]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000.0,2021/01/05,Sales
1,Bob,30,6000.0,05-02-2021,sales
2,Charlie,35,,2021.03.07,HR
3,Bob,29,5500.0,2021-04-10,Hr
4,DaNiel,40,,10-05-21,IT
5,Eva,30,5200.0,"June 15, 2021",
6,,22,4800.0,,SALES


In [11]:
df['Salary'] = df['Salary'].fillna(df['Salary'].mean())


In [12]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000.0,2021/01/05,Sales
1,Bob,30,6000.0,05-02-2021,sales
2,Charlie,35,5300.0,2021.03.07,HR
3,Bob,29,5500.0,2021-04-10,Hr
4,DaNiel,40,5300.0,10-05-21,IT
5,Eva,30,5200.0,"June 15, 2021",
6,,22,4800.0,,SALES


In [15]:
def clean_format_date(val):
    if pd.isna(val):
        return np.nan
    val = str(val).replace('/', '-').replace('.', '-').replace(',', '-')
    try:
        date = pd.to_datetime(val, dayfirst=True, errors='coerce')
        if pd.notna(date):
            return date.strftime('%d-%m-%Y')  
        else:
            return np.nan
    except:
        return np.nan

# تطبيق التنظيف على عمود التاريخ
df['Join_Date'] = df['Join_Date'].apply(clean_format_date)

In [16]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000.0,01-05-2021,Sales
1,Bob,30,6000.0,05-02-2021,sales
2,Charlie,35,5300.0,03-07-2021,HR
3,Bob,29,5500.0,04-10-2021,Hr
4,DaNiel,40,5300.0,10-05-2021,IT
5,Eva,30,5200.0,15-06-2021,
6,,22,4800.0,,SALES


In [17]:
df['Department'] = df['Department'].str.strip().str.title()

In [18]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000.0,01-05-2021,Sales
1,Bob,30,6000.0,05-02-2021,Sales
2,Charlie,35,5300.0,03-07-2021,Hr
3,Bob,29,5500.0,04-10-2021,Hr
4,DaNiel,40,5300.0,10-05-2021,It
5,Eva,30,5200.0,15-06-2021,
6,,22,4800.0,,Sales


In [20]:
most_common = df['Department'].mode()[0]   
df['Department'] = df['Department'].fillna(most_common)

In [21]:
df

Unnamed: 0,Name,Age,Salary,Join_Date,Department
0,Alice,25,5000.0,01-05-2021,Sales
1,Bob,30,6000.0,05-02-2021,Sales
2,Charlie,35,5300.0,03-07-2021,Hr
3,Bob,29,5500.0,04-10-2021,Hr
4,DaNiel,40,5300.0,10-05-2021,It
5,Eva,30,5200.0,15-06-2021,Sales
6,,22,4800.0,,Sales
