In [3]:
import pandas as pd

def load_datasets(file1, file2):
    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)
    return df1, df2

def handle_missing_values(df):
    for col in df.columns:
        if df[col].dtype == 'O':
            df[col].fillna(df[col].mode()[0], inplace=True)
        else:
            df[col].fillna(df[col].mean(), inplace=True)
    return df

def standardize_dates(df):
    date_cols = [col for col in df.columns if 'date' in col.lower()]
    for col in date_cols:
        df[col] = pd.to_datetime(df[col], errors='coerce').dt.strftime('%Y-%m-%d')
    return df

def remove_duplicates(df):
    return df.drop_duplicates()

def fix_wrong_data(df):
    if 'age' in df.columns:
        df = df[df['age'] >= 0]
    return df

def remove_unnecessary_columns(df, unnecessary_cols=None):
    if unnecessary_cols is None:
        unnecessary_cols = ['unnecessary', 'notes', 'misc']
    df = df.drop([col for col in unnecessary_cols if col in df.columns], axis=1)
    return df

def clean_dataset(df):
    df = handle_missing_values(df)
    df = standardize_dates(df)
    df = remove_duplicates(df)
    df = fix_wrong_data(df)
    df = remove_unnecessary_columns(df)
    return df

# Usage
df1, df2 = load_datasets('Sales.csv', 'Mine.csv')
df1_clean = clean_dataset(df1)
df2_clean = clean_dataset(df2)

df1_clean.to_csv('cleaned_Sales.csv', index=False)
df2_clean.to_csv('cleaned_Mine.csv', index=False)

print("Datasets cleaned and saved as 'cleaned_Sales.csv' and 'cleaned_Mine.csv'")

Datasets cleaned and saved as 'cleaned_Sales.csv' and 'cleaned_Mine.csv'


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values 