# In [1]:
import pandas as pd
import logging

logging.basicConfig(filename='etl_process.log', level=logging.INFO)

def transform_employees():
    """Clean the staged employees extract.

    Reads ``staging/employees.csv``, drops rows containing any missing
    value, normalizes ``department`` to stripped title case, and removes
    duplicate rows.

    Returns:
        The cleaned DataFrame, or ``None`` if any step raised; the error
        and its traceback are written to the log in that case.
    """
    try:
        # Normalize before de-duplicating: rows that differ only in the
        # case/whitespace of ``department`` become identical once cleaned,
        # so de-duplicating first would let them through as duplicates.
        df = (
            pd.read_csv('staging/employees.csv')
            .dropna()
            .assign(department=lambda d: d['department'].str.strip().str.title())
            .drop_duplicates()
        )
        logging.info('Transformed employees data successfully.')
        return df
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error in transform_employees: {e}")
        return None

def transform_skills():
    """Clean the staged skills extract.

    Reads ``staging/skills.csv``, removes duplicate rows, and drops rows
    containing any missing value.

    Returns:
        The cleaned DataFrame, or ``None`` if any step raised; the error
        and its traceback are written to the log in that case.
    """
    try:
        df = pd.read_csv('staging/skills.csv').drop_duplicates().dropna()
        logging.info('Transformed skills data successfully.')
        return df
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error in transform_skills: {e}")
        return None

def transform_certifications():
    """Clean the staged certifications extract.

    Reads ``staging/certifications.csv``, removes duplicate rows, and
    drops rows containing any missing value.

    Returns:
        The cleaned DataFrame, or ``None`` if any step raised; the error
        and its traceback are written to the log in that case.
    """
    try:
        df = pd.read_csv('staging/certifications.csv').drop_duplicates().dropna()
        logging.info('Transformed certifications data successfully.')
        return df
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error in transform_certifications: {e}")
        return None

def transform_employee_skills():
    """Clean the staged employee-skills extract.

    Reads ``staging/employee_skills.csv``, drops rows containing any
    missing value, normalizes ``skill_level`` to stripped title case,
    clamps ``skill_progress`` into the range 0..100, and removes
    duplicate rows.

    Returns:
        The cleaned DataFrame, or ``None`` if any step raised; the error
        and its traceback are written to the log in that case.
    """
    try:
        # Normalize and clamp before de-duplicating: rows that differ only
        # in the case/whitespace of ``skill_level`` or in an out-of-range
        # ``skill_progress`` collapse to the same row once cleaned.
        df = (
            pd.read_csv('staging/employee_skills.csv')
            .dropna()
            .assign(
                skill_level=lambda d: d['skill_level'].str.strip().str.title(),
                skill_progress=lambda d: d['skill_progress'].clip(lower=0, upper=100),
            )
            .drop_duplicates()
        )
        logging.info('Transformed employee skills data successfully.')
        return df
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error in transform_employee_skills: {e}")
        return None

def transform_employee_certificates():
    """Clean the staged employee-certificates extract.

    Reads ``staging/employee_certificates.csv``, removes duplicate rows,
    and drops rows containing any missing value.

    Returns:
        The cleaned DataFrame, or ``None`` if any step raised; the error
        and its traceback are written to the log in that case.
    """
    try:
        df = pd.read_csv('staging/employee_certificates.csv').drop_duplicates().dropna()
        logging.info('Transformed employee certificates data successfully.')
        return df
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error in transform_employee_certificates: {e}")
        return None

def transform_courses():
    """Clean the staged courses extract.

    Reads ``staging/courses.csv``, drops rows containing any missing
    value, normalizes ``department``, ``role`` and ``course_name`` to
    stripped title case, and removes duplicate rows.

    Returns:
        The cleaned DataFrame, or ``None`` if any step raised; the error
        and its traceback are written to the log in that case.
    """
    try:
        # Normalize before de-duplicating: rows that differ only in the
        # case/whitespace of the text columns become identical once
        # cleaned, so de-duplicating first would let them through.
        df = (
            pd.read_csv('staging/courses.csv')
            .dropna()
            .assign(
                department=lambda d: d['department'].str.strip().str.title(),
                role=lambda d: d['role'].str.strip().str.title(),
                course_name=lambda d: d['course_name'].str.strip().str.title(),
            )
            .drop_duplicates()
        )
        logging.info('Transformed courses data successfully.')
        return df
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error in transform_courses: {e}")
        return None

def transform_employee_courses():
    """Clean the staged employee-courses extract.

    Reads ``staging/employee_courses.csv``, removes duplicate rows, and
    drops rows containing any missing value.

    Returns:
        The cleaned DataFrame, or ``None`` if any step raised; the error
        and its traceback are written to the log in that case.
    """
    try:
        df = pd.read_csv('staging/employee_courses.csv').drop_duplicates().dropna()
        logging.info('Transformed employee courses data successfully.')
        return df
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error in transform_employee_courses: {e}")
        return None

if __name__ == "__main__":
    import os  # local import: only the script entry point needs it

    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before the first write.
    os.makedirs('transformation', exist_ok=True)

    # Output files that were not written because their transform failed.
    skipped_outputs = []

    def _save(frame, filename):
        """Write *frame* under transformation/, or record *filename* as skipped.

        Each transform_* function logs its own error and returns None on
        failure, so None here means "nothing to save" and must not be
        dereferenced.
        """
        if frame is None:
            skipped_outputs.append(filename)
            return
        frame.to_csv(f'transformation/{filename}', index=False)

    # The transformed_* names are kept at module level so anything that
    # runs after this block can still refer to the individual frames.
    transformed_employees = transform_employees()
    _save(transformed_employees, 'transformed_employees.csv')

    transformed_skills = transform_skills()
    _save(transformed_skills, 'transformed_skills.csv')

    transformed_certifications = transform_certifications()
    _save(transformed_certifications, 'transformed_certifications.csv')

    transformed_employee_skills = transform_employee_skills()
    _save(transformed_employee_skills, 'transformed_employee_skills.csv')

    transformed_employee_certificates = transform_employee_certificates()
    _save(transformed_employee_certificates, 'transformed_employee_certificates.csv')

    transformed_courses = transform_courses()
    _save(transformed_courses, 'transformed_courses.csv')

    transformed_employee_courses = transform_employee_courses()
    _save(transformed_employee_courses, 'transformed_employee_courses.csv')

    # Only claim success when every output was actually written.
    if skipped_outputs:
        logging.error("Outputs not written: %s", ", ".join(skipped_outputs))
        print("Transformations finished with failures; not saved: " + ", ".join(skipped_outputs))
    else:
        print("All transformations completed and saved successfully.")


# Output: All transformations completed and saved successfully.
