# In [5]:
import logging
import os

import pandas as pd

# Configure the root logger to append INFO-and-above records to etl_process.log.
logging.basicConfig(
    level=logging.INFO,
    filename='etl_process.log',
)

def _load_table(input_dir, table):
    """Read ``transformed_<table>.csv`` from *input_dir* into a DataFrame."""
    return pd.read_csv(os.path.join(input_dir, f'transformed_{table}.csv'))


def _save_report(frame, output_dir, filename, label, index=False):
    """Write *frame* to ``output_dir/filename`` and log the success line."""
    frame.to_csv(os.path.join(output_dir, filename), index=index)
    logging.info('Generated %s report successfully.', label)


def generate_reports(input_dir: str = 'transformation', output_dir: str = 'reports') -> None:
    """Build the summary CSV reports from the transformed ETL tables.

    Reads the transformed employees, employee-skills, employee-certificates
    and employee-courses CSV files from *input_dir* and writes eight aggregate
    reports into *output_dir*, logging one INFO record per report written.
    Only the four tables that feed a report are read.

    Args:
        input_dir: Directory containing the ``transformed_*.csv`` inputs.
        output_dir: Directory the report CSVs are written to; it is created
            (including parents) when it does not exist yet.

    Returns:
        None. Any exception is logged with its traceback and swallowed, so
        the function never raises; reports written before the failure stay
        on disk.
    """
    try:
        # to_csv does not create missing directories, so do it up front.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        employees = _load_table(input_dir, 'employees')
        employee_skills = _load_table(input_dir, 'employee_skills')
        employee_certificates = _load_table(input_dir, 'employee_certificates')
        employee_courses = _load_table(input_dir, 'employee_courses')

        # Rows: skill_id, columns: one per skill_level, values: row counts.
        skill_summary = (
            employee_skills.groupby('skill_id')['skill_level']
            .value_counts()
            .unstack(fill_value=0)
        )
        _save_report(skill_summary, output_dir,
                     'skill_summary_report.csv', 'skill summary', index=True)

        certification_summary = (
            employee_certificates.groupby('certification_id')
            .size()
            .reset_index(name='count')
        )
        _save_report(certification_summary, output_dir,
                     'certification_summary_report.csv', 'certification summary')

        # Rows: employee_id, columns: one per skill_level, values: row counts.
        competency_summary = (
            employee_skills.groupby(['employee_id', 'skill_level'])
            .size()
            .unstack(fill_value=0)
        )
        _save_report(competency_summary, output_dir,
                     'competency_summary_report.csv', 'competency summary', index=True)

        employee_certificate_summary = (
            employee_certificates.groupby('employee_id')['certification_id']
            .count()
            .reset_index(name='certification_count')
        )
        _save_report(employee_certificate_summary, output_dir,
                     'employee_certificate_summary_report.csv',
                     'employee certificate summary')

        # Join only the columns each report needs: merging the full frames
        # would suffix any other shared column (e.g. a 'department' column in
        # the fact table) with _x/_y and break the groupby below.
        departments = employees[['employee_id', 'department']]

        avg_skill_progress = (
            employee_skills[['employee_id', 'skill_progress']]
            .merge(departments, on='employee_id')
            .groupby('department')['skill_progress']
            .mean()
            .reset_index()
        )
        _save_report(avg_skill_progress, output_dir,
                     'average_skill_progress_by_department.csv',
                     'average skill progress by department')

        course_enrollment_summary = (
            employee_courses.groupby('course_id')
            .size()
            .reset_index(name='enrollment_count')
        )
        _save_report(course_enrollment_summary, output_dir,
                     'course_enrollment_summary_report.csv',
                     'course enrollment summary')

        employee_course_summary = (
            employee_courses.groupby('employee_id')['course_id']
            .count()
            .reset_index(name='course_count')
        )
        _save_report(employee_course_summary, output_dir,
                     'employee_course_summary_report.csv',
                     'employee course summary')

        # NOTE(review): despite the "average" in the file name and log line,
        # this report holds the enrollment *count* per department. The name
        # and column are kept as-is for downstream consumers - confirm which
        # of the two is actually intended.
        department_enrollment = (
            employee_courses[['employee_id', 'course_id']]
            .merge(departments, on='employee_id')
            .groupby('department')['course_id']
            .count()
            .reset_index(name='course_enrollment_count')
        )
        _save_report(department_enrollment, output_dir,
                     'average_course_enrollment_by_department.csv',
                     'average course enrollment by department')

    except Exception as e:
        # Top-level boundary of the reporting step: record the full traceback
        # instead of only the message, but keep the never-raises contract.
        logging.exception("Error in generate_reports: %s", e)

# Generate all reports when this file is executed as a script (not on import).
if __name__ == "__main__":
    generate_reports()
