## IMPORT LIBRARY


In [1]:
import os
import re
from datetime import datetime
import pandas as pd
from IPython.display import HTML

## IMPORT SOURCES


In [2]:
base_df = pd.read_csv("./base-data.csv")

## CALCULATION


### 1. LOAD ALL ETOP COURSES


In [5]:
data_folder = "data"
courses = []

for filename in os.listdir(data_folder):
    if filename.endswith(".xlsx"):
        # Remove .xlsx extension
        name_without_ext = filename.replace(".xlsx", "")
        courses.append({"name": name_without_ext[0:5], "path": os.path.join(data_folder, filename)})

print(courses)

[{'name': 'EM279', 'path': 'data\\EM279 Sức mạnh của việc xây dựng tầm nhìn - Choi Kyoung Mi_Attendances_20250825-0955.xlsx'}, {'name': 'EM282', 'path': 'data\\EM282 Tìm hiểu về ngành Mỹ thuật và Kiến trúc - Tu My Hieu_Attendances_20250825-0955.xlsx'}, {'name': 'EM284', 'path': 'data\\EM284 3 bí kíp giảm áp lực trong việc học và trong cuộc sống - Le Van Tien Si_Attendances_20250825-0956.xlsx'}, {'name': 'EM285', 'path': 'data\\EM285 Self-confidence  - Nguyen Hoai Phong_Attendances_20250825-0956.xlsx'}, {'name': 'EM286', 'path': 'data\\EM286 Khám phá sự nghiệp từ những cơ hội bất ngờ! - Anderson _Attendances_20250825-0943.xlsx'}, {'name': 'EM286', 'path': 'data\\EM286 Khám phá sự nghiệp từ những cơ hội bất ngờ! - Anderson _Attendances_20250825-0958.xlsx'}, {'name': 'EM287', 'path': 'data\\EM287 Khoa học sức khoẻ - Sự học lâu dài - Ung Nguyen Vu Hoang_Attendances_20250825-0959.xlsx'}, {'name': 'EM288', 'path': 'data\\EM288 How to have a Great STEM Interview_ - Brianna Ha_Attendances_2025

### 2. FOR EACH COURSE THEN CAL


In [6]:
# Khởi tạo result_df với base_df
result_df = base_df.copy()

for item in courses:
    df = pd.DataFrame()
    print(f"Processing {item['path']}...")
    try:
        # Read the Excel file, skipping the first 3 rows
        df = pd.read_excel(item["path"], sheet_name="Attendances", skiprows=3)
        
        # Rename "First name" to "PALS_ID" if it exists
        if "First name" in df.columns:
            df.rename(
                columns={
                    "First name": "PALS_ID",
                },
                inplace=True,
            )
        else:
            print(f"Warning: 'First name' column not found in {item['path']}. Skipping...")
            continue
        
        # Filter PALS_ID based on a regex pattern
        pattern = r"[A-Z]{2}[0-9]{2}[A-Z]{1}[0-9]{4}"
        if "PALS_ID" in df.columns:
            df = df[df["PALS_ID"].str.match(pattern, na=False)]
        else:
            print(f"Warning: 'PALS_ID' column not found in {item['path']}. Skipping...")
            continue
        
        # Define a pattern to match date columns
        # date_pattern = r'\d{2}\s\w{3}\s\d{4}\s\d{1,2}\.\d{2}[AP]M'
        date_pattern = r'\d{2}\s\w{3}\s\d{4}\s\d{1,2}\.\d{2}(AM|PM)\s?.*'
        date_columns = [col for col in df.columns if re.match(date_pattern, str(col))]
        
        # Convert original date columns to new format
        new_date_columns = {}
        for old_col in date_columns:
            date_part = ' '.join(old_col.split(' ')[:4])
            date_obj = datetime.strptime(date_part, '%d %b %Y %I.%M%p')
            new_col_name = f"{item['name']} {date_obj.strftime('%d/%m/%y %H:%M')}"
            new_date_columns[old_col] = new_col_name
        
        # Rename columns in the dataframe
        df_renamed = df.rename(columns=new_date_columns)
        
        # Select relevant columns
        df_renamed = df_renamed[["PALS_ID"] + list(new_date_columns.values())]
        
        # Perform left join with base_df
        result_df = pd.merge(result_df, df_renamed, on="PALS_ID", how="left")
        
        # Display or save the result (optional)
        print(f"Processed {item['name']} successfully")
        # Uncomment the line below to display the result
        # print(result_df)
        
    except Exception as e:
        print(f"Error processing {item['path']}: {str(e)}")
        continue

Processing data\EM279 Sức mạnh của việc xây dựng tầm nhìn - Choi Kyoung Mi_Attendances_20250825-0955.xlsx...
Processed EM279 successfully
Processing data\EM282 Tìm hiểu về ngành Mỹ thuật và Kiến trúc - Tu My Hieu_Attendances_20250825-0955.xlsx...
Processed EM282 successfully
Processing data\EM284 3 bí kíp giảm áp lực trong việc học và trong cuộc sống - Le Van Tien Si_Attendances_20250825-0956.xlsx...
Processed EM284 successfully
Processing data\EM285 Self-confidence  - Nguyen Hoai Phong_Attendances_20250825-0956.xlsx...
Processed EM285 successfully
Processing data\EM286 Khám phá sự nghiệp từ những cơ hội bất ngờ! - Anderson _Attendances_20250825-0943.xlsx...
Processed EM286 successfully
Processing data\EM286 Khám phá sự nghiệp từ những cơ hội bất ngờ! - Anderson _Attendances_20250825-0958.xlsx...
Processed EM286 successfully
Processing data\EM287 Khoa học sức khoẻ - Sự học lâu dài - Ung Nguyen Vu Hoang_Attendances_20250825-0959.xlsx...
Processed EM287 successfully
Processing data\EM288

### EXPORT DATA & DISPLAY


In [7]:
result_df.to_csv("./export/result.csv", index=False)
display(HTML(result_df.head(100).to_html(index=False)))

PROGRAM,PALS_ID,FULLNAME,EMAIL,EM279 13/09/24 09:00,EM279 15/09/24 09:00,EM279 15/09/24 14:30,EM279 19/09/24 10:30,EM279 26/09/24 14:00,EM279 17/11/24 15:00,EM279 24/11/24 15:00,EM282 10/11/24 09:00,EM282 13/11/24 10:30,EM282 20/11/24 14:00,EM282 26/02/25 14:00,EM284 20/11/24 09:00,EM284 22/11/24 09:00,EM284 22/11/24 14:30,EM284 22/11/24 20:00,EM284 23/11/24 09:00,EM284 26/11/24 10:30,EM284 11/03/25 14:00,EM285 13/12/24 14:00,EM285 21/03/25 14:00,EM285 24/04/25 20:30,EM286 30/11/24 09:00_x,EM286 13/12/24 14:00_x,EM286 14/01/25 21:30_x,EM286 21/03/25 14:00_x,EM286 30/11/24 09:00_y,EM286 13/12/24 14:00_y,EM286 14/01/25 21:30_y,EM286 21/03/25 14:00_y,EM287 24/01/25 09:00,EM287 26/01/25 09:00,EM287 26/01/25 14:30,EM287 26/01/25 20:00,EM287 27/01/25 09:00,EM287 30/01/25 10:30,EM287 15/05/25 14:00,EM288 21/02/25 09:00,EM288 23/02/25 09:00,EM288 23/02/25 14:30,EM288 23/02/25 20:00,EM288 24/02/25 09:00,EM288 27/02/25 10:30,EM288 12/06/25 14:00,EM289 11/04/25 00:00,EM289 11/04/25 09:00,EM289 13/04/25 09:00,EM289 13/04/25 14:30,EM289 13/04/25 20:00,EM289 14/04/25 09:00,EM289 17/04/25 10:30,EM289 24/04/25 14:00,EM289 31/07/25 14:00,EM290 18/04/25 09:00,EM290 18/04/25 20:00,EM290 20/04/25 09:00,EM290 20/04/25 14:30,EM290 20/04/25 20:00,EM290 21/04/25 09:00,EM290 24/04/25 10:30,EM291 21/03/25 09:00_x,EM291 23/03/25 09:00_x,EM291 23/03/25 14:30_x,EM291 23/03/25 20:00_x,EM291 23/03/25 20:00_x.1,EM291 23/03/25 20:00_x.2,EM291 23/03/25 20:00_x.3,EM291 24/03/25 09:00_x,EM291 27/03/25 10:30_x,EM291 11/06/25 19:30_x,EM291 10/07/25 14:00_x,EM291 21/03/25 09:00_y,EM291 23/03/25 09:00_y,EM291 23/03/25 14:30_y,EM291 23/03/25 20:00_y,EM291 23/03/25 20:00_y.1,EM291 23/03/25 20:00_y.2,EM291 23/03/25 20:00_y.3,EM291 24/03/25 09:00_y,EM291 27/03/25 10:30_y,EM291 11/06/25 19:30_y,EM291 10/07/25 14:00_y,EM292 16/04/25 14:00,EM292 23/07/25 14:00,EM293 21/02/25 09:00,EM293 23/02/25 09:00,EM293 23/02/25 14:30,EM293 23/02/25 20:00,EM293 24/02/25 09:00,EM293 27/02/25 10:30,EM293 13/04/25 10:00,EM293 12/06/25 14:00,EM294 21/02/25 09:00,EM294 23/02/25 09:00,EM294 23/02/25 14:30,EM294 23/02/25 20:00,EM294 24/02/25 09:00,EM294 27/02/25 10:30,EM294 12/06/25 14:00,EM296 18/05/25 20:00,EM296 17/06/25 19:30
SEEDS,PY21P0005,TRẦN THỊ MỸ TƯỜNG,tuong.py21p0005@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
SEEDS,PY21P0007,LÊ THẢO VI,vi.py21p0007@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
SEEDS,QT22P0377,TRƯƠNG THỊ ANH TÚ,tu.qt22p0377@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
SEEDS,QT22P0400,NGUYỄN THÚY HÀ,ha.qt22p0400@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
SEEDS,QT23P0448,PHÙNG MỸ GIAO,giao.qt23p0448@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
SEEDS,HU23P0527,ĐẶNG TRƯƠNG GIA HUY,huy.hu23p0527@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
SEEDS,HU23P0528,NGÔ THỊ QÙYNH MƠ,mo.hu23p0528@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
SEEDS,JP24P0003,PHAN THỊ HUYỀN,hynphan2359@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
GEMS,DT23P0797,Trần Lữ Kim Ngân,tranngukimngan112022@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
GEMS,DT22P0729,Nguyễn Thị Mỹ Kiều,kieu.nguyen.23dt@gmail.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
