In [1]:
from src.Get_data_DB import DataTransformer
import os
import requests
import pandas as pd
from datetime import date, datetime
from dotenv import load_dotenv
import traceback

# Load environment variables from the .env file
load_dotenv()

# Transformer used below to run queries against SQL Server
transformer = DataTransformer()

# Ad account name -> Graph API access token, read from environment variables.
# os.getenv returns None when a variable is not set.
ACCESS_TOKENS = {
    "C9": os.getenv("Cole_token"),
    "Cole8": os.getenv("BM_token")
}

# Error log file. Resolved from the current user's home directory, like the
# other DWH_Cole_Project paths in this notebook, instead of a hard-coded
# /home/<user> prefix.
LOG_FILE = os.path.expanduser("~/DWH_Cole_Project/log_error.text")

def log_error(message: str):
    """Append ``message`` to LOG_FILE, prefixed with the current local timestamp."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}] {message}\n"
    with open(LOG_FILE, "a", encoding="utf-8") as log_handle:
        log_handle.write(entry)

# Query the campaigns whose spend will be pulled, for accounts C9 and Cole8.
# NOTE(review): the variable name (and the original comment) say "paused/stopped"
# campaigns, but the WHERE clause selects Trang_thai = 'ACTIVE' — the name looks
# stale; confirm before relying on it.
paused_campaign_query = """
    SELECT STT AS campaign_id, 
           Chien_dich AS campaign_name,
           Ngay_bat_dau,
           Account
    FROM Chien_dich_Meta
    WHERE Account IN ('C9','Cole8')
      AND Trang_thai = 'ACTIVE'
"""
try:
    # Result columns: campaign_id, campaign_name, Ngay_bat_dau, Account
    df = transformer.fetch_from_sql_server(paused_campaign_query)
except Exception as e:
    # Record the failure with its traceback, then re-raise so the cell stops here
    log_error(f"L·ªói khi truy v·∫•n SQL: {e}\n{traceback.format_exc()}")
    raise

# Fetch per-day spend for one campaign from the Facebook Graph API
def fetch_campaign_spend(campaign_id, access_token, start_date, end_date, timeout=30):
    """Return the daily spend rows of one campaign from the Graph API insights edge.

    Args:
        campaign_id: Campaign identifier placed in the request URL.
        access_token: Graph API access token sent with the first request.
        start_date: First day of the window, formatted ``YYYY-MM-DD``.
        end_date: Last day of the window, formatted ``YYYY-MM-DD``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with the ``data`` entries of every result page, in the order
        the API returned them (each entry carries the requested ``spend`` and
        ``date_start`` fields).

    Raises:
        requests.HTTPError: If any page answers with an HTTP error status.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    url = f"https://graph.facebook.com/v20.0/{campaign_id}/insights"
    params = {
        "access_token": access_token,
        "fields": "spend,date_start",
        "time_range": f'{{"since":"{start_date}", "until":"{end_date}"}}',
        "time_increment": 1,
        "limit": 100
    }

    rows = []
    while url:
        # A timeout keeps one stalled connection from hanging the whole run
        res = requests.get(url, params=params, timeout=timeout)
        res.raise_for_status()
        payload = res.json()
        rows.extend(payload.get("data", []))

        # With one row per day and a page size of 100, longer windows span
        # several pages; keep following the "next" link until it is absent.
        url = (payload.get("paging") or {}).get("next")
        # The "next" link is a complete URL, so the query parameters are not re-sent
        params = None
    return rows

# Today's date is the end of every reporting window
today = date.today().strftime("%Y-%m-%d")

# Fixed column layout of the per-account spend CSV. Passing it explicitly means
# an account with no spend rows still gets a file with a header row instead of
# a header-less file.
spend_columns = ["Campaign ID", "Campaign Name", "Date", "Spend"]

# Process each ad account in turn: C9 and Cole8
for account in ["C9", "Cole8"]:
    access_token = ACCESS_TOKENS[account]
    account_campaigns = df[df["Account"] == account].to_dict(orient="records")
    all_rows = []

    print(f"\nüîç ƒêang x·ª≠ l√Ω t√†i kho·∫£n {account} - T·ªïng s·ªë chi·∫øn d·ªãch: {len(account_campaigns)}")

    for campaign in account_campaigns:
        campaign_id = campaign["campaign_id"]
        campaign_name = campaign["campaign_name"]
        # Campaigns without a start date fall back to a fixed window start
        start_date = campaign["Ngay_bat_dau"].strftime("%Y-%m-%d") if pd.notnull(campaign["Ngay_bat_dau"]) else "2024-01-01"

        try:
            print(f"üìä L·∫•y d·ªØ li·ªáu spend: {campaign_name} ({campaign_id}) t·ª´ {start_date}")
            spend_data = fetch_campaign_spend(campaign_id, access_token, start_date, today)
            for d in spend_data:
                all_rows.append({
                    "Campaign ID": campaign_id,
                    "Campaign Name": campaign_name,
                    "Date": d["date_start"],
                    "Spend": float(d["spend"])
                })
        except Exception as e:
            # Best effort: a failing campaign is reported and logged, and the
            # remaining campaigns are still processed
            err_msg = f"L·ªói v·ªõi chi·∫øn d·ªãch {campaign_name} ({campaign_id}): {e}\n{traceback.format_exc()}"
            print(f"‚ö†Ô∏è {err_msg}")
            log_error(err_msg)

    # Write one CSV per account
    try:
        df_spend = pd.DataFrame(all_rows, columns=spend_columns)
        output_path = os.path.expanduser(f"~/DWH_Cole_Project/data_tmp/spend_{account}_ACTIVE.csv")
        df_spend.to_csv(output_path, index=False)
        print(f"‚úÖ ƒê√£ ghi file CSV cho {account}: {output_path}")
    except Exception as e:
        log_error(f"L·ªói khi ghi CSV cho {account}: {e}\n{traceback.format_exc()}")



üîç ƒêang x·ª≠ l√Ω t√†i kho·∫£n C9 - T·ªïng s·ªë chi·∫øn d·ªãch: 2
üìä L·∫•y d·ªØ li·ªáu spend: FA_Ai.Nocode_BinhND_TT_Page AI_22_04 (120223246111590236) t·ª´ 2025-04-22
üìä L·∫•y d·ªØ li·ªáu spend: FA_AI.2025_HienPT_CVS_12_08 (120230933439750236) t·ª´ 2025-08-12
‚úÖ ƒê√£ ghi file CSV cho C9: /home/duclu/DWH_Cole_Project/data_tmp/spend_C9_ACTIVE.csv

üîç ƒêang x·ª≠ l√Ω t√†i kho·∫£n Cole8 - T·ªïng s·ªë chi·∫øn d·ªãch: 3
üìä L·∫•y d·ªØ li·ªáu spend: FA_ML.I_BinhND_CVS_Page BOT_18_06 (120230404055340679) t·ª´ 2025-06-18
üìä L·∫•y d·ªØ li·ªáu spend: FA_ML.I_BinhND_CVS_Page BOT_08_08 (120233571957370679) t·ª´ 2025-08-10
üìä L·∫•y d·ªØ li·ªáu spend: FA_ML.I_BinhND_CVS_Page BOT_18_06 - B·∫£n sao (120233976431820679) t·ª´ 2025-08-19
‚úÖ ƒê√£ ghi file CSV cho Cole8: /home/duclu/DWH_Cole_Project/data_tmp/spend_Cole8_ACTIVE.csv


In [2]:
from src.Get_data_DB import DataTransformer
# Create the transformer instance used to query MySQL
transformer = DataTransformer()

# Daily lead counts per course (Ma_khoa_hoc) for leads with utm_source = 'FA'.
# created_at is shifted by +7 hours before being truncated to a date.
# L1 / L7 / L8 count distinct leads whose order history contains
# sale_order_level_id 1 / 16 / 19; L1_L1C is the level-1 count minus the
# level-3 count.
# NOTE(review): the original comment spoke of loading from 2024 and of a
# 3-month window, but the query below filters on the last 4 months — confirm
# which window is intended.
# NOTE(review): this cell rebinds `df`, which the first cell also uses for the
# campaign list; re-running the first cell's loop after this cell would see
# the wrong frame.
mysql_query = """SELECT DATE(DATE_ADD(l.created_at, INTERVAL 7 HOUR)) AS Thoi_gian,                                               
                        lp.product_id AS Ma_khoa_hoc,
                        COUNT(DISTINCT CASE WHEN s2.sale_order_level_id = 1 THEN l.id END) AS L1,
                        COUNT(DISTINCT CASE WHEN s2.sale_order_level_id = 1 THEN l.id END) - COUNT(DISTINCT CASE WHEN s2.sale_order_level_id = 3 THEN l.id END) AS L1_L1C,
                        COUNT(DISTINCT CASE WHEN s2.sale_order_level_id = 16 THEN l.id END) AS L7,
                        COUNT(DISTINCT CASE WHEN s2.sale_order_level_id = 19 THEN l.id END) AS L8
                FROM leads l 
                JOIN leads_products lp ON lp.lead_id = l.id
                JOIN orders o ON o.lead_id = l.id
                JOIN sale_order_histories s1 ON s1.order_id = o.id AND s1.sale_order_level_id = 1
                JOIN sale_order_histories s2 ON s2.order_id = o.id 
                WHERE l.status = 1 
                AND DATE(DATE_ADD(l.created_at, INTERVAL 7 HOUR)) >=  DATE_SUB(NOW(), INTERVAL 4 MONTH)
                AND l.utm_source='FA'
                GROUP BY Thoi_gian, Ma_khoa_hoc
                    """
df = transformer.fetch_from_mysql(mysql_query)
# Persist the counts; the next cell reads this file back as df_L
df.to_csv("~/DWH_Cole_Project/data_tmp/Count_L1_8_FA.csv",index=False)

In [3]:
import pandas as pd
import os
from rapidfuzz import fuzz, process
from src.Get_data_DB import DataTransformer
from src.Process_utm import ColumnStandardizer


# Standardizer applied further down to the marketer codes parsed from campaign names.
# NOTE(review): threshold=75 is presumably a similarity cut-off and the empty
# preserve_if_low_similarity list means no value is exempted — confirm against
# ColumnStandardizer in src.Process_utm.
standardizer = ColumnStandardizer(
    threshold=75,
    preserve_if_low_similarity=[]
)
# Transformer used below to read the course dimension from SQL Server
transformer=DataTransformer()

# Helpers that standardize the course name found inside a campaign name
def build_standard_list(series):
    """Build the reference names used for fuzzy matching.

    Missing values and exact duplicates are dropped, and each remaining value
    is converted to ``str`` and stripped of surrounding whitespace.

    Returns:
        A tuple ``(standard_list, standard_map)``: the lower-cased names as a
        list, and a dict from each lower-cased name to its stripped original
        spelling (for equal lower-cased names the last one wins).
    """
    originals = [str(item).strip() for item in series.dropna().drop_duplicates()]
    lowered = [name.lower() for name in originals]
    return lowered, dict(zip(lowered, originals))

def match_course_name(value, standard_list, standard_map, threshold=60):
    """Fuzzy-match one course name against the reference list.

    Args:
        value: Raw course name; a missing value yields the fallback label.
        standard_list: Lower-cased reference names to search.
        standard_map: Dict from lower-cased reference name to the spelling to return.
        threshold: Minimum ``fuzz.ratio`` score for a match to be accepted.

    Returns:
        The mapped reference name for the best match when its score reaches
        ``threshold``; otherwise the fallback label.
    """
    fallback = "Kh√°c"

    if pd.isna(value):
        return fallback

    needle = str(value).strip().lower()
    best = process.extractOne(needle, standard_list, scorer=fuzz.ratio)
    if best is None:
        # No candidate at all, e.g. an empty reference list
        return fallback

    candidate, similarity, _ = best
    return standard_map[candidate] if similarity >= threshold else fallback

def standardize_course_column(input_series, standard_list, standard_map, threshold=60):
    """Run ``match_course_name`` on every element of ``input_series`` and return the resulting Series."""
    def _normalise(raw_name):
        return match_course_name(raw_name, standard_list, standard_map, threshold)

    return input_series.apply(_normalise)

# Course dimension: course code (Ma_khoa_hoc) and course name (Ten_khoa_hoc)
Query_KH=""" select Ma_khoa_hoc, Ten_khoa_hoc 
            from Dim_Khoa_hoc """

# Read the course dimension from SQL Server
df_KH=transformer.fetch_from_sql_server(Query_KH)

# Lead counts written by the previous cell
df_L=pd.read_csv("~/DWH_Cole_Project/data_tmp/Count_L1_8_FA.csv")
# Template frame (spend CSV columns, no rows) returned when a spend file is empty or unreadable
empty_df_template = pd.DataFrame(columns=['Campaign ID', 'Campaign Name', 'Date', 'Spend'])
# Check a spend CSV file and read it, falling back to an empty frame
def read_csv_safe(file_path):
    """Read a CSV file, returning a copy of ``empty_df_template`` when nothing usable is found.

    The fallback is used when the file is missing, is zero bytes long, parses
    to a frame without rows, or raises any error while being read; each case
    prints a message first. ``~`` in ``file_path`` is expanded.
    """
    resolved = os.path.expanduser(file_path)

    # Missing or zero-byte file: nothing to parse
    is_missing = not os.path.exists(resolved)
    if is_missing or os.path.getsize(resolved) == 0:
        print(f"File {file_path} r·ªóng ho·∫∑c kh√¥ng t·ªìn t·∫°i. T·∫°o DataFrame r·ªóng.")
        return empty_df_template.copy()

    try:
        loaded = pd.read_csv(resolved)

        # A frame with rows is returned as read
        if not loaded.empty:
            return loaded

        # Header present but no rows
        print(f"File {file_path} kh√¥ng c√≥ d·ªØ li·ªáu. T·∫°o DataFrame r·ªóng.")
        return empty_df_template.copy()

    except pd.errors.EmptyDataError:
        print(f"File {file_path} kh√¥ng c√≥ d·ªØ li·ªáu (EmptyDataError). T·∫°o DataFrame r·ªóng.")
        return empty_df_template.copy()
    except Exception as e:
        # Any other read failure is reported and replaced by the fallback frame
        print(f"L·ªói khi ƒë·ªçc file {file_path}: {str(e)}. T·∫°o DataFrame r·ªóng.")
        return empty_df_template.copy()

# File paths

# The PAUSED spend files below were loaded only once, on the very first run
#file_path3 = "~/DWH_Cole_Project/data_tmp/spend_C9_PAUSED.csv"   # loaded a single time on the first run
#file_path4 = "~/DWH_Cole_Project/data_tmp/spend_Cole8_PAUSED.csv"
file_path1 = "~/DWH_Cole_Project/data_tmp/spend_C9_ACTIVE.csv"
file_path2 = "~/DWH_Cole_Project/data_tmp/spend_Cole8_ACTIVE.csv"

# Read the spend data (an empty template frame is returned for missing/empty files)
df_cf1 = read_csv_safe(file_path1)
df_cf2 = read_csv_safe(file_path2)
#df_cf3=pd.read_csv(file_path3)
#df_cf4=pd.read_csv(file_path4)
df_cf = pd.concat([df_cf1,df_cf2], ignore_index=True)

# Campaign names are '_'-separated: the second token is taken as the course
# name and the third as the marketer code
# (e.g. "FA_Ai.Nocode_BinhND_..." -> "Ai.Nocode", "BinhND")
df_cf['Ten_khoa_hoc'] = df_cf['Campaign Name'].str.split('_').str[1]
df_cf['Ma_marketer'] = df_cf['Campaign Name'].str.split('_').str[2]

df_cf['Ma_marketer'] = standardizer.transform(df_cf['Ma_marketer'])


# Step 1: build the reference list of course names
standard_list, standard_map = build_standard_list(df_KH['Ten_khoa_hoc'].drop_duplicates())

# Special case: in the first half of 2025, Ai.Nocode campaigns ran for the DTDN course
# NOTE(review): the comment says six months, but the mask covers
# 2025-01-01 through 2025-10-01 inclusive — confirm the intended end date.
df_cf['Date'] = pd.to_datetime(df_cf['Date'])
mask = (df_cf['Ten_khoa_hoc'] == 'Ai.Nocode') & (df_cf['Date'] >= '2025-01-01') & (df_cf['Date'] <= '2025-10-01')
df_cf.loc[mask, 'Ten_khoa_hoc'] = 'DTDN'   

# Exact (non-regex) replacement: only values equal to 'BI' become 'BI.01'
df_cf['Ten_khoa_hoc'] = df_cf['Ten_khoa_hoc'].replace('BI', 'BI.01', regex=False)

# Step 2: standardize df_cf['Ten_khoa_hoc'] against the reference list
df_cf['Ten_khoa_hoc'] = standardize_course_column(
    df_cf['Ten_khoa_hoc'],
    standard_list,
    standard_map,
    threshold=70
)

# Reduce both date columns to plain dates so they can be used as merge keys
df_cf["Date"] = pd.to_datetime(df_cf["Date"]).dt.date
df_L["Thoi_gian"] = pd.to_datetime(df_L["Thoi_gian"]).dt.date
df_cf = df_cf.rename(columns={
    'Spend': 'Chi_phi',
    'Date': 'Thoi_gian'
})
# Attach the course code. This is an inner merge, so spend rows whose
# standardized course name has no match in df_KH are dropped here.
# NOTE(review): that presumably includes rows given the fallback label by
# match_course_name — confirm that losing their spend is intended.
df_cf=df_cf.merge(df_KH,on='Ten_khoa_hoc',how='inner')
df_cf = df_cf.drop(columns=['Campaign ID', 'Campaign Name','Ten_khoa_hoc'])


# Keep matching records as well as records that have spend but no conversions
# --> these are loaded into the Chi_phi_FA table
# Total spend per date, course and marketer.
# NOTE(review): groupby drops rows with a missing key by default, so spend with
# a NaN Ma_marketer would disappear here — verify standardizer.transform never yields NaN.
df_cf = df_cf.groupby(['Thoi_gian', 'Ma_khoa_hoc','Ma_marketer'])[['Chi_phi']].sum().reset_index()

# Left merge: every spend row is kept; lead counts are NaN where df_L has no match
df_FA=df_cf.merge(df_L,on=['Thoi_gian','Ma_khoa_hoc'], how='left')

# The product DE.COMBO01 has two course codes, so mapping Ten_khoa_hoc to
# Ma_khoa_hoc produces two spend records with the same cost for it.
# NOTE(review): 515 and 550 are presumably those two codes — confirm against Dim_Khoa_hoc.
# 1. Split the rows that need processing from the rest
df_to_process = df_FA[df_FA['Ma_khoa_hoc'].isin([515, 550])]
df_others = df_FA[~df_FA['Ma_khoa_hoc'].isin([515, 550])]

# 2. If there are rows to process
if not df_to_process.empty:
    def xu_ly_nhom(gr):
        """Pick the rows to keep from one (Chi_phi, Thoi_gian) group.

        For a group of exactly two rows:
          * one row without L1 -> keep the row that has L1;
          * both rows without L1 -> keep the 515 row, or no row at all when
            neither row is 515;
          * both rows with L1 -> keep both.
        For any other group size, rows without L1 are dropped.

        NOTE(review): groups are keyed on the cost value and the date only, so
        rows of different marketers with an equal cost land in the same group,
        and several branches drop rows that carry spend — confirm this is intended.
        """
        na_mask = gr['L1'].isna()
        if len(gr) == 2:
            n_missing = na_mask.sum()
            if n_missing == 1:
                return gr[~na_mask]
            if n_missing == 2:
                if (gr['Ma_khoa_hoc'] == 515).any():
                    return gr[gr['Ma_khoa_hoc'] == 515]
                # Typed empty slice (same columns and dtypes as the group)
                # rather than a column-less pd.DataFrame()
                return gr.iloc[0:0]
            return gr
        return gr[~na_mask] if na_mask.any() else gr

    df_processed = (
        df_to_process
        .groupby(['Chi_phi', 'Thoi_gian'], group_keys=False)
        .apply(xu_ly_nhom)
    )
else:
    # Nothing to process: reuse the (empty) selection, which keeps df_FA's
    # column dtypes. An object-dtype pd.DataFrame(columns=...) here makes the
    # concat below rely on pandas' deprecated handling of empty entries.
    df_processed = df_to_process.copy()

# 3. Put the two parts back together
df_FA = pd.concat([df_processed, df_others], ignore_index=True)


# Missing lead counts (spend without conversions) become 0
df_FA = df_FA.fillna(0)
df_L78=df_FA[['Thoi_gian','Ma_khoa_hoc','Ma_marketer','L7','L8']]
df_FA=df_FA[['Thoi_gian','Ma_khoa_hoc','Ma_marketer','Chi_phi','L1','L1_L1C']]



  df_FA = pd.concat([df_processed, df_others], ignore_index=True)
  df_FA = df_FA.fillna(0)


In [4]:
# Lead counts that have no matching spend are reported as L1_mess in the
# Chi_phi_mess table. The right merge keeps every df_L row; rows without a
# spend match have NaN Chi_phi (and NaN Ma_marketer).
df_mess_bonus=df_cf.merge(df_L, on=['Ma_khoa_hoc', 'Thoi_gian'], how='right')
df_mess_bonus=df_mess_bonus[['Thoi_gian','Chi_phi','Ma_khoa_hoc','Ma_marketer','L1','L1_L1C','L7','L8']]
# .copy() makes the filtered rows an independent frame, so the column
# assignment below does not write into a slice of the merged frame
# (SettingWithCopyWarning).
df_mess_bonus = df_mess_bonus[df_mess_bonus['Chi_phi'].isna()].copy()
df_mess_bonus['Chi_phi'] = df_mess_bonus['Chi_phi'].fillna(0)


# Write the results into the data_result folder
df_FA.to_csv("~/DWH_Cole_Project/data_result/Chi_phi_FA_transformed.csv", index=False)
df_L78.to_csv("~/DWH_Cole_Project/data_result/L78_FA_transformed.csv", index=False)
df_mess_bonus.to_csv("~/DWH_Cole_Project/data_result/Chi_phi_mess_bonus_transformed.csv", index=False)