In [1]:
import pandas as pd
import random
import os
import requests
import csv
import time

# --- Moodle API configuration ---
# The endpoint and token can be overridden through the MOODLE_URL and
# MOODLE_TOKEN environment variables so that a deployment does not have to
# edit this file. The literals below are only fallbacks.
MOODLE_URL = os.environ.get('MOODLE_URL', 'http://localhost:8100/webservice/rest/server.php')
# NOTE(review): a web-service token is a credential; prefer MOODLE_TOKEN over
# keeping a real token in source control, and replace the fallback with your own.
TOKEN = os.environ.get('MOODLE_TOKEN', 'ac6bc02c96ff699f2d0df05b9382e23b')
FORMAT = 'json'

# --- Moodle API helper ---
def call_api(function, extra_params, timeout=10):
    """Call a Moodle Web Service REST function and return the decoded JSON.

    Args:
        function: Name of the web-service function (sent as ``wsfunction``).
        extra_params: Extra form fields, merged over the token, format and
            function fields.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            server would block the caller indefinitely.

    Returns:
        The decoded JSON payload, or an empty dict when the request fails,
        times out, returns an HTTP error status, or the body is not valid
        JSON. An error message is printed in those cases.
    """
    params = {
        'wstoken': TOKEN,
        'moodlewsrestformat': FORMAT,
        'wsfunction': function
    }
    params.update(extra_params)
    try:
        response = requests.post(MOODLE_URL, data=params, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
        return response.json()
    # ValueError covers JSON decoding failures on requests versions where the
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"‚ùå L·ªói khi g·ªçi Moodle API '{function}': {e}")
        return {}  # Empty dict so callers can keep using .get() with defaults

# --- Look up a user's cluster in the CSV file ---
def get_user_cluster(user_id):
    """Return the cluster stored for *user_id* in the clustered-features CSV.

    The CSV is read again on every call. ``None`` is returned when the file
    does not exist (a message is printed) or when no row has a matching
    ``userid``; otherwise the ``cluster`` value of the first matching row.
    """
    csv_path = "./synthetic_user_features_clustered.csv"  # Relative to the working directory
    try:
        features = pd.read_csv(csv_path)
    except FileNotFoundError:
        print("‚ùå Kh√¥ng t√¨m th·∫•y file: synthetic_user_features_clustered.csv")
        return None

    matches = features.loc[features['userid'] == user_id]
    # No matching user means no cluster information is available
    return None if matches.empty else matches.iloc[0]['cluster']

# --- Constants and hyperparameters for Q-learning ---
LEARNING_RATE = 0.1    # Step size applied to each Q-value update
DISCOUNT_FACTOR = 0.9  # Weight given to the best future Q-value
# NOTE(review): EXPLORATION_RATE is not referenced by any function visible in
# this file; suggest_next_action always picks the best known action.
EXPLORATION_RATE = 0.2 # Exploration rate (chance of choosing a random action)

# Actions that can be learned and suggested
ACTIONS = [
    'read_new_resource',
    'review_old_resource',
    'attempt_new_quiz',
    'redo_failed_quiz',
    'skip_to_next_module'
]

# Map an activity type found in the CSV log onto one of the defined actions
def get_action_from_type(activity_type):
    """Translate a logged activity type into a Q-learning action name.

    ``'quiz'`` maps to ``'attempt_new_quiz'`` (a first attempt is assumed;
    telling it apart from ``'redo_failed_quiz'`` would need more logic).
    ``'resource'`` and ``'hvp'`` map to ``'read_new_resource'``. Any other
    value falls back to ``'skip_to_next_module'``.
    """
    if activity_type == 'quiz':
        return 'attempt_new_quiz'
    if activity_type in ('resource', 'hvp'):
        return 'read_new_resource'
    # Default action for unrecognised activity types
    return 'skip_to_next_module'

# Bin edges used to discretise the state
# Average quiz score per module: 0-2 (poor), 2-5 (average), 5-8 (good), 8-10 (excellent)
QUIZ_SCORE_BINS = [2, 5, 8, 10]
# Share of resources read: 0-40% (0), 41-80% (1), 81-100% (2)
READ_RATE_BINS = [0.4, 0.8, 1.0]

# Module-level Q-table, populated by training through update_q_table
q_table = {}

# --- State discretisation ---
def discretize_state(avg_quiz_score, completion_rate, quiz_passed, cluster):
    """Convert raw learner metrics into the discrete state tuple.

    Args:
        avg_quiz_score: Average quiz score, compared against the first three
            edges of ``QUIZ_SCORE_BINS`` (strictly greater than an edge moves
            the value into the next bin).
        completion_rate: Fraction of resources read, compared against the
            first two edges of ``READ_RATE_BINS`` in the same way.
        quiz_passed: Pass flag; converted with ``int()``.
        cluster: Cluster id, or ``None``/NaN when unknown.

    Returns:
        ``(quiz_bin, read_bin, quiz_passed, cluster)`` as a tuple of ints,
        with ``quiz_bin`` in 0..3, ``read_bin`` in 0..2 and a missing cluster
        mapped to 0.
    """
    # Discretise the average quiz score
    quiz_bin = 0
    if avg_quiz_score > QUIZ_SCORE_BINS[2]:  # > 8
        quiz_bin = 3
    elif avg_quiz_score > QUIZ_SCORE_BINS[1]:  # > 5
        quiz_bin = 2
    elif avg_quiz_score > QUIZ_SCORE_BINS[0]:  # > 2
        quiz_bin = 1

    # Discretise the resource completion rate
    read_bin = 0
    if completion_rate > READ_RATE_BINS[1]:  # > 0.8
        read_bin = 2
    elif completion_rate > READ_RATE_BINS[0]:  # > 0.4
        read_bin = 1

    # A missing cluster shows up as None (user absent from the cluster file)
    # or as NaN (an empty CSV cell read back by pandas). int(NaN) raises
    # ValueError, so both cases fall back to cluster 0.
    cluster_missing = cluster is None or pd.isna(cluster)
    cluster_val = 0 if cluster_missing else int(cluster)
    return (quiz_bin, read_bin, int(quiz_passed), cluster_val)

# --- Q-value lookup ---
def get_q_value(state, action):
    """Return the Q-value stored for the ``(state, action)`` pair.

    A state that is not yet in the module-level ``q_table`` is added first,
    with every action in ``ACTIONS`` initialised to 0.0.
    """
    try:
        action_values = q_table[state]
    except KeyError:
        # First visit to this state: start every action at zero
        action_values = q_table[state] = dict.fromkeys(ACTIONS, 0.0)
    return action_values[action]

# --- Q-table update ---
def update_q_table(state, action, reward, next_state):
    """Apply one Q-learning update to the module-level ``q_table``.

    The entry for ``(state, action)`` moves towards
    ``reward + DISCOUNT_FACTOR * max_a Q(next_state, a)`` by a fraction
    ``LEARNING_RATE``. A ``next_state`` that has not been seen yet
    contributes a future value of 0.0 and is not added to the table.
    """
    # Reading the current estimate also creates the row for `state` if needed
    old_estimate = get_q_value(state, action)

    # Best value reachable from the next state
    best_future = max(q_table[next_state].values()) if next_state in q_table else 0.0

    # Move the estimate towards the temporal-difference target
    td_target = reward + DISCOUNT_FACTOR * best_future
    q_table[state][action] = old_estimate + LEARNING_RATE * (td_target - old_estimate)

# --- Reward computation ---
def get_reward(current_state, action, next_state):
    """Score a transition between two discretised states.

    States are indexed as ``[0]`` quiz-score bin, ``[1]`` read-rate bin and
    ``[2]`` quiz-passed flag. The reward is the sum of:

    * +10 when the quiz-passed flag goes from 0 to 1,
    * +5 when the read-rate bin increases,
    * +7 when the quiz-score bin increases (even by a single bin),
    * -5 when the quiz-score bin decreases.

    ``action`` is accepted for interface compatibility but does not affect
    the result.
    """
    # (condition, points) pairs, evaluated in the order listed
    adjustments = (
        (current_state[2] == 0 and next_state[2] == 1, 10),
        (next_state[1] > current_state[1], 5),
        (next_state[0] > current_state[0], 7),
        (next_state[0] < current_state[0], -5),
    )
    return sum(points for applies, points in adjustments if applies)

# --- Save the Q-table to a CSV file ---
def save_q_table_to_csv(q_table, filename='q_table_results.csv'):
    """Write a Q-table to *filename*, one row per ``(state, action)`` pair.

    Args:
        q_table: Mapping of 4-element state tuples
            ``(quiz_bin, read_bin, quiz_passed, cluster)`` to
            ``{action: q_value}`` dicts.
        filename: Destination CSV path.

    The header row is always written, even for an empty table, so that the
    file can be read back with ``pd.read_csv`` instead of failing with
    ``EmptyDataError``.
    """
    columns = ['quiz_bin', 'read_bin', 'quiz_passed', 'cluster', 'action', 'q_value']
    rows = [
        (state[0], state[1], state[2], state[3], action, q_value)
        for state, actions_dict in q_table.items()
        for action, q_value in actions_dict.items()
    ]

    # Explicit columns keep the header present when there are no rows
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(filename, index=False)
    print(f"‚úÖ Q-table ƒë√£ ƒë∆∞·ª£c l∆∞u v√†o file '{filename}' th√†nh c√¥ng.")

# --- Load the Q-table from a CSV file ---
def load_q_table_from_csv(filename='q_table_results.csv'):
    """Rebuild a Q-table dict from a CSV written by ``save_q_table_to_csv``.

    Returns an empty dict (after printing a message) when *filename* does not
    exist. Otherwise each row contributes one ``q_value`` to the state
    ``(quiz_bin, read_bin, quiz_passed, cluster)``. Actions from ``ACTIONS``
    that are absent from the file keep a value of 0.0.
    """
    if not os.path.exists(filename):
        print(f"‚ùå Kh√¥ng t√¨m th·∫•y file '{filename}'. Vui l√≤ng hu·∫•n luy·ªán m√¥ h√¨nh tr∆∞·ªõc.")
        return {}

    state_columns = ('quiz_bin', 'read_bin', 'quiz_passed', 'cluster')
    loaded = {}
    for _, record in pd.read_csv(filename).iterrows():
        key = tuple(record[column] for column in state_columns)
        # Start a newly seen state with every known action at zero
        per_action = loaded.setdefault(key, dict.fromkeys(ACTIONS, 0.0))
        per_action[record['action']] = record['q_value']

    print(f"‚úÖ Q-table ƒë√£ ƒë∆∞·ª£c t·∫£i t·ª´ file '{filename}' th√†nh c√¥ng.")
    return loaded

# --- Train the model from historical data ---
def train_from_csv(file_path):
    """Train the module-level Q-table from a historical insight CSV.

    Rows are sorted by ``userid`` and ``time``. Every pair of consecutive rows
    of the same user that share a ``sectionid`` is treated as one transition:
    the earlier row gives the state, the later row gives the next state, and
    the action is derived from the later row's ``type``.

    Args:
        file_path: Path of the CSV. It must provide the columns ``userid``,
            ``time``, ``sectionid``, ``type``, ``avg_quiz_score``,
            ``completion_rate``, ``quiz_passed`` and ``cluster``.

    Returns:
        None. If the file is missing a message is printed and nothing is
        trained.
    """
    print("üöÄ B·∫Øt ƒë·∫ßu hu·∫•n luy·ªán m√¥ h√¨nh Q-learning...")
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"‚ùå Kh√¥ng t√¨m th·∫•y file d·ªØ li·ªáu l·ªãch s·ª≠: {file_path}. Kh√¥ng th·ªÉ hu·∫•n luy·ªán.")
        return

    df = df.sort_values(by=['userid', 'time'])

    # One grouping pass instead of re-filtering the whole frame per user.
    # sort=False keeps the groups in the order of the already sorted frame.
    for _, user_data in df.groupby('userid', sort=False):
        records = user_data.to_dict('records')
        for current_row, next_row in zip(records, records[1:]):
            # Only compare rows that belong to the same module/section
            if current_row['sectionid'] != next_row['sectionid']:
                continue

            state = discretize_state(
                current_row['avg_quiz_score'],
                current_row['completion_rate'],
                current_row['quiz_passed'],
                current_row['cluster']
            )

            # The activity of the later row is the action that led to it
            action = get_action_from_type(next_row['type'])

            next_state = discretize_state(
                next_row['avg_quiz_score'],
                next_row['completion_rate'],
                next_row['quiz_passed'],
                next_row['cluster']
            )

            reward = get_reward(state, action, next_state)
            update_q_table(state, action, reward, next_state)
    print("‚úÖ Hu·∫•n luy·ªán Q-learning ho√†n t·∫•t.")

# --- Suggest the best action ---
def suggest_next_action(current_state, q_table):
    """Pick the action with the highest Q-value for *current_state*.

    Returns an ``(action, q_value)`` tuple. When the state is not present in
    *q_table*, a random action from ``ACTIONS`` is returned with a Q-value of
    0.0. Ties go to the action that comes first in the state's dict.
    """
    if current_state in q_table:
        candidates = q_table[current_state]
        top_action, top_value = max(candidates.items(), key=lambda pair: pair[1])
        return top_action, top_value

    # State never learned: fall back to a random action
    return random.choice(ACTIONS), 0.0

# --- Main loop: follow the log file and print suggestions ---
def _ensure_insight_file(output_path):
    """Create the insight CSV with its header row when it does not exist yet."""
    if os.path.exists(output_path):
        return
    # utf-8 matches the default encoding pandas uses when reading the file back
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow([
            'userid', 'courseid', 'sectionid', 'type',
            'avg_quiz_score', 'completion_rate', 'quiz_passed',
            'cluster', 'time'
        ])


def _fetch_section_metrics(userid, courseid, sectionid):
    """Query Moodle and return (avg_quiz_score, completion_rate, quiz_passed)."""
    avg_quiz_score = call_api('local_userlog_get_avg_quiz_score', {
        'userid': userid, 'courseid': courseid
    }).get('avg_quiz_score', 0.0)

    total_resource = call_api('local_userlog_get_total_resources_by_section', {
        'sectionid': sectionid,
        'objecttypes[0]': 'resource',
        'objecttypes[1]': 'hvp',
    }).get('total_resources', 0)

    viewed_resource = call_api('local_userlog_get_viewed_resources_distinct_by_section', {
        'userid': userid,
        'courseid': courseid,
        'sectionid': sectionid,
        'objecttypes[0]': 'resource',
        'objecttypes[1]': 'hvp',
    }).get('viewed_resources', 0)

    # A section without resources counts as 0 completion, not a division error
    completion_rate = viewed_resource / total_resource if total_resource > 0 else 0

    quiz_passed = call_api('local_userlog_get_latest_quiz_pass_status_by_section', {
        'userid': userid,
        'sectionid': sectionid,
    }).get('is_passed', 0)

    return avg_quiz_score, completion_rate, quiz_passed


def _process_log_row(row, trained_q_table, output_path):
    """Record the insight for one log row and print the suggested next action."""
    userid = int(row['userid'])
    courseid = int(row['courseid'])
    sectionid = int(row['sectionid'])
    typ = str(row['type'])
    timestamp = int(row['time'])

    print(f"\n--- X·ª≠ l√Ω h√†nh ƒë·ªông m·ªõi c·ªßa User {userid} ({typ}) t·∫°i Section {sectionid} ---")

    # Live metrics from the Moodle API, then the cluster from the local CSV
    avg_quiz_score, completion_rate, quiz_passed = _fetch_section_metrics(
        userid, courseid, sectionid
    )
    cluster = get_user_cluster(userid)

    # Append the computed insight to the output CSV
    with open(output_path, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow([
            userid,
            courseid,
            sectionid,
            typ,
            avg_quiz_score,
            round(completion_rate, 2),
            int(quiz_passed),
            cluster,
            timestamp
        ])

    # Discretise the user's current state and look up the best action
    current_state = discretize_state(
        avg_quiz_score,
        completion_rate,
        quiz_passed,
        cluster
    )
    best_action, q_value = suggest_next_action(current_state, trained_q_table)
    print(f"üëâ Tr·∫°ng th√°i hi·ªán t·∫°i c·ªßa User {userid}: {current_state}")
    print(f"üí° H·ªá th·ªëng ƒë·ªÅ xu·∫•t h√†nh ƒë·ªông: '{best_action}' (Q-value = {q_value:.2f})")


def track_and_suggest_new_rows(log_file_path, trained_q_table, poll_interval=2):
    """Poll a log CSV and print an action suggestion for every new row.

    Args:
        log_file_path: CSV log to follow. It is re-read in full on every poll,
            and rows beyond the number already seen are treated as new.
        trained_q_table: Q-table used by ``suggest_next_action``.
        poll_interval: Seconds to sleep between polls.

    For each new row the function queries Moodle for the user's metrics,
    appends them to ``./user_insight.csv`` and prints the suggested action.
    The loop never returns; interrupt the process (e.g. Ctrl+C) to stop it.

    A row that fails is reported and skipped, so the remaining rows of the
    same batch are still handled.
    """
    print(f"\nüîç ƒêang theo d√µi file log: {log_file_path} v√† g·ª£i √Ω h√†nh ƒë·ªông...")
    seen_lines = 0
    output_path = './user_insight.csv'  # Receives the computed insights

    _ensure_insight_file(output_path)

    while True:
        # Only the read is guarded by the file-specific handlers, so a
        # FileNotFoundError raised while processing a row is not misreported
        # as a missing log file.
        try:
            df_log = pd.read_csv(log_file_path)
        except FileNotFoundError:
            print("‚ö†Ô∏è File log ch∆∞a t·ªìn t·∫°i. ƒê·ª£i...")
        except pd.errors.EmptyDataError:
            print("‚ö†Ô∏è File log r·ªóng. ƒê·ª£i d·ªØ li·ªáu m·ªõi...")
        except Exception as e:
            print(f"‚ùå L·ªói trong qu√° tr√¨nh theo d√µi v√† g·ª£i √Ω: {e}")
        else:
            current_lines = len(df_log)
            if current_lines > seen_lines:
                new_rows = df_log.iloc[seen_lines:]
                print(f"\nüÜï Ph√°t hi·ªán {len(new_rows)} d√≤ng log m·ªõi:")
                seen_lines = current_lines

                for _, row in new_rows.iterrows():
                    # Isolate failures per row: seen_lines has already been
                    # advanced, so rows after a failing one would be lost.
                    try:
                        _process_log_row(row, trained_q_table, output_path)
                    except Exception as e:
                        print(f"‚ùå L·ªói trong qu√° tr√¨nh theo d√µi v√† g·ª£i √Ω: {e}")

        # Wait before the next poll, whether or not this one succeeded
        time.sleep(poll_interval)

# --- Main workflow ---
if __name__ == "__main__":
    # Step 1: train the model from historical data.
    # user_insight.csv is the file written by track_and_suggest_new_rows on
    # earlier runs. Re-running the training periodically keeps the Q-table in
    # line with the latest data.
    train_from_csv('user_insight.csv')

    # Persist what was just learned. Without this step the training result
    # only lives in the in-memory q_table and the load below would pick up a
    # stale (or missing) file. An empty table is not written, so a failed or
    # skipped training run cannot wipe a previously saved Q-table.
    if q_table:
        save_q_table_to_csv(q_table, 'q_table_results.csv')

    # Step 2: load the trained Q-table.
    # When q_table_results.csv does not exist this returns an empty dict.
    trained_q_table = load_q_table_from_csv('q_table_results.csv')

    # Step 3: follow the log file and print suggestions as new rows arrive.
    # Change this path to the actual Moodle log file.
    log_file_path = "/Users/nguyenhuuloc/Documents/MyComputer/moodledata/local_userlog_data/user_log_summary.csv"
    track_and_suggest_new_rows(log_file_path, trained_q_table)



üöÄ B·∫Øt ƒë·∫ßu hu·∫•n luy·ªán m√¥ h√¨nh Q-learning...
‚úÖ Hu·∫•n luy·ªán Q-learning ho√†n t·∫•t.
‚úÖ Q-table ƒë√£ ƒë∆∞·ª£c t·∫£i t·ª´ file 'q_table_results.csv' th√†nh c√¥ng.

üîç ƒêang theo d√µi file log: /Users/nguyenhuuloc/Documents/MyComputer/moodledata/local_userlog_data/user_log_summary.csv v√† g·ª£i √Ω h√†nh ƒë·ªông...

üÜï Ph√°t hi·ªán 20 d√≤ng log m·ªõi:

--- X·ª≠ l√Ω h√†nh ƒë·ªông m·ªõi c·ªßa User 4 (quiz) t·∫°i Section 42 ---
üëâ Tr·∫°ng th√°i hi·ªán t·∫°i c·ªßa User 4: (0, 2, 1, 0)
üí° H·ªá th·ªëng ƒë·ªÅ xu·∫•t h√†nh ƒë·ªông: 'attempt_new_quiz' (Q-value = 0.25)

--- X·ª≠ l√Ω h√†nh ƒë·ªông m·ªõi c·ªßa User 4 (resource) t·∫°i Section 38 ---
üëâ Tr·∫°ng th√°i hi·ªán t·∫°i c·ªßa User 4: (0, 2, 0, 0)
üí° H·ªá th·ªëng ƒë·ªÅ xu·∫•t h√†nh ƒë·ªông: 'read_new_resource' (Q-value = 0.13)

--- X·ª≠ l√Ω h√†nh ƒë·ªông m·ªõi c·ªßa User 4 (hvp) t·∫°i Section 38 ---
üëâ Tr·∫°ng th√°i hi·ªán t·∫°i c·ªßa User 4: (0, 2, 0, 0)
üí° H·ªá th·ªëng ƒë·ªÅ xu·∫•t h√†nh ƒë·ªông: 'read_new_res

KeyboardInterrupt: 