## Get log for K-means clustering

In [1]:
import csv
import os

import requests

# Connection settings for the Moodle REST web-service endpoint.
# Every setting except FORMAT can be overridden with an environment variable;
# the literal values are only fallbacks so existing runs keep working.
MOODLE_URL = os.environ.get(
    'MOODLE_URL', 'http://localhost:8100/webservice/rest/server.php'
)
# NOTE(review): this token is a credential and is committed in plain text.
# Prefer supplying MOODLE_TOKEN from the environment, then rotate the token
# and drop this fallback literal.
TOKEN = os.environ.get('MOODLE_TOKEN', '84cffdbb9ead18d97ccc45f9889bc926')
# User and course whose activity log is turned into one feature row.
USER_ID = int(os.environ.get('MOODLE_USER_ID', 4))
COURSE_ID = int(os.environ.get('MOODLE_COURSE_ID', 5))
# Response format requested from the server (sent as `moodlewsrestformat`).
FORMAT = 'json'

def call_api(function, extra_params, timeout=30):
    """Call a Moodle REST web-service function and return the decoded JSON reply.

    Args:
        function: Name of the web-service function, sent as ``wsfunction``.
        extra_params: Mapping of additional form fields for the call. It is
            merged over the base fields and is not modified.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the script indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        RuntimeError: If the reply is a JSON object carrying an ``exception``
            key, which is how Moodle reports a failed call.
    """
    params = {
        'wstoken': TOKEN,
        'moodlewsrestformat': FORMAT,
        'wsfunction': function
    }
    params.update(extra_params)

    response = requests.post(MOODLE_URL, data=params, timeout=timeout)
    response.raise_for_status()
    payload = response.json()

    # A failed call (bad token, unknown function, missing capability) comes
    # back as an error object rather than the expected result. Surface it here
    # so callers do not silently fall back to their `.get(...)` defaults.
    if isinstance(payload, dict) and 'exception' in payload:
        raise RuntimeError(
            f"Moodle call {function!r} failed: "
            f"{payload.get('errorcode', payload['exception'])}: "
            f"{payload.get('message', '')}"
        )
    return payload

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator rounded to 2 places, or 0 if the denominator is falsy."""
    return round(numerator / denominator, 2) if denominator else 0


def main():
    """Fetch one user's raw counters, derive the features, and write them to CSV.

    Calls six ``local_userlog_*`` web-service functions for USER_ID in
    COURSE_ID, computes the per-quiz and per-day ratios, prints three debug
    lines, and writes a single-row ``user_features.csv`` (header + one row)
    in the current working directory.
    """
    # Every call is scoped to the same user and course.
    scope = {'userid': USER_ID, 'courseid': COURSE_ID}
    # The two activity-based calls additionally restrict the counted object types.
    activity_scope = {
        **scope,
        'objecttypes[0]': 'resource',
        'objecttypes[1]': 'hvp',
        'objecttypes[2]': 'quiz',
    }

    # Fetch the raw counters from the API.
    quiz_attempts = call_api('local_userlog_get_quiz_attempts', scope).get('quiz_attempts', 0)
    total_time = call_api('local_userlog_get_total_quiz_time', scope).get('total_quiz_time', 0)
    resource_views = call_api('local_userlog_get_resource_views', activity_scope).get('resource_views', 0)
    learning_days = call_api('local_userlog_get_learning_days', activity_scope).get('num_learning_days', 1)
    pass_quiz_count = call_api('local_userlog_get_pass_quiz_count_attempt', scope).get('pass_quiz_count', 0)
    avg_quiz_score = call_api('local_userlog_get_avg_quiz_score', scope).get('avg_quiz_score', 0.0)

    # Derive the features. Every ratio falls back to 0 on a zero denominator;
    # the `.get` default of 1 for learning days only covers a missing key, so
    # an explicit 0 from the API must be guarded here as well.
    avg_time_per_quiz = _safe_ratio(total_time, quiz_attempts)
    avg_resource_views_per_day = _safe_ratio(resource_views, learning_days)
    quiz_success_rate = _safe_ratio(pass_quiz_count, quiz_attempts)
    print(f"[DEBUG] quiz_success_rate = {quiz_success_rate}")
    print(f"[DEBUG] quiz_attempts = {quiz_attempts}")
    print(f"[DEBUG] pass_quiz_count = {pass_quiz_count}")

    resource_vs_quiz_ratio = _safe_ratio(resource_views, quiz_attempts)

    # Output record; key order defines the CSV column order.
    final_features = {
        'userid': USER_ID,
        'courseid': COURSE_ID,
        'avg_time_per_quiz': avg_time_per_quiz,
        'avg_resource_views_per_day': avg_resource_views_per_day,
        'quiz_success_rate': quiz_success_rate,
        'avg_quiz_score': avg_quiz_score,
        'resource_vs_quiz_ratio': resource_vs_quiz_ratio
    }

    # Write the record to the CSV file.
    with open('user_features.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=final_features.keys())
        writer.writeheader()
        writer.writerow(final_features)

    print("✅ Đã ghi user_features.csv thành công.")


if __name__ == '__main__':
    main()

[DEBUG] quiz_success_rate = 0.83
[DEBUG] quiz_attempts = 36
[DEBUG] pass_quiz_count = 30
✅ Đã ghi user_features.csv thành công.


### Tạo log ảo cho K-means theo cơ chế thật

In [4]:
import csv
import random

def read_base_from_csv(filename='user_features.csv'):
    """Read the first data row of a feature CSV and return its features as floats.

    Args:
        filename: Path of a CSV file with a header row that contains the five
            feature columns listed below.

    Returns:
        A dict mapping each of the five feature names to a float.

    Raises:
        ValueError: If the file has no data row, or a feature value is not a
            valid float.
        KeyError: If one of the feature columns is missing from the header.
    """
    feature_names = (
        'avg_time_per_quiz',
        'avg_resource_views_per_day',
        'quiz_success_rate',
        'avg_quiz_score',
        'resource_vs_quiz_ratio',
    )
    with open(filename, 'r', newline='') as f:
        reader = csv.DictReader(f)
        # A default avoids leaking a bare StopIteration when the file is empty
        # or holds only the header line.
        first_row = next(reader, None)
        if first_row is None:
            raise ValueError(f"{filename} contains no data rows")
        return {name: float(first_row[name]) for name in feature_names}

def generate_data(base, userid_start=5, count=100, courseid=5, seed=None):
    """Generate synthetic per-user feature rows around a real baseline.

    Args:
        base: Mapping of baseline features; only
            ``base['avg_resource_views_per_day']`` is read, as the mean of the
            views distribution.
        userid_start: User id assigned to the first generated row; ids then
            increase by one per row.
        count: Number of rows to generate.
        courseid: Course id written into every row.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the same seed always yields the same rows. When ``None``,
            the module-level ``random`` generator is used, as before.

    Returns:
        A list of ``count`` dicts with the keys ``userid``, ``courseid``,
        ``avg_time_per_quiz``, ``avg_resource_views_per_day``,
        ``quiz_success_rate``, ``avg_quiz_score`` and
        ``resource_vs_quiz_ratio``.
    """
    # Both the module and a Random instance expose gauss() and uniform().
    rng = random if seed is None else random.Random(seed)

    data = []
    for i in range(count):
        userid = userid_start + i

        # Resource views per day: Gaussian around the baseline (sigma 3) for a
        # more natural spread, clamped so it never goes negative.
        views = max(0, round(rng.gauss(base['avg_resource_views_per_day'], 3), 2))

        # Time spent per quiz, in minutes.
        time_per_quiz = round(rng.uniform(10, 25), 2)

        # Quiz success rate grows with the number of resources viewed, plus
        # noise, clamped to [0.1, 1.0].
        success_rate = min(1.0, max(0.1, 0.5 + (views - 5) * 0.03 + rng.uniform(-0.1, 0.1)))

        # Average quiz score follows the success rate, plus noise, clamped to [0, 10].
        quiz_score = round(min(10, max(0, success_rate * 10 + rng.uniform(-1.5, 1.5))), 2)

        # Resource/quiz ratio: the divisor shrinks as the success rate rises
        # and is floored at 1.
        resource_vs_quiz_ratio = round(views / max(1, (10 - success_rate * 5)), 2)

        row = {
            'userid': userid,
            'courseid': courseid,
            'avg_time_per_quiz': time_per_quiz,
            'avg_resource_views_per_day': views,
            'quiz_success_rate': round(success_rate, 2),
            'avg_quiz_score': quiz_score,
            'resource_vs_quiz_ratio': resource_vs_quiz_ratio,
        }
        data.append(row)

    return data

def save_csv(data, filename='synthetic_user_features.csv'):
    """Write a list of row dicts to a CSV file and report how many were saved.

    The header is taken from the keys of the first row. When ``data`` is
    empty nothing is written and nothing is printed.
    """
    if data:
        columns = data[0].keys()
        with open(filename, 'w', newline='') as out_file:
            csv_writer = csv.DictWriter(out_file, fieldnames=columns)
            csv_writer.writeheader()
            for record in data:
                csv_writer.writerow(record)
        print(f"✅ Saved {len(data)} rows to {filename}")

if __name__ == '__main__':
    # Use the feature row stored in user_features.csv as the baseline.
    base = read_base_from_csv('user_features.csv')
    # Generate 1000 synthetic users with ids 4..1003 (courseid keeps its default).
    # NOTE(review): userid_start=4 differs from generate_data's default of 5 and
    # matches the user id 4 hard-coded in the feature-export cell, so the first
    # synthetic row shares an id with the real user - confirm this is intended.
    synthetic_data = generate_data(base, userid_start=4, count=1000)
    # No filename is passed, so save_csv writes to its default path.
    save_csv(synthetic_data)

✅ Saved 1000 rows to synthetic_user_features.csv
