In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
from metrics_implementation import MeanTriesCount, TriesStd, SkipsPercentage, GuessedPercentage, LostPercentage, SolvedPercentage
from other_metrics import N_tries, diff_tries, percentage_tries, TaskTime, TaskTimeDeviation
from functools import reduce

def calculate_course(course_dir, course_name):
    """Evaluate all configured metrics for one course and save them as a CSV.

    Loads the eight course tables from ``course_dir`` (one ``<table>.csv``
    file per table), builds each metric object on top of the shared
    ``data_tables`` dict, evaluates them, outer-merges the results on
    ``element_id`` and writes the merged table, indexed by ``element_id``, to
    ``course_{course_name}_metrics.csv`` in the current working directory.

    Args:
        course_dir: Directory containing the course CSV files. A trailing
            path separator is accepted but not required.
        course_name: Course identifier; used only to build the output file
            name.

    Returns:
        None. The merged metrics are written to disk.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when one of the
            expected CSV files is missing.
    """
    # Each name is both the CSV base name on disk and the key under which the
    # table is handed to the metric classes.
    table_names = (
        'course',
        'course_module',
        'course_element',
        'course_graph',
        'solution_log',
        'user_course_progress',
        'user_element_progress',
        'user_module_progress',
    )
    # os.path.join (instead of string concatenation) keeps the paths valid
    # whether or not course_dir ends with a separator.
    data_tables = {
        name: pd.read_csv(os.path.join(course_dir, f"{name}.csv"))
        for name in table_names
    }

    # Metric objects. Thresholds and parameters are passed through unchanged;
    # their meaning is defined by the metric classes themselves.
    mtc = MeanTriesCount(metric_name='mean_tries_count', data_tables=data_tables,
                         parameters={'outlier': 65}, threshold=4)
    std = TriesStd(metric_name='tries_std', data_tables=data_tables,
                   parameters={'outlier': 65}, threshold=5)
    skips = SkipsPercentage(metric_name='skips_percentage', data_tables=data_tables,
                            parameters={}, threshold=0.2)
    lost = LostPercentage(metric_name='lost_percentage', data_tables=data_tables,
                          parameters={}, threshold=0.15)
    guess = GuessedPercentage(metric_name='guessed_percentage', data_tables=data_tables,
                              parameters={}, threshold=0.2)
    solved = SolvedPercentage('solved_percentage', threshold=0.88,
                              parameters={}, data_tables=data_tables)
    mean_time_metric = TaskTime(metric_name='mean_time', data_tables=data_tables,
                                parameters={'metric': 'mean'}, threshold=7.5)
    percentage_tries_metric = percentage_tries(metric_name='percentage_tries', threshold=0.9,
                                               parameters={}, data_tables=data_tables)
    N_tries_metric = N_tries(metric_name='N_tries', threshold=10,
                             parameters={'N': 7}, data_tables=data_tables)
    diff_tries_metric = diff_tries(metric_name='diff_tries', threshold=1,
                                   parameters={}, data_tables=data_tables)

    # Evaluation order is kept as it was, in case evaluate() touches the
    # shared data_tables (cannot be verified from this notebook).
    mean_tries_df = mtc.evaluate()
    tries_std_df = std.evaluate()
    skips_percentage_df = skips.evaluate()
    lost_percentage_df = lost.evaluate()
    guessed_percentage_df = guess.evaluate()
    solved_percentage_df = solved.evaluate()

    N_tries_df = N_tries_metric.evaluate()
    diff_tries_df = diff_tries_metric.evaluate()
    percentage_tries_df = percentage_tries_metric.evaluate()

    mean_time_df = mean_time_metric.evaluate()

    # The list order fixes the column order of the output file.
    data_frames = [
        mean_tries_df,
        tries_std_df,
        skips_percentage_df,
        lost_percentage_df,
        guessed_percentage_df,
        solved_percentage_df,
        mean_time_df,
        N_tries_df,
        diff_tries_df,
        percentage_tries_df,
    ]
    # Outer merge keeps an element even when only some metrics report it.
    # Every evaluate() result is expected to expose an 'element_id' column.
    df_merged = reduce(
        lambda left, right: pd.merge(left, right, how="outer", on='element_id'),
        data_frames,
    )
    df_merged = df_merged.set_index('element_id')
    df_merged.to_csv(f"course_{course_name}_metrics.csv")

In [None]:
# Root folder expected to hold one sub-directory of CSV exports per course.
courses_directory = 'data/courses/'

# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for course_name in sorted(os.listdir(courses_directory)):
    # The empty last component makes the path end with a separator, matching
    # the 'dir/' form this loop has always passed to calculate_course.
    course_path = os.path.join(courses_directory, course_name, '')
    if not os.path.isdir(course_path):
        # Stray files (e.g. .DS_Store) are not courses; skip them instead of
        # letting the CSV loading fail on them.
        continue
    print(f'Calculating course {course_name}')
    calculate_course(course_path, course_name)

In [None]:
# course_dir = "data/courses/638/"

# course = pd.read_csv(course_dir + "course.csv")
# course_element = pd.read_csv(course_dir + "course_element.csv")
# course_graph = pd.read_csv(course_dir + "course_graph.csv")
# course_module = pd.read_csv(course_dir + "course_module.csv")
# solution_log = pd.read_csv(course_dir + "solution_log.csv")
# user_course_progress = pd.read_csv(course_dir + "user_course_progress.csv")
# user_element_progress = pd.read_csv(course_dir + "user_element_progress.csv")
# user_module_progress = pd.read_csv(course_dir + "user_module_progress.csv")

In [None]:
# from metrics_implementation import MeanTriesCount, TriesStd, SkipsPercentage, GuessedPercentage, LostPercentage, SolvedPercentage

# mtc = MeanTriesCount(metric_name='mean_tries_count', data_tables={'user_element_progress': user_element_progress}, 
#                     parameters={'outlier': 65}, threshold=4)
# std = TriesStd(metric_name='tries_std', data_tables={'user_element_progress': user_element_progress}, 
#                     parameters={'outlier': 65}, threshold=5)
# skips = SkipsPercentage(metric_name='skips_percentage', data_tables={'user_element_progress': user_element_progress, 'course_element': course_element}, 
#                     parameters={}, threshold=0.2)
# lost = LostPercentage(metric_name='lost_percentage', data_tables={'user_element_progress': user_element_progress, 'course_element': course_element}, 
#                     parameters={}, threshold=0.15)
# guess = GuessedPercentage(metric_name='guessed_percentage', data_tables={'user_element_progress': user_element_progress, 'solution_log': solution_log}, 
#                     parameters={}, threshold=0.2)
# solved = SolvedPercentage('solved_percentage', threshold=0.88, parameters={},data_tables={'user_element_progress': user_element_progress})

In [None]:
# data_tables = {
#     'course': course,
#     'course_module': course_module,
#     'course_element': course_element,
#     'course_graph': course_graph,
#     'solution_log': solution_log,
#     'user_course_progress': user_course_progress,
#     'user_element_progress': user_element_progress,
#     'user_module_progress': user_module_progress
# }

In [None]:
# from other_metrics import N_tries, diff_tries, percentage_tries, TaskTime, TaskTimeDeviation



# median_time_metric = TaskTime(metric_name='median_time', data_tables=data_tables, parameters={'metric' : 'median'}, threshold=1)
# mean_time_metric = TaskTime(metric_name='mean_time', data_tables=data_tables, parameters={'metric' : 'mean'}, threshold=7.5)
# median_dev_metric = TaskTimeDeviation(metric_name='median_dev', data_tables=data_tables, parameters={'metric' : 'median'}, threshold=1)
# mean_dev_metric = TaskTimeDeviation(metric_name='mean_dev', data_tables=data_tables, parameters={'metric' : 'mean'}, threshold=7.5)

# N_tries_metric = N_tries(metric_name='N_tries', threshold=10, parameters={'N' : 7}, data_tables=data_tables)
# diff_tries_metric = diff_tries(metric_name='diff_tries', threshold=1, parameters={}, data_tables=data_tables)
# percentage_tries_metric = percentage_tries(metric_name='percentage_tries', threshold=0.9, parameters={}, data_tables=data_tables)

In [None]:
# mean_tries = mtc.evaluate()
# tries_std = std.evaluate()
# skips_percentage = skips.evaluate()
# lost_percentage = lost.evaluate()
# guessed_percentage = guess.evaluate()
# solved_percentage = solved.evaluate()

In [None]:
# N_tries = N_tries_metric.evaluate()
# diff_tries = diff_tries_metric.evaluate()
# percentage_tries = percentage_tries_metric.evaluate()

In [None]:
# mean_time = mean_time_metric.evaluate()

In [None]:
# data_frames = [
#     mean_tries,
#     tries_std,
#     skips_percentage,
#     lost_percentage,
#     guessed_percentage,
#     solved_percentage,
#     mean_time,
#     N_tries,
#     diff_tries,
#     percentage_tries
# ]

In [None]:
# from functools import reduce

# df_merged = reduce(
#     lambda left, right: pd.merge(
#         left, right, how="outer", on='element_id'
#     ),
#     data_frames,
# )

In [None]:
# df_merged = df_merged.set_index('element_id')
# df_merged.head()

In [None]:
# df_merged.to_csv("ml_intro_metrics.csv")