In [1]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_absolute_error, confusion_matrix, ConfusionMatrixDisplay

In [2]:
EXAM_NAMES = ['Kuis 2', 'UTS']
EXAM_NUMBERS = {
    'Kuis 2': ['IA', 'IB', 'IC'],
    'UTS': ['IA', 'IB']
}
EXAMS = []
for exam_name in EXAM_NAMES:
    for exam_number in EXAM_NUMBERS[exam_name]:
        EXAMS.append((exam_name, exam_number))

LB = {
    'All': 0.6562801963,
    'Kuis 2 IA': 0.1447276516,
    'Kuis 2 IB': 0.3178719202,
    'Kuis 2 IC': 0.6346878475,
    'UTS IA': 0.5107134316,
    'UTS IB': 0.4047760493
}

X_AXIS = 'nilai autograder'
Y_AXIS = 'nilai manual'
EPS = 0.001

import os 
import sys

module_path = os.path.abspath(os.path.join('..')) 
if module_path not in sys.path: 
    sys.path.append(module_path) 
    
from grader.src.ged.classes.general_cost_function import RelabelMethod
from grader.src.grader import Grader, GraphPreprocessType

REAL_GRADE_FILENAME = 'Real Results.csv' 
AFILE_PREFIX = 'Grade_'

real_dataframe = pd.read_csv(REAL_GRADE_FILENAME)
dataframes = {}
for relabel_method in RelabelMethod.__iter__():
    for graph_preprocess_type in GraphPreprocessType.__iter__():
        filename = f'Train_{relabel_method.name}_{graph_preprocess_type.name}.csv'
        if filename not in os.listdir('./'):
            continue
#         dataframes[filename] = pd.read_csv(filename).rename(columns={'grade': 'nilai autograder'})
        dataframes[filename] = pd.read_csv(filename)
        dataframes[filename] = pd.merge(dataframes[filename], real_dataframe, on=['nim', 'exam_name', 'number'])

In [None]:
import math

RATE = 0.0001

# y = sigma x_i * w_i
# dE / dw_i = x_i * (y - t)
# delta w_i = -rate * (dE/dw_i) = -rate * x_i * (y - t)
X1_LABEL = 'total_node_cost'
X2_LABEL = 'total_edge_cost'
T_LABEL = 'nilai manual'

def normalized(w1, w2):
    norm = math.sqrt(w1 * w1 + w2 * w2)
    return w1 / norm, w2 / norm

def train(df, epoch=100):
    w1 = 1.0
    w2 = 1.0
    for cur_epoch in range(epoch):
        for id, row in df.iterrows():
            count1 = row['node_count']
            count2 = row['edge_count']
            
            x1 = row[X1_LABEL]
            x2 = row[X2_LABEL]
            t = (1.0 - (row[T_LABEL] / 100)) * (count1 * w1 + count2 * w2)
            
            y = x1 * w1 + x2 * w2
            delta_w1 = -RATE * x1 * (y - t)
            delta_w2 = -RATE * x2 * (y - t)
            
            w1 += delta_w1
            w2 += delta_w2
            w1, w2 = normalized(w1, w2)
            
    return w1, w2

results = {}
for filename, df in dataframes.items():
    w1, w2 = train(df, 200)
    new_row = [100 * (1 - (row[X1_LABEL] * w1 + row[X2_LABEL] * w2)) / (row['node_count'] * w1 + row['edge_count'] * w2) for id, row in df.iterrows()]
    df['nilai autograder'] = new_row
    corr = df.corr()
    results[filename] = {
        'weight': (w1, w2),
        'old_correlation': corr["grade"][Y_AXIS],
        'new_correlation': corr[X_AXIS][Y_AXIS]
    }
    
for filename, stats in results.items():
    print(f'{filename} training result\n--------------')
    for k, v in stats.items():
        print(f'{k}: {v}')
    print('--------------\n')

## Result 

Starts with weight (1, 1)
```
Train_NONE_UNCOLLAPSE.csv training result
--------------
weight: (0.8257784952978421, 0.5639945715196486)
old_correlation: 0.7237943416739918
new_correlation: 0.7309536273785884
--------------

Train_NONE_COLLAPSE.csv training result
--------------
weight: (0.48785594253967823, 0.8729241543964299)
old_correlation: 0.686587182493469
new_correlation: 0.6856247758236598
--------------

Train_NONE_PROPAGATE_BRANCHING.csv training result
--------------
weight: (0.42704726637701995, 0.9042293029314603)
old_correlation: 0.6493976092819717
new_correlation: 0.6492057126073356
--------------

Train_BOOLEAN_COUNT_UNCOLLAPSE.csv training result
--------------
weight: (0.8584438636207447, 0.5129075287143761)
old_correlation: 0.7236863160356296
new_correlation: 0.7292914890584136
--------------

Train_BOOLEAN_COUNT_COLLAPSE.csv training result
--------------
weight: (0.8565880656045796, 0.5160008583944453)
old_correlation: 0.6377400969554124
new_correlation: 0.678042439961036
--------------

Train_BOOLEAN_COUNT_PROPAGATE_BRANCHING.csv training result
--------------
weight: (0.5855415415769536, 0.8106424014864289)
old_correlation: 0.6363141615448296
new_correlation: 0.6520778501898457
--------------

Train_COUNTER_COLLAPSE.csv training result
--------------
weight: (0.8774999661590893, 0.47957669813158876)
old_correlation: 0.655214792911339
new_correlation: 0.6941486571893627
--------------

Train_COUNTER_PROPAGATE_BRANCHING.csv training result
--------------
weight: (0.6071869661112416, 0.7945589897450196)
old_correlation: 0.6408816580522574
new_correlation: 0.6552280575384303
--------------

Train_DAMERAU_LD_COLLAPSE.csv training result
--------------
weight: (0.8771657473231458, 0.480187725502253)
old_correlation: 0.6337564587828098
new_correlation: 0.6819078442276093
--------------

Train_DAMERAU_LD_PROPAGATE_BRANCHING.csv training result
--------------
weight: (0.6139905931782683, 0.789313341765232)
old_correlation: 0.646898025402812
new_correlation: 0.661430477139439
--------------

Train_EXACT_COLLAPSE.csv training result
--------------
weight: (0.922103988285686, 0.38694215948592586)
old_correlation: 0.6327693058925489
new_correlation: 0.6726393957117593
--------------

Train_EXACT_PROPAGATE_BRANCHING.csv training result
--------------
weight: (0.7543733917011383, 0.656445569634925)
old_correlation: 0.6154178430154562
new_correlation: 0.638491346018969
--------------
```