# Model #1: student generation and parameter learning

In [1]:
import sys

sys.path.append("/Users/olivier/PycharmProjects/bayesian-kst/")

import numpy as np
from kgraph.expert_layer.domain_graph import DomainGraph
from kgraph.expert_layer.knowledge_components import KnowledgeComponent, ProceduralKnowledgeComponent
from kgraph.expert_layer.links import LinkModel, LinkFromParents, LinkFromChildren
from kgraph.resources_layer.exercise_family import ExerciseFamily
from kgraph.resources_layer.exercise import Exercise
from kgraph.learner_layer.evaluation import Evaluation
from kgraph.learner_layer.learner_pool import LearnerPool
from kgraph.learner_layer.learner import Learner, LearnerGraph
from kgraph.helpers.truthtable import truthtable
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
import pyAgrum.lib.dynamicBN as gdyn

## Student generation

In [2]:
# Three procedural knowledge components, with ids 1..3 and names "A".."C".
KC_A, KC_B, KC_C = (
    ProceduralKnowledgeComponent(kc_id, kc_name)
    for kc_id, kc_name in enumerate("ABC", start=1)
)

# 4x2 table handed (flattened, row by row) to the link C <- (A, B); every row
# sums to 1.
# NOTE(review): presumably one row per (A, B) configuration -- the row order
# and the meaning of each column are defined by LinkFromParents; confirm there.
P_C_knowing_A_and_B = np.array([
    [1., 0.],
    [.85, .15],
    [.9, .1],
    [.4, .6],
])

# Pairwise links between A and B. They are built here but are not part of the
# link model created below.
link_a_to_b = LinkFromParents(KC_B, [KC_A], [.4, .6])
link_b_to_a = LinkFromChildren(KC_A, [KC_B], [.1, .9])

# The link model holds a single link: C with parents A and B.
link_c_from_a_and_b = LinkFromParents(
    KC_C, [KC_A, KC_B], P_C_knowing_A_and_B.ravel().tolist()
)
link_model = LinkModel([link_c_from_a_and_b])

# Domain graph over the three KCs and that link model.
domain_graph = DomainGraph([KC_A, KC_B, KC_C], link_model)

In [3]:


# Default slip/guess parameters; the same dict object is passed to every exercise.
params = {"slip": .1, "guess": .25}


def _make_qcm_exercises(first_id, count=5):
    """Return `count` "qcm" exercises with consecutive ids starting at `first_id`.

    Every exercise is created with empty content and the module-level `params`.
    """
    return [Exercise(ex_id, "qcm", ex_content="", params=params)
            for ex_id in range(first_id, first_id + count)]


# Exercises 1-5 correspond to KC_A.
ex_A_1, ex_A_2, ex_A_3, ex_A_4, ex_A_5 = _make_qcm_exercises(1)
ex_fam_A = ExerciseFamily(1, "ex_fam_A", KC_A, [ex_A_1, ex_A_2, ex_A_3, ex_A_4, ex_A_5])

# Exercises 6-10 correspond to KC_B.
ex_B_1, ex_B_2, ex_B_3, ex_B_4, ex_B_5 = _make_qcm_exercises(6)
# Each family gets its own id (previously all three families used id 1).
ex_fam_B = ExerciseFamily(2, "ex_fam_B", KC_B, [ex_B_1, ex_B_2, ex_B_3, ex_B_4, ex_B_5])

# Exercises 11-15 correspond to KC_C.
ex_C_1, ex_C_2, ex_C_3, ex_C_4, ex_C_5 = _make_qcm_exercises(11)
# Stored under its own name so that the KC_B family above is no longer overwritten.
ex_fam_C = ExerciseFamily(3, "ex_fam_C", KC_C, [ex_C_1, ex_C_2, ex_C_3, ex_C_4, ex_C_5])


In [4]:
# Size of the simulated learner population.
n_learners = 1000

# NOTE(review): no random seed is set in the visible cells, so the generated
# learners (and everything learned from them) presumably differ between runs
# -- confirm how LearnerPool draws its random values before seeding.
learner_pool = LearnerPool(domain_graph)
learner_pool.setup_random_learners(n_learners)

In [5]:
# Show the default learner graph, then only a small sample of the pool:
# printing one graph per learner floods the notebook output.
n_learners_shown = 5

learner_pool.print_default_learner_graph()
# The loop variable is deliberately not called `learner`: that name is bound
# to the gum.BNLearner further down in the notebook.
for shown, pool_learner in enumerate(learner_pool.learners):
    if shown >= n_learners_shown:
        break
    print(pool_learner.learner_graph)

LearnerGraph of Learner 0
P(1) = 0.2 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.2 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.2 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})

LearnerGraph of Learner 1
P(1) = 0.6398582418953006 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.39478244581961097 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.21161487166211376 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})

LearnerGraph of Learner 2
P(1) = 0.9412400547136567 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.3743965874714962 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.27362246615878716 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})

LearnerGraph of Learner 3
P(1) = 0.6505459905587503 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.2822938669717069 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.17167447926285023 (params: {'learn': 0.1, 'del

In [6]:
# Value passed to the evaluation simulator below.
n_evals = 3000

# List the ids of the learners taking part in the simulation.
learner_ids = [pool_learner.id for pool_learner in learner_pool.learners]
print(learner_ids)

# Simulated evaluations, later indexed by learner position (0 .. n_learners - 1).
sim_evaluations = learner_pool.simulate_evaluations_from_learners(n_evals)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 22

In [7]:
import pandas as pd

knowledge_components = domain_graph.get_knowledge_components()

# Column layout: for each KC, the pair (state, eval) at step "i-1", then the
# same pair at step "i", e.g. A_i-1, eval_A_i-1, A_i, eval_A_i, B_i-1, ...
col_names = [
    name
    for kc in knowledge_components
    for step in ("i-1", "i")
    for name in (f"{kc.name}_{step}", f"eval_{kc.name}_{step}")
]

# For every learner, the flat chronological list of (kc, success) pairs over
# all answers of all of that learner's evaluations.
temp_eval = [
    [
        (evaluation.get_kc(), int(answer["success"]))
        for evaluation in sim_evaluations[learner_idx]
        for answer in evaluation.answers.values()
    ]
    for learner_idx in range(len(sim_evaluations))
]

# One row per pair of consecutive answers of a same learner. Only the two
# observed results are filled in; every other cell keeps the "?" marker.
# Rows are collected first and the frame is built once: the previous
# `data.iloc[row][col] = value` chained assignment only worked when `iloc`
# happened to return a view, and is a silent no-op under pandas Copy-on-Write.
rows = []
for learner_answers in temp_eval:
    consecutive_pairs = zip(learner_answers, learner_answers[1:])
    for (prev_kc, prev_succ), (next_kc, next_succ) in consecutive_pairs:
        row = dict.fromkeys(col_names, "?")
        row[f"eval_{prev_kc.name}_i-1"] = int(prev_succ)
        row[f"eval_{next_kc.name}_i"] = int(next_succ)
        rows.append(row)

data = pd.DataFrame(rows, columns=col_names)
# Kept for compatibility with the former pre-computed row count.
n_df_rows = len(data)

print(data)

      A_i-1 eval_A_i-1 A_i eval_A_i B_i-1 eval_B_i-1 B_i eval_B_i C_i-1  \
0         ?          1   ?        1     ?          ?   ?        ?     ?   
1         ?          1   ?        1     ?          ?   ?        ?     ?   
2         ?          1   ?        1     ?          ?   ?        ?     ?   
3         ?          1   ?        1     ?          ?   ?        ?     ?   
4         ?          1   ?        ?     ?          ?   ?        0     ?   
...     ...        ...  ..      ...   ...        ...  ..      ...   ...   
14041     ?          ?   ?        ?     ?          0   ?        ?     ?   
14042     ?          ?   ?        ?     ?          ?   ?        ?     ?   
14043     ?          ?   ?        ?     ?          ?   ?        ?     ?   
14044     ?          ?   ?        ?     ?          ?   ?        ?     ?   
14045     ?          ?   ?        ?     ?          ?   ?        ?     ?   

      eval_C_i-1 C_i eval_C_i  
0              ?   ?        ?  
1              ?   ?        ?  
2  

In [8]:
# Display the dataset, then write it to the CSV file read by the BNLearner.
print(data)
# index=False: the row index is not a variable of the network, so it must not
# be written as an extra unnamed first column of the CSV.
data.to_csv("missing_values_data.csv", index=False)

      A_i-1 eval_A_i-1 A_i eval_A_i B_i-1 eval_B_i-1 B_i eval_B_i C_i-1  \
0         ?          1   ?        1     ?          ?   ?        ?     ?   
1         ?          1   ?        1     ?          ?   ?        ?     ?   
2         ?          1   ?        1     ?          ?   ?        ?     ?   
3         ?          1   ?        1     ?          ?   ?        ?     ?   
4         ?          1   ?        ?     ?          ?   ?        0     ?   
...     ...        ...  ..      ...   ...        ...  ..      ...   ...   
14041     ?          ?   ?        ?     ?          0   ?        ?     ?   
14042     ?          ?   ?        ?     ?          ?   ?        ?     ?   
14043     ?          ?   ?        ?     ?          ?   ?        ?     ?   
14044     ?          ?   ?        ?     ?          ?   ?        ?     ?   
14045     ?          ?   ?        ?     ?          ?   ?        ?     ?   

      eval_C_i-1 C_i eval_C_i  
0              ?   ?        ?  
1              ?   ?        ?  
2  

## 1) Completing the exercise associated with KC A

### Classic BKT
**Nodes**:
- one node per KC at each step of the evaluation of an exercise family
- one node per exercise result in the exercise family being evaluated

**Links**:
- between the nodes of an evaluated KC at successive time steps
- between an evaluated KC and its parents/children within the same time step
- between an evaluated KC and its exercise result node

In [9]:
# Collect every arc of the two-slice network as a "source->target" string.
arcs = []

# Per KC: state -> evaluation result at both steps, plus the temporal arc
# from the state at step i-1 to the state at step i.
for kc in domain_graph.get_knowledge_components():
    prev_node, curr_node = f"{kc.name}_i-1", f"{kc.name}_i"
    arcs += [
        f"{prev_node}->eval_{prev_node}",
        f"{curr_node}->eval_{curr_node}",
        f"{prev_node}->{curr_node}",
    ]

# Per KC: one arc from each parent KC, within step i-1 and within step i.
for kc in domain_graph.get_knowledge_components():
    for parent in domain_graph.link_model.get_parents(kc):
        arcs += [f"{parent.name}_{step}->{kc.name}_{step}" for step in ("i-1", "i")]

# Every arc is followed by ";"; the trailing one is stripped before the
# string is handed to fastBN.
fast_bkt_net_str = "".join(f"{arc};" for arc in arcs)

print(fast_bkt_net_str)
fast_bkt_net = gum.fastBN(fast_bkt_net_str[:-1])

A_i-1->eval_A_i-1;A_i->eval_A_i;A_i-1->A_i;B_i-1->eval_B_i-1;B_i->eval_B_i;B_i-1->B_i;C_i-1->eval_C_i-1;C_i->eval_C_i;C_i-1->C_i;A_i-1->C_i-1;A_i->C_i;B_i-1->C_i-1;B_i->C_i;


In [10]:
# Learner reading the exported CSV, with fast_bkt_net passed as its template.
learner = gum.BNLearner("missing_values_data.csv", fast_bkt_net)

# Report whether the learner sees missing values in the file.
has_missing_values = learner.hasMissingValues()
print(f"Missing values in missing_values_data.csv : {has_missing_values}")



Missing values in missing_values_data.csv : True


In [11]:
# Display the template network built by gum.fastBN (bare expression, so the
# notebook renders it with its rich representation).
fast_bkt_net

In [12]:
# Learn the parameters of the template structure from the CSV data.
# NOTE(review): verbosity is presumably required for learner.history() to
# record the per-iteration values printed further down -- confirm in pyAgrum.
learner.setVerbosity(True)
# Use EM for learning; 1e-8 is presumably the stopping threshold -- confirm
# against the BNLearner.useEM documentation.
learner.useEM(1e-8)
# Parameters are learned on the DAG of fast_bkt_net; the result is kept in `bn`.
bn=learner.learnParameters(fast_bkt_net.dag())
print(f"# iterations : {learner.nbrIterations()}")

# iterations : 30


In [13]:
# Show inference on the template network (first) next to inference on the
# network with learned parameters (second).
gnb.sideBySide(gnb.getInference(fast_bkt_net),gnb.getInference(bn))

0,1
"structs Inference in 1.39ms A_i-1  2021-05-21T11:28:12.665320  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  eval_A_i-1  2021-05-21T11:28:12.692102  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i-1->eval_A_i-1 A_i  2021-05-21T11:28:12.718623  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i-1->A_i C_i-1  2021-05-21T11:28:12.877780  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i-1->C_i-1 eval_A_i  2021-05-21T11:28:12.744571  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i->eval_A_i C_i  2021-05-21T11:28:12.965406  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i->C_i B_i-1  2021-05-21T11:28:12.771010  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  eval_B_i-1  2021-05-21T11:28:12.797752  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_i-1->eval_B_i-1 B_i  2021-05-21T11:28:12.824482  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_i-1->B_i B_i-1->C_i-1 eval_B_i  2021-05-21T11:28:12.850895  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_i->eval_B_i B_i->C_i eval_C_i-1  2021-05-21T11:28:12.938344  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_i-1->eval_C_i-1 C_i-1->C_i eval_C_i  2021-05-21T11:28:12.991754  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_i->eval_C_i","structs Inference in 1.69ms A_i-1  2021-05-21T11:28:13.402535  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  eval_A_i-1  2021-05-21T11:28:13.429250  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i-1->eval_A_i-1 A_i  2021-05-21T11:28:13.455919  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i-1->A_i C_i-1  2021-05-21T11:28:13.655521  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i-1->C_i-1 eval_A_i  2021-05-21T11:28:13.481957  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_i->eval_A_i C_i  2021-05-21T11:28:13.707580  image/svg+xml  Matplotlib v3.4.1, 
https://matplotlib.org/  A_i->C_i B_i-1  2021-05-21T11:28:13.508208  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  eval_B_i-1  2021-05-21T11:28:13.534072  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_i-1->eval_B_i-1 B_i  2021-05-21T11:28:13.603054  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_i-1->B_i B_i-1->C_i-1 eval_B_i  2021-05-21T11:28:13.629232  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_i->eval_B_i B_i->C_i eval_C_i-1  2021-05-21T11:28:13.681359  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_i-1->eval_C_i-1 C_i-1->C_i eval_C_i  2021-05-21T11:28:13.733274  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_i->eval_C_i"


In [14]:
# Print the sequence of values recorded by the learner during learning
# (one entry per iteration in the captured output).
print(learner.history())

(0.07946054225130018, 0.05475891349999342, 0.038181135861092966, 0.026210456419333127, 0.017792423219705453, 0.01198332849516338, 0.008026907291904471, 0.005356427223941663, 0.003564878760784117, 0.002367899098290687, 0.00157035882698965, 0.0010399356166799834, 0.0006974581127332707, 0.0004892496039723058, 0.00034787519782391826, 0.00025191999186806573, 0.00018681303928030652, 0.0001426497249224154, 0.0001127018092719917, 9.240081663621225e-05, 7.864568983781604e-05, 6.93318800439922e-05, 6.303132196607847e-05, 5.8775079310535074e-05, 5.5905772594101665e-05, 5.397742089698267e-05, 5.2687468476150267e-05, 5.183066482073703e-05, 5.1267772279735207e-05, 5.090433526100558e-05)


In [15]:
# Print the learned conditional probability table of A_i; its only parent in
# the network built above is A_i-1.
print(bn.cpt("A_i"))


      ||  A_i              |
A_i-1 ||0        |1        |
------||---------|---------|
0     || 0.4939  | 0.5061  |
1     || 0.4544  | 0.5456  |

