# Model #1 : student generation and parameters learning

In [1]:
import sys

sys.path.append("/Users/olivier/PycharmProjects/bayesian-kst/")

import numpy as np
from kgraph.expert_layer.domain_graph import DomainGraph
from kgraph.expert_layer.knowledge_components import KnowledgeComponent, ProceduralKnowledgeComponent
from kgraph.expert_layer.links import LinkModel, LinkFromParents, LinkFromChildren
from kgraph.resources_layer.exercise_family import ExerciseFamily
from kgraph.resources_layer.exercise import Exercise
from kgraph.learner_layer.evaluation import Evaluation
from kgraph.learner_layer.learner_pool import LearnerPool
from kgraph.learner_layer.learner import Learner, LearnerGraph
from kgraph.helpers.truthtable import truthtable
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
import pyAgrum.lib.dynamicBN as gdyn

## Student generation

In [2]:
# we define the KCs
KC_A = ProceduralKnowledgeComponent(1, "A")
KC_B = ProceduralKnowledgeComponent(2, "B")
KC_C = ProceduralKnowledgeComponent(3, "C")

P_C_knowing_A_and_B = np.array([[1., 0.],
                                [.85, .15],
                                [.9, .1],
                                [.4, .6]])


# we define the link between the two that gonna lead to the link model
link_a_to_b = LinkFromParents(KC_B, [KC_A], [.4, .6])
link_b_to_a = LinkFromChildren(KC_A, [KC_B], [.1, .9])
link_model = LinkModel([LinkFromParents(KC_C, [KC_A, KC_B], P_C_knowing_A_and_B.flatten().tolist())])
# we create the associated domain graph
domain_graph = DomainGraph([KC_A, KC_B, KC_C], link_model)

In [3]:


# default params
params = {"slip": .1, "guess":.25}

# we define the exercises that corresponds to KC_A
ex_A_1 = Exercise(1, "qcm", ex_content="", params=params)
ex_A_2 = Exercise(2, "qcm", ex_content="", params=params)
ex_A_3 = Exercise(3, "qcm", ex_content="", params=params)
ex_A_4 = Exercise(4, "qcm", ex_content="", params=params)
ex_A_5 = Exercise(5, "qcm", ex_content="", params=params)

ex_fam_A = ExerciseFamily(1, "ex_fam_A", KC_A, [ex_A_1, ex_A_2, ex_A_3, ex_A_4, ex_A_5])

# we define the exercises that corresponds to KC_A
ex_B_1 = Exercise(6, "qcm", ex_content="", params=params)
ex_B_2 = Exercise(7, "qcm", ex_content="", params=params)
ex_B_3 = Exercise(8, "qcm", ex_content="", params=params)
ex_B_4 = Exercise(9, "qcm", ex_content="", params=params)
ex_B_5 = Exercise(10, "qcm", ex_content="", params=params)

ex_fam_B = ExerciseFamily(1, "ex_fam_B", KC_B, [ex_B_1, ex_B_2, ex_B_3, ex_B_4, ex_B_5])


# we define the exercises that corresponds to KC_C
ex_C_1 = Exercise(11, "qcm", ex_content="", params=params)
ex_C_2 = Exercise(12, "qcm", ex_content="", params=params)
ex_C_3 = Exercise(13, "qcm", ex_content="", params=params)
ex_C_4 = Exercise(14, "qcm", ex_content="", params=params)
ex_C_5 = Exercise(15, "qcm", ex_content="", params=params)

ex_fam_B = ExerciseFamily(1, "ex_fam_B", KC_C, [ex_C_1, ex_C_2, ex_C_3, ex_C_4, ex_C_5])


In [4]:
learner_pool = LearnerPool(domain_graph)
learner_pool.setup_random_learners(1000)

In [5]:
learner_pool.print_default_learner_graph()
for learner in learner_pool.learners:
    print(learner.learner_graph)

LearnerGraph of Learner 0
P(1) = 0.2 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.2 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.2 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})

LearnerGraph of Learner 1
P(1) = 0.7798151020558914 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.02981724105448591 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.09059227357033721 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})

LearnerGraph of Learner 2
P(1) = 0.14009548879662637 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.8678559173024175 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.1867418811038596 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})

LearnerGraph of Learner 3
P(1) = 0.7198100021462921 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(2) = 0.4144212837932243 (params: {'learn': 0.1, 'delta': -0.9, 'gamma': 2.2})
P(3) = 0.23855079759544734 (params: {'learn': 0.1, 'del

In [6]:
print([learner.id for learner in learner_pool.learners])
n_evals = 3000
sim_evaluations = learner_pool.simulate_evaluations_from_learners(n_evals)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 22

In [7]:
knowledge_components = domain_graph.get_knowledge_components()
import pandas as pd

col_names = [[f"{kc.name}_t-1", f"{kc.name}_t", f"eval{kc.name}_t"] for kc in knowledge_components]
col_names = [element for li in col_names for element in li]


temp_eval = []
for i in range(len(sim_evaluations)):
    temp = []
    for evaluation in sim_evaluations[i]:
        temp += [(evaluation.get_kc(), int(evaluation.answers[key]["success"])) for key in list(evaluation.answers.keys())]
    temp_eval.append(temp)

n_df_rows = sum([
    sum([len(list(evaluation.answers.keys())) for evaluation in sim_evaluations[i]]) if sim_evaluations[i] else 0
        for i in range(len(sim_evaluations))])

data = pd.DataFrame([["?" for _ in range(len(col_names))] for _ in range(n_df_rows)], columns=col_names)

df_idx = 0
for learner_idx in range(len(temp_eval)):
    if sim_evaluations[learner_idx]:
        for i in range(len(temp_eval[learner_idx])):
            prev_kc, prev_succ = temp_eval[learner_idx][i]
            data.iloc[df_idx][f"eval{prev_kc.name}_t"] = int(prev_succ)
            df_idx += 1
            
print(data)

      A_t-1 A_t evalA_t B_t-1 B_t evalB_t C_t-1 C_t evalC_t
0         ?   ?       ?     ?   ?       ?     ?   ?       1
1         ?   ?       ?     ?   ?       ?     ?   ?       0
2         ?   ?       ?     ?   ?       ?     ?   ?       0
3         ?   ?       ?     ?   ?       ?     ?   ?       0
4         ?   ?       ?     ?   ?       ?     ?   ?       0
...     ...  ..     ...   ...  ..     ...   ...  ..     ...
14995     ?   ?       ?     ?   ?       ?     ?   ?       1
14996     ?   ?       ?     ?   ?       ?     ?   ?       0
14997     ?   ?       ?     ?   ?       ?     ?   ?       1
14998     ?   ?       ?     ?   ?       ?     ?   ?       0
14999     ?   ?       ?     ?   ?       ?     ?   ?       0

[15000 rows x 9 columns]


In [8]:
print(data)
data.to_csv("missing_values_data.csv")

      A_t-1 A_t evalA_t B_t-1 B_t evalB_t C_t-1 C_t evalC_t
0         ?   ?       ?     ?   ?       ?     ?   ?       1
1         ?   ?       ?     ?   ?       ?     ?   ?       0
2         ?   ?       ?     ?   ?       ?     ?   ?       0
3         ?   ?       ?     ?   ?       ?     ?   ?       0
4         ?   ?       ?     ?   ?       ?     ?   ?       0
...     ...  ..     ...   ...  ..     ...   ...  ..     ...
14995     ?   ?       ?     ?   ?       ?     ?   ?       1
14996     ?   ?       ?     ?   ?       ?     ?   ?       0
14997     ?   ?       ?     ?   ?       ?     ?   ?       1
14998     ?   ?       ?     ?   ?       ?     ?   ?       0
14999     ?   ?       ?     ?   ?       ?     ?   ?       0

[15000 rows x 9 columns]


## 1) Réalisation de l'exercice associé à KC A

### Classic BKT
**Nodes** :
- for every KC for every step of the evaluation on an exercise family
- for every exercise result in the exercise family associated to the evaluation

**Links**:
- between evaluated KC nodes between successive temporalities 
- between evaluated KC and its parents/children for a same temporality
- between evaluated KC and exercise result node

In [9]:
# we initialize the network
fast_bkt_net_str = ""
# we setup every node

for kc in domain_graph.get_knowledge_components():
    fast_bkt_net_str += f"{kc.name}_t" + "->" + f"eval{kc.name}_t" + ";"
    fast_bkt_net_str += f"{kc.name}_t-1" + "->" + f"{kc.name}_t" + ";"
for kc in domain_graph.get_knowledge_components():
    kc_parents = domain_graph.link_model.get_parents(kc)
    for parent in kc_parents:
        fast_bkt_net_str += f"{parent.name}_t" + "->" + f"{kc.name}_t" + ";"

print(fast_bkt_net_str)
fast_bkt_net = gum.fastBN(fast_bkt_net_str[:-1])

A_t->evalA_t;A_t-1->A_t;B_t->evalB_t;B_t-1->B_t;C_t->evalC_t;C_t-1->C_t;A_t->C_t;B_t->C_t;


In [10]:
learner = gum.BNLearner("missing_values_data.csv", fast_bkt_net)
print(f"Missing values in missing_values_data.csv : {learner.hasMissingValues()}")



Missing values in missing_values_data.csv : True


In [11]:
fast_bkt_net

In [12]:
learner.setVerbosity(True)
learner.useEM(1e-8)
bn=learner.learnParameters(fast_bkt_net.dag())
print(f"# iterations : {learner.nbrIterations()}")

# iterations : 41


In [13]:
gnb.sideBySide(gnb.getInference(fast_bkt_net),gnb.getInference(bn))

0,1
"structs Inference in 0.59ms A_t  2021-05-19T17:35:48.081617  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  evalA_t  2021-05-19T17:35:48.107733  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_t->evalA_t C_t  2021-05-19T17:35:48.240709  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_t->C_t A_t-1  2021-05-19T17:35:48.135326  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_t-1->A_t B_t  2021-05-19T17:35:48.162456  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  evalB_t  2021-05-19T17:35:48.188687  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_t->evalB_t B_t->C_t B_t-1  2021-05-19T17:35:48.214771  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_t-1->B_t evalC_t  2021-05-19T17:35:48.303310  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_t->evalC_t C_t-1  2021-05-19T17:35:48.329347  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_t-1->C_t","structs Inference in 0.67ms A_t  2021-05-19T17:35:48.749529  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  evalA_t  2021-05-19T17:35:48.775494  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_t->evalA_t C_t  2021-05-19T17:35:48.906055  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_t->C_t A_t-1  2021-05-19T17:35:48.801405  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  A_t-1->A_t B_t  2021-05-19T17:35:48.827191  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  evalB_t  2021-05-19T17:35:48.854094  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_t->evalB_t B_t->C_t B_t-1  2021-05-19T17:35:48.880180  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  B_t-1->B_t evalC_t  2021-05-19T17:35:48.931649  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_t->evalC_t C_t-1  2021-05-19T17:35:48.957649  image/svg+xml  Matplotlib v3.4.1, https://matplotlib.org/  C_t-1->C_t"


In [14]:
print(learner.history())

(0.11012410687065081, 0.07023287659482111, 0.049969599193577796, 0.03482657697666091, 0.02390642256271261, 0.0162333818204583, 0.010940065256772104, 0.0073346255280334035, 0.004900125574166997, 0.0032659296848765048, 0.002173283965255281, 0.001444658942077702, 0.000959637365155963, 0.0006371544144853996, 0.0004229085616974099, 0.0002806455026963564, 0.00018621293090074966, 0.0001235440574643702, 8.196105328671237e-05, 5.437205159984378e-05, 3.6068852044062205e-05, 2.3926613798246812e-05, 1.587175912323159e-05, 1.0528476476577977e-05, 6.98399247960937e-06, 4.632767723809208e-06, 3.0730974301212604e-06, 2.0385036564799943e-06, 1.3522164718885548e-06, 8.969759164579227e-07, 5.949973478812045e-07, 3.946837658868391e-07, 2.6180832621710143e-07, 1.736671162239025e-07, 1.15199838896076e-07, 7.641629323312597e-08, 5.068966708013156e-08, 3.362441058875717e-08, 2.230419667663052e-08, 1.4795322118065204e-08, 9.814277580515207e-09)


In [15]:
print(bn.cpt("A_i"))

NotFound: [pyAgrum] Object not found: No element with the key <A_i>