In [1]:
import numpy as np

In [2]:
# DEFINE CONSTANTS
  # DO NOT REMOVE ANY VARIABLES FROM HERE EVEN IF NOT USED. IT CAN LEAD TO ERRORS BECAUSE OF DEPENDENCIES.

NUM_QS = 100
NUM_CONCEPTS = 25
NUM_LOS = 5

SCORE_DISTRIBUTION = [0,1]         # DISCRETE SET OF POSSIBLE STUDENT'S SCORE FROM A QUESTION
ANSWER_PROBABILITY = [0.4,0.6]     # PROBABILITY DISTRIBUTION OF STUDENT'S SCORE (IN THE SAME ORDER AS ABOVE)

NUM_CONCEPTS_PER_Q = [3,5]         # FORMAT [MIN,MAX] (BOTH INCL.)

LO_SET = [1,2,3,4,5]                        # SET OF POSSIBLE LOs
LO_DISTRIBUTION = [0.3,0.2,0.1,0.15,0.25]   # PROBABILITY DISTRIBUTION OF RESPECTIVE LOs


CONCEPT_DISTRIBUTION_KG = [1,5,14,21,25]   # MAKE IT CUMMULATIVE: GIVE THE ENDPOINTS
'''
E.G. CONCEPT_DISTRIBUTION_KG = [1,4,11,17,20] IS EQUIVALENT TO:

    LAYER 1: NODE 1
    LAYER 2: NODE 2,3,4
    LAYER 3: NODE 5,6,7,8,9,10,11
    LAYER 4: NODE 12,13,14,15,16,17
    LAYER 5: NODE 18,19,20

  * CONVERTED TO DICTIONARY WITH EACH CONCEPT AS KEY AND LAYER NUM AS VALUE
'''

# THIS CONCEPT-LAYER RELATION CAN BE READ FROM CSV FILE
# FILL CONCEPT_DISTRIBUTION_KG_PATH WITH PATH OF CSV FILE
#   E.G. CONCEPT_DISTRIBUTION_KG_PATH = 'data/concepts_layers.csv'
#   FORMAT: THE CONCEPTS AND LAYER NUM SHOULD BE IN COLUMN-NAMED concept' AND 'layer' RESPECTIVELY; OTHERWISE CHANGE ACCORDINGLY BELOW
CONCEPT_DISTRIBUTION_KG_PATH = ''

# LEAVE CONCEPT_DISTRIBUTION_KG_PATH STRING EMPTY ('') IF YOU WANT TO GENERATE THEM USING CONCEPT_DISTRIBUTION_KG


# WEIGHT DECAY EXPONENTIALLY WITH DEPTH IN KG
ALPHA = 0.87       # FOR LAYER L, THE DECAY IS <ALPHA^(L-1)> WHICH IS MULTIPLIED TO WEIGHTS.  # NOTE: ALPHA <= 1

In [3]:
# GENERATE DATA

# STORING THE DATA AS DISCTIONARY (IN 'MAP')
#    KEY : Q_ID
#    VALUE : (RESPONSE,np.array(CONCEPTS),LO)


# SET SEED TO MAINTAIN THE SAME RANDOM GENERATOR
np.random.seed(0)


# INIT DICTIONARY
MAP = {}

for i in range(NUM_QS):

    # SETTING QUESTIONS' ANSWER CORRECT (= 1) OR INCORRECT (= 0)
    response = np.random.choice(SCORE_DISTRIBUTION, p=ANSWER_PROBABILITY)              # 0 or 1

    # CONSTRUCTING THE CONCEPTS RELATED TO EACH QUESTION
    concepts = np.random.choice(NUM_CONCEPTS,
                                size=np.random.randint(low=NUM_CONCEPTS_PER_Q[0],high=NUM_CONCEPTS_PER_Q[1]+1),
                                replace = False)
    '''
    concepts += 1

    # ASSIGN LO
    if i >= LO_DISTRIBUTION[lo_cur_idx]:
        lo_cur_idx +=1
        lo +=1
    
    '''
    
    lo = np.random.choice(LO_SET, p=LO_DISTRIBUTION)              # 0 or 1

    MAP.update({(i+1):(response,concepts,lo)})

In [4]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

MAP

{1: (1, array([ 2, 22, 14, 17]), 4),
 2: (0, array([23, 13, 12,  5]), 2),
 3: (0, array([11, 12,  8]), 1),
 4: (0, array([12,  6, 10]), 5),
 5: (1, array([ 8, 23,  7,  5]), 4),
 6: (1, array([5, 7, 9, 1, 6]), 5),
 7: (1, array([ 3, 12, 19]), 1),
 8: (1, array([24, 11, 17, 16]), 3),
 9: (0, array([ 4, 12, 11,  0]), 2),
 10: (0, array([ 9, 21, 18]), 5),
 11: (1, array([ 9,  0, 10]), 4),
 12: (1, array([ 2,  5, 24,  6]), 4),
 13: (0, array([17, 15,  9]), 1),
 14: (1, array([17, 22,  4]), 3),
 15: (1, array([11,  0, 22, 19]), 5),
 16: (0, array([ 7,  2, 11,  5, 18]), 4),
 17: (0, array([14, 13, 10]), 5),
 18: (1, array([24, 10,  4, 22]), 3),
 19: (0, array([14, 24,  5, 10, 22]), 5),
 20: (0, array([ 4, 17, 11, 19]), 1),
 21: (1, array([ 6, 11, 23, 13, 24]), 1),
 22: (0, array([14,  8,  7, 12, 16]), 2),
 23: (1, array([20,  4,  1,  0, 19]), 5),
 24: (0, array([15, 16, 24, 10]), 2),
 25: (0, array([ 6, 14, 17]), 1),
 26: (0, array([19,  3, 14,  7, 17]), 5),
 27: (0, array([ 1, 16,  5]), 4),


In [5]:
# ASSIGNING CONCEPTS TO LAYERS IN KG

# STORING NODE-TO-LAYER RELATION IN DICTIONARY
#   FORMAT: {NODE_NUM : LAYER_NUM}

# IF CONCEPT_DISTRIBUTION_KG_PATH IS GIVEN, 
if CONCEPT_DISTRIBUTION_KG_PATH:
    import pandas as pd

    df = pd.read_csv(CONCEPT_DISTRIBUTION_KG_PATH, header=0)
    KG = dict(zip(list(df['concept']), list(df['layer'])))

# ELSE CONSTRUCT IT 
else:
    KG = {}
    j = 0
    for i in range(NUM_CONCEPTS):
        if i+1 > CONCEPT_DISTRIBUTION_KG[j]:
            j += 1
        KG.update({i+1:j+1}) 

In [6]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

# KG

In [7]:
# WEIGHTS UPDATE

    # COLUMN-WISE STACKED VECTORS
    # STORED IN W
    # E.G. W[:,1] IS WEIGHTS FOR 2-ND CONCEPT
    # NOTE: WHILE USING W[:,2], RESHAPE TO (NUM_LOS,1) TO AVOID BROADCASTING ISSUES


# INIT WEIGHTS TO ZEROS
W = np.zeros((NUM_LOS,NUM_CONCEPTS))

# CHECK FREQUENCY OF EACH CONCEPT IN A LO
for i in range(NUM_QS):
    rel_concepts = MAP[i+1][1]
    rel_lo = MAP[i+1][2]
    
    for j in range(len(rel_concepts)):
        W[rel_lo-1,rel_concepts[j]-1] += 1


# STORE W AS F (FREQUENCY MATRIX) [TO BE USED LATER]
F = np.array(W,copy=True)

# PRIORITISE CONCEPT WEIGHTS BASED ON LAYER : WEIGHT DECAY EXPONENTIALLY WITH DEPTH IN KG
    # FOR LAYER L, THE DECAY IS <ALPHA^(L-1)> WHICH IS MULTIPLIED TO WEIGHTS
    # CHANGE 'ALPHA' AT TOP IF REQUIRED
    # NOTE: ALPHA <= 1
    

for i in range(NUM_CONCEPTS):
    W[:,i] *= (ALPHA**(KG[i+1]-1))


# NORMALISING FACTOR
W_SUM = np.sum(W,axis=1,keepdims=True)

# NORMALISING WEIGHTS
W /= W_SUM


In [8]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

# W.T

In [9]:
# GENERATE TEST QS

# DEFINE CONSTANTS
  # DO NOT REMOVE ANY VARIABLES FROM HERE EVEN IF NOT USED. IT CAN LEAD TO ERRORS BECAUSE OF DEPENDENCIES.

NUM_QS_TEST = 25

NUM_CONCEPTS_PER_Q_TEST = [3,5]         # FORMAT [MIN,MAX] (BOTH INCL.)

In [10]:
# GENERATE TEST DATA

# STORING THE DATA AS DISCTIONARY (IN 'MAP_TEST')
#    KEY : Q_ID
#    VALUE : (RESPONSE,np.array(CONCEPTS),LO)

#    INIT: RESPONSE=-1 ; LO=0

# SET SEED TO MAINTAIN THE SAME RANDOM GENERATOR
np.random.seed(0)


# INIT DICTIONARY
MAP_TEST = {}


for i in range(NUM_QS_TEST):

    # SETTING QUESTIONS' ANSWER CORRECT (= 1) OR INCORRECT (= 0)
    response = -1

    # CONSTRUCTING THE CONCEPTS RELATED TO EACH QUESTION
    concepts = np.random.choice(NUM_CONCEPTS,                       size=np.random.randint(low=NUM_CONCEPTS_PER_Q_TEST[0],high=NUM_CONCEPTS_PER_Q_TEST[1]+1),
                                replace = False)
    concepts += 1

    lo = 0

    MAP_TEST.update({(i+1):[response,concepts,lo]})

In [11]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

# MAP_TEST

In [12]:
# ASSIGN LOs

for i in range(NUM_QS_TEST):
    concepts = MAP_TEST[i+1][1]
    los = np.sum(W[:,(concepts-1)],axis=1)
    
    MAP_TEST[i+1][2] = np.argmax(los)+1

In [13]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

MAP_TEST

{1: [-1, array([ 3, 21, 15]), 4],
 2: [-1, array([14, 12, 11]), 1],
 3: [-1, array([17, 14, 23]), 5],
 4: [-1, array([ 3, 22, 20, 24]), 3],
 5: [-1, array([20, 13,  9, 22]), 3],
 6: [-1, array([ 6, 22, 17, 16, 23]), 3],
 7: [-1, array([ 7, 20,  2]), 3],
 8: [-1, array([ 4, 13, 20]), 3],
 9: [-1, array([24, 20, 10, 12, 17]), 3],
 10: [-1, array([ 5, 25, 12,  1]), 2],
 11: [-1, array([ 8, 19, 20]), 1],
 12: [-1, array([ 9,  5, 11]), 4],
 13: [-1, array([ 6,  3,  2, 17]), 4],
 14: [-1, array([21,  2, 16,  6, 22]), 3],
 15: [-1, array([13, 17, 24, 18]), 3],
 16: [-1, array([18, 23,  5,  3, 15]), 4],
 17: [-1, array([ 1, 20, 11, 15]), 1],
 18: [-1, array([13, 16, 21,  7, 24]), 3],
 19: [-1, array([ 6,  7, 10, 15]), 4],
 20: [-1, array([ 3, 17, 22, 15, 13]), 3],
 21: [-1, array([ 5,  2, 21, 14, 16]), 4],
 22: [-1, array([ 7,  1, 21]), 3],
 23: [-1, array([18, 19,  4, 25, 22]), 3],
 24: [-1, array([ 7, 12, 24, 14, 25]), 3],
 25: [-1, array([11,  9, 15,  1]), 4]}

In [14]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

# np.sum(F,axis=0,keepdims=True).astype('int',).T

In [15]:
# USING VISHAL's CODE, COMPUTE THE PROBABILITY OF A STUDENT ANSWERING A CONCEPT

# SCORE OF STUDENT
num_ct = np.sum([x[0] for x in list(MAP.values())])
score = num_ct / NUM_QS

# FIND NUMBER OF TIMES A CONCEPT IS ANSWERED BY STUDENTS
# INIT FREQ TO ZEROS
concept_freq = np.zeros((NUM_CONCEPTS,1))

# CHECK FREQUENCY OF EACH CONCEPT IN A LO
for i in range(NUM_QS):
    if MAP[i+1][0]!=0:
        c_weight = MAP[i+1][0]
        rel_concepts = MAP[i+1][1]

        for j in range(len(rel_concepts)):
            concept_freq[rel_concepts[j]-1] += (c_weight*1)



# USE THE VARIABLE 'F' TO GET TOTAL FREQUENCY OF CONCEPTS
concept_freq /= np.sum(F,axis=0,keepdims=True).astype('int',).T

# print(concept_freq)


if score<0.8:  # SOME THRESHOLD IN PAPER
    a=np.log(score/(1-score)).reshape(1,1)
    #print(a)
    #print("\n\n\n")
        
    b = np.log((1-concept_freq)/concept_freq)
    assert b.shape == (NUM_CONCEPTS, 1)
    
    #print(b)
    #print("\n\n\n")
    
    #print(a-b)
        
    prob=np.exp(a-b)/(1+np.exp(a-b))
    q=1-concept_freq
    info=concept_freq*q

In [16]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

prob

array([[0.64705882],
       [0.64705882],
       [0.64705882],
       [0.70967742],
       [0.58778626],
       [0.69417476],
       [0.23404255],
       [0.51677852],
       [0.62694301],
       [0.63583815],
       [0.70967742],
       [0.47826087],
       [0.67073171],
       [0.48734177],
       [0.46610169],
       [0.55      ],
       [0.78571429],
       [0.61971831],
       [0.52071006],
       [0.64705882],
       [0.42307692],
       [0.72588832],
       [0.61111111],
       [0.6875    ],
       [0.51162791]])

In [17]:
assert concept_freq.shape == (NUM_CONCEPTS, 1)

In [18]:
assert np.sum(F,axis=0,keepdims=True).astype('int',).T.shape == (NUM_CONCEPTS, 1)

In [19]:
# PREDICT STUDENT RESPONSE

for i in range(NUM_QS_TEST):
    rel_concepts = MAP_TEST[i+1][1]
    lo = MAP_TEST[i+1][2]
    resp = 0
    
    resp = prob[(rel_concepts-1)] * W[lo-1:lo,(rel_concepts-1)].T
    resp_norm = W[lo-1:lo,(rel_concepts-1)].T
    #print(W[lo-1:lo,(rel_concepts-1)].T.shape)
    #print(resp)
    #print(resp_norm)
    
    print(np.sum(resp)/np.sum(resp_norm))
    
    MAP_TEST[i+1][0] = np.round(np.sum(resp)/np.sum(resp_norm)).astype(int)

0.5690996621382934
0.5702741728359993
0.5929705295908728
0.6986686638220923
0.7040518979080869
0.7158640105788507
0.4415487116051403
0.6924856404799192
0.7039032912881968
0.5315885890065428
0.5534486423015175
0.6352506727311161
0.6641099652616623
0.6276198999418532
0.7400210084033613
0.5859870725268003
0.6546878590507561
0.44832302894284176
0.5059455353037874
0.7518431885786016
0.5901847593992814
0.2913456548640774
0.7177329387234197
0.42939911388894614
0.6098079778069245


In [20]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

MAP_TEST

{1: [1, array([ 3, 21, 15]), 4],
 2: [1, array([14, 12, 11]), 1],
 3: [1, array([17, 14, 23]), 5],
 4: [1, array([ 3, 22, 20, 24]), 3],
 5: [1, array([20, 13,  9, 22]), 3],
 6: [1, array([ 6, 22, 17, 16, 23]), 3],
 7: [0, array([ 7, 20,  2]), 3],
 8: [1, array([ 4, 13, 20]), 3],
 9: [1, array([24, 20, 10, 12, 17]), 3],
 10: [1, array([ 5, 25, 12,  1]), 2],
 11: [1, array([ 8, 19, 20]), 1],
 12: [1, array([ 9,  5, 11]), 4],
 13: [1, array([ 6,  3,  2, 17]), 4],
 14: [1, array([21,  2, 16,  6, 22]), 3],
 15: [1, array([13, 17, 24, 18]), 3],
 16: [1, array([18, 23,  5,  3, 15]), 4],
 17: [1, array([ 1, 20, 11, 15]), 1],
 18: [0, array([13, 16, 21,  7, 24]), 3],
 19: [1, array([ 6,  7, 10, 15]), 4],
 20: [1, array([ 3, 17, 22, 15, 13]), 3],
 21: [1, array([ 5,  2, 21, 14, 16]), 4],
 22: [0, array([ 7,  1, 21]), 3],
 23: [1, array([18, 19,  4, 25, 22]), 3],
 24: [0, array([ 7, 12, 24, 14, 25]), 3],
 25: [1, array([11,  9, 15,  1]), 4]}

In [21]:
rel_concepts = np.array([1,2,3,5])
# print(prob[rel_concepts-1])
assert prob[rel_concepts-1].shape == (4,1)

In [22]:
# (UN)COMMENT THE OUTPUT (IF REQUIRED)

# NUM CORRECT
# np.sum([x[0] for x in list(MAP.values())])