In [1]:
import numpy as np

In [2]:
# CONFIGURATION CONSTANTS
#   KEEP EVERY VARIABLE BELOW EVEN IF IT LOOKS UNUSED: OTHER CELLS DEPEND ON THEM.

NUM_QS = 100          # NUMBER OF QUESTIONS
NUM_CONCEPTS = 20     # NUMBER OF CONCEPTS
NUM_LOS = 4           # NUMBER OF LOs

# DISCRETE SET OF SCORES A STUDENT CAN GET ON A QUESTION
SCORE_DISTRIBUTION = [0, 1]
# PROBABILITY OF EACH SCORE (SAME ORDER AS SCORE_DISTRIBUTION)
ANSWER_PROBABILITY = [0.4, 0.6]

# NUMBER OF CONCEPTS PER QUESTION, FORMAT [MIN, MAX] (BOTH INCLUSIVE)
NUM_CONCEPTS_PER_Q = [4, 5]

# CUMULATIVE ENDPOINTS OF THE QUESTION RANGE BELONGING TO EACH LO
#   E.G. LO_DISTRIBUTION = [20, 50, 76, 100] MEANS
#
#       Q 1-20   :  LO-1
#       Q 21-50  :  LO-2
#       Q 51-76  :  LO-3
#       Q 77-100 :  LO-4
LO_DISTRIBUTION = [20, 50, 76, 100]

# CUMULATIVE ENDPOINTS OF THE CONCEPT (NODE) RANGE BELONGING TO EACH KG LAYER
#   E.G. CONCEPT_DISTRIBUTION_KG = [1, 4, 11, 17, 20] IS EQUIVALENT TO:
#
#       LAYER 1: NODE 1
#       LAYER 2: NODE 2,3,4
#       LAYER 3: NODE 5,6,7,8,9,10,11
#       LAYER 4: NODE 12,13,14,15,16,17
#       LAYER 5: NODE 18,19,20
#
#   * LATER CONVERTED TO A DICTIONARY WITH EACH CONCEPT AS KEY AND ITS LAYER NUM AS VALUE
CONCEPT_DISTRIBUTION_KG = [1, 4, 11, 17, 20]

# THE CONCEPT-LAYER RELATION CAN INSTEAD BE READ FROM A CSV FILE:
#   SET CONCEPT_DISTRIBUTION_KG_PATH TO THE PATH OF THE CSV FILE,
#       E.G. CONCEPT_DISTRIBUTION_KG_PATH = 'data/concepts_layers.csv'
#   FORMAT: CONCEPTS AND LAYER NUMS MUST BE IN COLUMNS NAMED 'concept' AND 'layer' RESPECTIVELY;
#           OTHERWISE CHANGE THE COLUMN NAMES IN THE CELL THAT READS THE FILE
#   LEAVE THE STRING EMPTY ('') TO GENERATE THE RELATION FROM CONCEPT_DISTRIBUTION_KG
CONCEPT_DISTRIBUTION_KG_PATH = 'data/concepts_layers.csv'

# WEIGHTS DECAY EXPONENTIALLY WITH DEPTH IN KG:
#   FOR LAYER L THE WEIGHTS ARE MULTIPLIED BY <ALPHA^(L-1)>.  NOTE: ALPHA <= 1
ALPHA = 0.87

In [3]:
# GENERATE DATA

# THE DATA IS STORED IN THE DICTIONARY 'MAP'
#    KEY   : Q_ID (1-BASED)
#    VALUE : (RESPONSE, np.array(CONCEPTS), LO)

# THE LAST ENDPOINT MUST COVER EVERY QUESTION; OTHERWISE THE LO LOOKUP BELOW
# WOULD RUN PAST THE END OF LO_DISTRIBUTION PART-WAY THROUGH THE LOOP
if NUM_QS > 0 and (not LO_DISTRIBUTION or LO_DISTRIBUTION[-1] < NUM_QS):
    raise ValueError("LO_DISTRIBUTION must end with an endpoint >= NUM_QS")

# SET SEED TO MAINTAIN THE SAME RANDOM GENERATOR
np.random.seed(0)

# INIT DICTIONARY
MAP = {}

lo = 1              # CURRENT LO (1-BASED); UPDATED IN LOOP
lo_cur_idx = 0      # INDEX INTO LO_DISTRIBUTION OF THE CURRENT LO'S ENDPOINT

for i in range(NUM_QS):

    # SETTING QUESTION'S ANSWER CORRECT (= 1) OR INCORRECT (= 0)
    response = np.random.choice(SCORE_DISTRIBUTION, p=ANSWER_PROBABILITY)

    # CONSTRUCTING THE CONCEPTS RELATED TO THE QUESTION
    #   THE SIZE IS DRAWN BEFORE THE CONCEPTS, MATCHING THE ORIGINAL ORDER OF RANDOM
    #   CALLS SO THAT THE SEEDED OUTPUT STAYS THE SAME
    num_concepts_in_q = np.random.randint(low=NUM_CONCEPTS_PER_Q[0],
                                          high=NUM_CONCEPTS_PER_Q[1] + 1)
    concepts = np.random.choice(NUM_CONCEPTS, size=num_concepts_in_q, replace=False)
    concepts += 1   # SHIFT FROM 0-BASED DRAWS TO 1-BASED CONCEPT IDS

    # ASSIGN LO
    #   'while' (NOT 'if') SO THAT A REPEATED ENDPOINT, I.E. AN LO WITHOUT ANY
    #   QUESTION, IS SKIPPED IN ONE GO INSTEAD OF MISLABELLING THIS QUESTION
    while i >= LO_DISTRIBUTION[lo_cur_idx]:
        lo_cur_idx += 1
        lo += 1

    MAP[i + 1] = (response, concepts, lo)
    
        
    

In [4]:
# DISPLAY THE GENERATED DICTIONARY {Q_ID : (RESPONSE, np.array(CONCEPTS), LO)}
MAP

{1: (1, array([16, 11, 15, 14,  2]), 1),
 2: (0, array([ 5, 13,  8, 18, 20]), 1),
 3: (1, array([19, 18, 15, 20, 17]), 1),
 4: (1, array([ 2, 16, 17, 14, 13]), 1),
 5: (0, array([13,  7, 11,  9, 18]), 1),
 6: (1, array([19,  6, 15,  2,  5]), 1),
 7: (1, array([20,  6, 19, 16]), 1),
 8: (0, array([ 7, 10,  2,  6]), 1),
 9: (0, array([ 3, 19,  9, 11, 20]), 1),
 10: (0, array([ 6, 15,  5,  1, 11]), 1),
 11: (1, array([15, 14, 18, 10, 13]), 1),
 12: (1, array([ 7,  1, 12, 13,  5]), 1),
 13: (0, array([10, 17, 14, 20]), 1),
 14: (1, array([ 1, 15, 13,  7,  8]), 1),
 15: (0, array([11,  4,  8, 16]), 1),
 16: (1, array([ 7,  3, 13, 12,  6]), 1),
 17: (1, array([ 2, 18, 12,  7]), 1),
 18: (1, array([ 9, 11,  5, 14, 17]), 1),
 19: (0, array([ 6,  5, 20,  3]), 1),
 20: (1, array([ 1,  3, 11, 15]), 1),
 21: (1, array([17, 13, 16, 20]), 2),
 22: (1, array([15,  6,  1, 13,  8]), 2),
 23: (0, array([16, 19, 11, 12, 14]), 2),
 24: (1, array([ 2, 20,  3, 11, 14]), 2),
 25: (1, array([ 9, 16, 20, 14]),

In [5]:
# ASSIGNING CONCEPTS TO LAYERS IN KG

# THE NODE-TO-LAYER RELATION IS STORED IN THE DICTIONARY 'KG'
#   FORMAT: {NODE_NUM : LAYER_NUM}

# IF CONCEPT_DISTRIBUTION_KG_PATH IS GIVEN, READ THE RELATION FROM THE CSV FILE
if CONCEPT_DISTRIBUTION_KG_PATH:
    # IMPORTED HERE SO THAT pandas IS ONLY REQUIRED WHEN A CSV FILE IS ACTUALLY USED
    import pandas as pd

    # THE FIRST ROW OF THE FILE IS THE HEADER; COLUMNS 'concept' AND 'layer' ARE READ
    df = pd.read_csv(CONCEPT_DISTRIBUTION_KG_PATH, header=0)
    KG = dict(zip(list(df['concept']), list(df['layer'])))

# ELSE CONSTRUCT IT FROM THE CUMULATIVE ENDPOINTS IN CONCEPT_DISTRIBUTION_KG
else:
    # THE LAST ENDPOINT MUST COVER EVERY CONCEPT; OTHERWISE THE LOOKUP BELOW
    # WOULD RUN PAST THE END OF CONCEPT_DISTRIBUTION_KG
    if NUM_CONCEPTS > 0 and (not CONCEPT_DISTRIBUTION_KG
                             or CONCEPT_DISTRIBUTION_KG[-1] < NUM_CONCEPTS):
        raise ValueError("CONCEPT_DISTRIBUTION_KG must end with an endpoint >= NUM_CONCEPTS")

    KG = {}   # MUST BE 'KG' (NOT 'KG_'): THIS IS THE NAME FILLED BELOW AND READ BY LATER CELLS
    j = 0     # 0-BASED INDEX OF THE CURRENT LAYER
    for i in range(NUM_CONCEPTS):
        # 'while' (NOT 'if') SO THAT A REPEATED ENDPOINT, I.E. AN EMPTY LAYER, IS SKIPPED
        while i + 1 > CONCEPT_DISTRIBUTION_KG[j]:
            j += 1
        KG[i + 1] = j + 1

In [6]:
# DISPLAY THE NODE-TO-LAYER DICTIONARY {NODE_NUM : LAYER_NUM}
KG

{1: 1,
 2: 2,
 3: 2,
 4: 2,
 5: 3,
 6: 3,
 7: 3,
 8: 3,
 9: 3,
 10: 3,
 11: 3,
 12: 4,
 13: 4,
 14: 4,
 15: 4,
 16: 4,
 17: 4,
 18: 5,
 19: 5,
 20: 5}

In [7]:
# WEIGHTS UPDATE

    # W HAS SHAPE (NUM_LOS, NUM_CONCEPTS): ONE ROW PER LO, ONE COLUMN PER CONCEPT
    # E.G. W[:,1] IS WEIGHTS FOR 2-ND CONCEPT
    # NOTE: WHILE USING W[:,2], RESHAPE TO (NUM_LOS,1) TO AVOID BROADCASTING ISSUES


# INIT WEIGHTS TO ZEROS
W = np.zeros((NUM_LOS, NUM_CONCEPTS))

# COUNT THE FREQUENCY OF EACH CONCEPT IN EACH LO
for i in range(NUM_QS):
    rel_concepts = MAP[i+1][1]
    rel_lo = MAP[i+1][2]

    # UNBUFFERED ADD: EVERY LISTED CONCEPT IS COUNTED ONCE PER OCCURRENCE
    np.add.at(W, (rel_lo - 1, np.asarray(rel_concepts) - 1), 1)


# STORE W AS F (FREQUENCY MATRIX) [TO BE USED LATER]
F = np.array(W, copy=True)

# PRIORITISE CONCEPT WEIGHTS BASED ON LAYER : WEIGHT DECAY EXPONENTIALLY WITH DEPTH IN KG
    # FOR LAYER L, THE DECAY IS <ALPHA^(L-1)> WHICH IS MULTIPLIED TO WEIGHTS
    # CHANGE 'ALPHA' AT TOP IF REQUIRED
    # NOTE: ALPHA <= 1

for i in range(NUM_CONCEPTS):
    W[:, i] *= (ALPHA ** (KG[i+1] - 1))


# NORMALISING FACTOR: PER-LO SUM OF THE DECAYED FREQUENCIES, SHAPE (NUM_LOS,1)
W_SUM = np.sum(W, axis=1, keepdims=True)

# NORMALISING WEIGHTS
#   AN LO WITHOUT ANY QUESTION HAS A ZERO SUM; ITS ROW IS LEFT AT ZERO INSTEAD OF
#   BEING DIVIDED BY ZERO (WHICH WOULD FILL THE ROW WITH NaN)
has_questions = W_SUM[:, 0] != 0
W[has_questions] /= W_SUM[has_questions]

# MAKE NUMERATOR 1 FOR CONTRIBUTING CONCEPTS (DIVIDE BY FREQ F)
#   NOTE: F IS MODIFIED IN PLACE, SO ITS ZERO ENTRIES HOLD 1e-8 FROM HERE ON
F[F == 0] = 1e-8      # REPLACE 0s with 1e-8 TO AVOID DIVIDE BY ZERO
W /= F

In [8]:
# DISPLAY THE WEIGHT MATRIX OF SHAPE (NUM_LOS, NUM_CONCEPTS)
W

array([[0.01497384, 0.01302724, 0.01302724, 0.01302724, 0.0113337 ,
        0.0113337 , 0.0113337 , 0.0113337 , 0.0113337 , 0.0113337 ,
        0.0113337 , 0.00986032, 0.00986032, 0.00986032, 0.00986032,
        0.00986032, 0.00986032, 0.00857847, 0.00857847, 0.00857847],
       [0.01036085, 0.00901394, 0.00901394, 0.00901394, 0.00784213,
        0.00784213, 0.00784213, 0.00784213, 0.00784213, 0.00784213,
        0.00784213, 0.00682265, 0.00682265, 0.00682265, 0.00682265,
        0.00682265, 0.00682265, 0.00593571, 0.00593571, 0.00593571],
       [0.01227779, 0.01068168, 0.01068168, 0.01068168, 0.00929306,
        0.00929306, 0.00929306, 0.00929306, 0.00929306, 0.00929306,
        0.00929306, 0.00808496, 0.00808496, 0.00808496, 0.00808496,
        0.00808496, 0.00808496, 0.00703392, 0.00703392, 0.00703392],
       [0.01255208, 0.01092031, 0.01092031, 0.01092031, 0.00950067,
        0.00950067, 0.00950067, 0.00950067, 0.00950067, 0.00950067,
        0.00950067, 0.00826558, 0.00826558, 0

In [9]:
# PREDICT STUDENT'S LO

# ONE ACCUMULATOR ROW PER LO, SHAPE (NUM_LOS,1), STARTING AT ZERO
lo = np.zeros((NUM_LOS, 1))

# WALK THROUGH THE QUESTIONS IN Q_ID ORDER AND ADD UP THE WEIGHTS THEY EARN
for q_id in range(1, NUM_QS + 1):
    entry = MAP[q_id]
    response = entry[0]     # SCORE OF THE QUESTION
    concepts = entry[1]     # CONCEPTS RELATED TO THE QUESTION
    row = entry[2] - 1      # 0-BASED ROW OF THE LO THE QUESTION BELONGS TO

    # A ZERO RESPONSE CONTRIBUTES NOTHING
    if response == 0:
        continue

    # ADD <RESPONSE * WEIGHT> OF EVERY RELATED CONCEPT, ONE AT A TIME
    for concept in concepts:
        lo[row] += response * W[row, concept - 1]

In [10]:
# DISPLAY THE PREDICTED LO VECTOR OF SHAPE (NUM_LOS,1)
lo

array([[0.60969946],
       [0.56429379],
       [0.60290282],
       [0.61745876]])