# Modal Decomposition Model

This model is an adaptation of the math behind the economic complexity index, proposed by Hidalgo and Hausmann in this paper (https://www.pnas.org/content/106/26/10570.short ), explained in detail in section in this supplementary material (https://www.pnas.org/content/suppl/2009/06/22/0900943106.DCSupplemental/Appendix_PDF.pdf ).

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import utils
import models
from permutation_metrics import rank_similarities, rank_similarities_real

%load_ext autoreload
%autoreload 2

np.random.seed(42)

A_true = np.load('../datasets/students_uncorrel.npy')
D_true = np.load('../datasets/questions_uncorrel.npy')

num_students = A_true.shape[0]
num_questions = D_true.shape[0]
guess_prob = 1/5

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
def sigmoid(x):
    """
    Sigmoid function.

    :param x: Argument to the function
    :return: The sigmoid of x
    """
    return 1 / (1 + np.exp(-x))


def sigmoid_irf(a, c, d):
    """

    :param a: ability
    :param c: guess probability
    :param d: difficulty
    
    :return: the probability that the student will get the question right
    """
    return c + (1 - c) * sigmoid(a - d)


def floored_exp_irf(a, d, l, c):
    """
    Floored exponential function with parameters lambda, a and d.

    :param l: lambda, slope for exponential curve
    :param a: ability: [0, 1]
    :param d: difficulty: [0, 1]
    :param c: guess probability

    :return: the probability that the student will get the question right
    """
    return np.maximum(c, 1 - np.exp(-l*(a-d)))

# FE Simulated Data (Uncorrelated)

## Inference

In [5]:
R = np.load('../datasets/floored_exp_uncorrel.npy')

s, q = models.modal_decomp(R)
print('RMSE(A, A_true) = ', utils.rmse(s, A_true))
print('RMSE(D, D_true) = ', utils.rmse(q, D_true))

print(rank_similarities(A_true, R, s)['summary'])

RMSE(A, A_true) =  0.2958020117826889
RMSE(D, D_true) =  0.3690547366615422

    Summary of Ranking Evaluation:
        
    Baseline:
        Kendall Rank correlation = 0.85, 	 p-value = 0.0
        Spearman correlation = 0.966, 	 p-value = 0.0.
         
    Inferred:
        Kendall Rank correlation = 0.494, 	 p-value = 0.0
        Spearman correlation = 0.693, 	 p-value = 0.0.
    
    This gives us an average difference of -0.314 versus the baseline. 
    


## Prediction

In [9]:
train = R[:int(num_students*0.8)]
test = R[int(num_students*0.8):]

_, q = models.modal_decomp(train)
s, _ = models.modal_decomp(test[:, :-1])

probs_sigmoid = sigmoid_irf(s, q[-1], guess_prob)
probs_fe = floored_exp_irf(s, q[-1], 10, guess_prob)

preds_sigmoid = (probs_sigmoid >= 0.5).astype(int)
preds_fe = (probs_fe >= 0.5).astype(int)

preds_oracle = (floored_exp_irf(A_true, D_true[-1], 10, guess_prob) >= 0.5).astype(int)

print('Oracle acc = {}'.format(np.mean((preds_oracle == test[:, -1]).astype(int))))
print('Sigmoid test acc = {}'.format(np.mean((preds_sigmoid == test[:, -1]).astype(int))))
print('FE test acc = {}'.format(np.mean((preds_fe == test[:, -1]).astype(int))))

Oracle acc = 0.49636
Sigmoid test acc = 0.57
FE test acc = 0.6


# Sigmoid Simulated Data (Uncorrelated)

## Inference

In [26]:
R = np.load('../datasets/sigmoid_irf_uncorrel.npy')

s, q = models.modal_decomp(R)
print('RMSE(A, A_true) = ', utils.rmse(s, A_true))
print('RMSE(D, D_true) = ', utils.rmse(q, D_true))

print(rank_similarities(A_true, R, s)['summary'])

RMSE(A, A_true) =  0.2878212241644609
RMSE(D, D_true) =  0.30075461144800103

    Summary of Ranking Evaluation:
        
    Baseline:
        Kendall Rank correlation = 0.439, 	 p-value = 0.0
        Spearman correlation = 0.608, 	 p-value = 0.0.
         
    Inferred:
        Kendall Rank correlation = -0.016, 	 p-value = 0.439
        Spearman correlation = -0.026, 	 p-value = 0.416.
    
    This gives us an average difference of -0.544 versus the baseline. 
    


## Prediction

In [111]:
train = R[:int(num_students*0.8)]
test = R[int(num_students*0.8):]

_, q = models.modal_decomp(train)
s, _ = models.modal_decomp(test[:, :-1])

probs_sigmoid = sigmoid_irf(s, q[-1], guess_prob)
probs_fe = floored_exp_irf(s, q[-1], 10, guess_prob)

preds_sigmoid = (probs_sigmoid >= 0.5).astype(int)
preds_fe = (probs_fe >= 0.5).astype(int)

preds_oracle = (sigmoid_irf(A_true, D_true[-1], guess_prob) >= 0.5).astype(int)

print('Sigmoid test acc = {}'.format(np.mean((preds_sigmoid == test[:, -1]).astype(int))))
print('FE test acc = {}'.format(np.mean((preds_fe == test[:, -1]).astype(int))))

print('Oracle acc = {}'.format(np.mean((preds_oracle == test[:, -1]).astype(int))))

Sigmoid test acc = 0.65
FE test acc = 0.365
Oracle acc = 0.65


## Evaluation

In [None]:
import permutation_metrics

r = permutation_metrics.rank_similarities(A_true, s)

# Real Data

## Inference

In [17]:
R = np.genfromtxt('../datasets/real_data.csv', delimiter=',')
num_students, num_questions = R.shape

In [13]:
s

array([0.4946419 , 0.51839437, 0.85041656, 0.5144752 , 0.51251577,
       0.51252949, 0.36852449, 0.02334023, 0.37233234, 0.48933985,
       0.50284305, 0.98633174, 0.85628144, 0.38053349, 0.0155376 ,
       0.5144752 , 0.02334023, 0.51839437, 0.51839437, 0.39605737,
       0.51839437, 0.51251577, 0.51251577, 0.98827745, 0.51251577,
       0.13786088, 0.99219662, 0.50284305, 0.52618327, 0.51251577,
       0.39020621, 0.51838065, 0.51651055, 0.13599078, 0.39020621,
       0.9825239 , 0.50870793, 0.52031839, 0.0412141 , 0.98633174,
       0.0136675 , 0.85392349, 0.37819722, 0.52619699, 0.38598612,
       0.39019249, 0.38639837, 0.38012124, 0.39019249, 0.49893759,
       0.50285677, 0.51839437, 0.5144752 , 0.5005205 , 0.50861032,
       0.51839437, 0.13200972, 0.51839437, 0.39605737, 0.00967273,
       0.51252949, 0.51839437, 0.50439022, 0.49892387, 0.51839437,
       0.50872165, 0.51252949, 0.50674021, 0.85980209, 0.51839437,
       0.51641294, 0.50832312, 0.52227782, 0.38599984, 0.50052

## Prediction

In [6]:
train = R[:int(num_students*0.8)]
test = R[int(num_students*0.8):]

_, q = models.modal_decomp(train)
s, _ = models.modal_decomp(test[:, :-1])

probs_sigmoid = sigmoid_irf(s, q[-1], guess_prob)
probs_fe = floored_exp_irf(s, q[-1], 10, guess_prob)

preds_sigmoid = (probs_sigmoid >= 0.5).astype(int)
preds_fe = (probs_fe >= 0.5).astype(int)

print('Sigmoid test acc = {}'.format(np.mean((preds_sigmoid == test[:, -1]).astype(int))))
print('FE test acc = {}'.format(np.mean((preds_fe == test[:, -1]).astype(int))))

Sigmoid test acc = 0.76875
FE test acc = 0.4875


0.8675014735243889