# Modal Decomposition Model

This model is an adaptation of the math behind the economic complexity index, proposed by Hidalgo and Hausmann in this paper (https://www.pnas.org/content/106/26/10570.short ), explained in detail in section in this supplementary material (https://www.pnas.org/content/suppl/2009/06/22/0900943106.DCSupplemental/Appendix_PDF.pdf ).


INSERT EXPLANATION

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import utils
import models
from permutation_metrics import rank_similarities, rank_similarities_real

%load_ext autoreload
%autoreload 2

np.random.seed(42)

A_true = np.load('../datasets/students_uncorrel.npy')
D_true = np.load('../datasets/questions_uncorrel.npy')

num_students = A_true.shape[0]
num_questions = D_true.shape[0]
guess_prob = 1/5

In [2]:
def sigmoid(x):
    """
    Sigmoid function.

    :param x: Argument to the function
    :return: The sigmoid of x
    """
    return 1 / (1 + np.exp(-x))


def sigmoid_irf(a, c, d):
    """

    :param a: ability
    :param c: guess probability
    :param d: difficulty
    
    :return: the probability that the student will get the question right
    """
    return c + (1 - c) * sigmoid(a - d)


def floored_exp_irf(a, d, l, c):
    """
    Floored exponential function with parameters lambda, a and d.

    :param l: lambda, slope for exponential curve
    :param a: ability: [0, 1]
    :param d: difficulty: [0, 1]
    :param c: guess probability

    :return: the probability that the student will get the question right
    """
    return np.maximum(c, 1 - np.exp(-l*(a-d)))

# FE Simulated Data (Uncorrelated)

## Inference

In [63]:
R = np.load('../datasets/floored_exp_uncorrel.npy')

s, q = models.modal_decomp(R)
print('RMSE(A, A_true) = ', utils.rmse(s, A_true))
print('RMSE(D, D_true) = ', utils.rmse(q, D_true))

print(rank_similarities(A_true, R, np.expand_dims(s, axis=1))['summary'])

RMSE(A, A_true) =  0.2958020117826889
RMSE(D, D_true) =  0.3690547366615422

    Summary of Ranking Evaluation: 
    Correlations with true rankings derived from A_true.
        Baseline: 
            Kendall:0.847 (p-value 0.0)
            Spearman: 0.966 (p-value 0.0) 
            
        Prediction:
            Kendall: 0.494 (p-value 0.0)
            Spearman: 0.693 (p-value 0.0)  
            
    Average difference: -0.313 (absolute diff., vs. the baseline) 
    


## Prediction

In [32]:
train = R[:int(num_students*0.8)]
test = R[int(num_students*0.8):]

_, q = models.modal_decomp(train)
s, _ = models.modal_decomp(test[:, :-1])

probs_sigmoid = sigmoid_irf(s, q[-1], guess_prob)
probs_fe = floored_exp_irf(s, q[-1], 10, guess_prob)

preds_sigmoid = (probs_sigmoid >= 0.5).astype(int)
preds_fe = (probs_fe >= 0.5).astype(int)

preds_oracle = (floored_exp_irf(A_true[int(num_students*0.8):], D_true[-1], 10, guess_prob) >= 0.5).astype(int)

print('Oracle acc = {}'.format(np.mean((preds_oracle == test[:, -1]).astype(int))))
print('Sigmoid test acc = {}'.format(np.mean((preds_sigmoid == test[:, -1]).astype(int))))
print('FE test acc = {}'.format(np.mean((preds_fe == test[:, -1]).astype(int))))

Oracle acc = 0.4884567901234568
Sigmoid test acc = 0.6222222222222222
FE test acc = 0.45555555555555555


# Sigmoid Simulated Data (Uncorrelated)

## Inference

In [23]:
R = np.load('../datasets/sigmoid_irf_uncorrel.npy')

s, q = models.modal_decomp(R)
print('RMSE(A, A_true) = ', utils.rmse(s, A_true))
print('RMSE(D, D_true) = ', utils.rmse(q, D_true))

print(rank_similarities(A_true, R, np.expand_dims(s, axis=1))['summary'])

RMSE(A, A_true) =  0.2878212241644609
RMSE(D, D_true) =  0.30075461144800103

    Summary of Ranking Evaluation: 
    Correlations with true rankings derived from A_true.
        Baseline: 
            Kendall:0.434 (p-value 0.0)
            Spearman: 0.608 (p-value 0.0) 
            
        Prediction:
            Kendall: -0.016 (p-value 0.439)
            Spearman: -0.026 (p-value 0.416)  
            
    Average difference: -0.542 (absolute diff., vs. the baseline) 
    


## Prediction

In [31]:
train = R[:int(num_students*0.8)]
test = R[int(num_students*0.8):]

_, q = models.modal_decomp(train)
s, _ = models.modal_decomp(test[:, :-1])

probs_sigmoid = sigmoid_irf(s, q[-1], guess_prob)
probs_fe = floored_exp_irf(s, q[-1], 10, guess_prob)

preds_sigmoid = (probs_sigmoid >= 0.5).astype(int)
preds_fe = (probs_fe >= 0.5).astype(int)

preds_oracle = (sigmoid_irf(A_true[int(num_students*0.8):], D_true[-1], guess_prob) >= 0.5).astype(int)

print('Sigmoid test acc = {}'.format(np.mean((preds_sigmoid == test[:, -1]).astype(int))))
print('FE test acc = {}'.format(np.mean((preds_fe == test[:, -1]).astype(int))))

print('Oracle acc = {}'.format(np.mean((preds_oracle == test[:, -1]).astype(int))))

Sigmoid test acc = 0.6222222222222222
FE test acc = 0.45555555555555555
Oracle acc = 0.6222222222222222


# Real Data

## Inference

In [7]:
R = np.genfromtxt('../datasets/real_data.csv', delimiter=',')
num_students, num_questions = R.shape

s, q = models.modal_decomp(R)

print(rank_similarities_real(R, np.expand_dims(1-s, axis=1))['summary'])


    Summary of Ranking Evaluation:
    Correlations between R and A_pred.
    
        Baseline: 
            Kendall:0.138 (p-value 0.0)
            Spearman: 0.195 (p-value 0.0) 
            
        Average Correlation: 0.166
    


## Prediction

In [8]:
train = R[:int(num_students*0.8)]
test = R[int(num_students*0.8):]

_, q = models.modal_decomp(train)
s, _ = models.modal_decomp(test[:, :-1])

probs_sigmoid = sigmoid_irf(s, q[-1], guess_prob)
probs_fe = floored_exp_irf(s, q[-1], 10, guess_prob)

preds_sigmoid = (probs_sigmoid >= 0.5).astype(int)
preds_fe = (probs_fe >= 0.5).astype(int)

print('Sigmoid test acc = {}'.format(np.mean((preds_sigmoid == test[:, -1]).astype(int))))
print('FE test acc = {}'.format(np.mean((preds_fe == test[:, -1]).astype(int))))

Sigmoid test acc = 0.76875
FE test acc = 0.4875
