In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from irt import IRTModel
from sklearn import svm
from sklearn.linear_model import SGDRegressor, LinearRegression, BayesianRidge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from beta_irt.visualization.plots import newline
from beta_irt.visualization.plots import plot_parameters
from irt import beta_irt
from sklearn.decomposition import PCA

# Data set

In [None]:
# Input locations (relative paths)
path_data = './data/'
path_uci = './data/UCI - 45/'

# Data set identifier; it is also the stem of the CSV file name
name = 'mpg'

# Load the CSV and discard every row that contains a missing value
data = pd.read_csv(path_uci + name + '.csv').dropna()

## Preprocessing

Variable selection

In [None]:
# Target: the first column of the data set.
y = data.iloc[:, 0]
# Features: every column after the first, except the last three.
X = data.iloc[:, 1:-3]

Train/test split and PCA (features reduced to one principal component)

In [None]:
# Seed for the train/test split (also passed to the IRT step further down)
rd = 42

# Hold out 20% of the rows as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=rd, test_size=0.2
)

# Project the features onto a single principal component.
# The projection is fitted on the training rows and then applied to the test rows.
pca = PCA(n_components=1)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

Generate noise

In [None]:
# Seeded generator so the injected noise is identical on every run
# (reuses the split seed `rd`).
noise_rng = np.random.RandomState(rd)

# Gaussian noise: mean 0, standard deviation 8, one draw per target value
noise_scale = 8
noise_train = noise_rng.normal(loc=0.0, scale=noise_scale, size=len(y_train))
noise_test = noise_rng.normal(loc=0.0, scale=noise_scale, size=len(y_test))

# Apply noise. New Series are built instead of using `+=`, so the objects
# returned by train_test_split are not modified in place.
# NOTE: re-running this cell on its own adds noise again; re-run the split
# cell first to start from the clean targets.
y_train = y_train + noise_train
y_test = y_test + noise_test

# Plot both noise samples; transparency keeps the two histograms visible
plt.figure(figsize=(7, 3))
plt.hist(noise_train, alpha=0.6, label='Train set')
plt.hist(noise_test, alpha=0.6, label='Test set')
plt.ylabel('Frequency')
plt.xlabel('Noise')
plt.legend()
plt.show()

This section generates the data used by the BIRT (Beta-IRT) models.

In [None]:
# Regression models handed to IRTModel (one entry per line)
models = [
    LinearRegression(),
    BayesianRidge(),
    svm.SVR(kernel='linear'),
    svm.SVR(kernel='rbf'),
    KNeighborsRegressor(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    AdaBoostRegressor(),
    MLPRegressor(max_iter=1000, solver='lbfgs'),
    MLPRegressor(hidden_layer_sizes=(50,), max_iter=1000, solver='adam'),
]

# Generate abilities/parameters for BIRT and other info from the
# train/test data prepared above.
Irt = IRTModel(models=models)
Irt.irtMatrix(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    normalize=True,
    base_models=True,
    name=name,
    rd=rd,
)

Before running the cells below, run 'betairt_test.py' on the same data generated above.

# ICC

Item-Response Matrix:

In [None]:
# Directory prefix of the result files read in the next cell
path = './beta_irt/results/'
# Sub-directory appended to `path`
folder = 'mpg/'
# Run identifier embedded in the file names read in the next cell.
# NOTE(review): this rebinds `name`, which earlier held the data set name 'mpg'.
name = 'mpg_s79_f20_sd42'

In [None]:
# File locations for this run
irt_file = './beta_irt/irt_data_' + name + '.csv'
xtest_file = './beta_irt/xtest_' + name + '.csv'
results_dir = path + folder
ability_file = results_dir + 'irt_ability_vi_' + name + '_am1@0_as1@0.csv'
parameter_file = results_dir + 'irt_parameters_vi_' + name + '_am1@0_as1@0.csv'

# Response matrix, first column of the saved test features (as a column
# vector), abilities table and item parameter table
irt = pd.read_csv(irt_file)
X_pc1 = pd.read_csv(xtest_file).iloc[:, 0].values.reshape(-1, 1)
abilities = pd.read_csv(ability_file)
parameters = pd.read_csv(parameter_file)

# Re-label the rows of both tables with the index labels of the test targets
ind = list(y_test.index)
parameters.index = ind
irt.index = ind

In [None]:
# Preview the first rows of the item-response matrix
irt.head()

Look up a specific instance by its index:

In [None]:
# Index label of the instance to inspect
instance = 257
# Show the row(s) of the response matrix whose index equals that label
irt.loc[irt.index == instance]

In [None]:
# First column of the parameter table, used as the item difficulty
# (column order presumed from the variable names; TODO confirm against the file)
difficulty = parameters.iloc[:,0].values
# Second column of the parameter table, used as the item discrimination
discrimination = parameters.iloc[:,1].values

In [None]:
# 200 evenly spaced ability values from 0.0001 to 0.9999, used to draw the curves
ab = np.linspace(start=0.0001, stop=0.9999, num=200)

## Plot parameters

In [None]:
# Two-column array: saved first feature column | noisy test target
y_test_col = y_test.values.reshape(-1, 1)
concat = np.hstack((X_pc1, y_test_col))

In [None]:
plt.figure(figsize=(10, 6))
plt.scatter(concat[:, 0], concat[:, 1])
# Tag every point with the index label of its test instance
for (x_pos, y_pos), label in zip(concat, ind):
    plt.text(x_pos, y_pos, str(label), fontsize=8)

In [None]:
# Draw the item parameters together with the test noise and the first five models
parameter_plot = plot_parameters(
    concat,
    delta=difficulty,
    a=discrimination,
    noise=noise_test,
    models=Irt.models[:5],
)
parameter_plot.show()

In [None]:
# Index labels of the test instances whose curves are drawn in the next cells
chosen_i = [259, 95, 338, 243]

In [None]:
# One panel per chosen instance, two panels per row. The number of rows
# follows the number of instances instead of assuming exactly four.
n_rows = max(1, int(np.ceil(len(chosen_i) / 2)))

plt.figure(figsize=(12, 12))
for sub, i in enumerate(chosen_i):
    plt.subplot(n_rows, 2, sub + 1)

    # Parameters of this instance: first value difficulty, second discrimination
    par = parameters.loc[i, :].values
    diff = par[0]
    disc = par[1]

    # Curve values over the ability grid
    E = [beta_irt(x, diff, disc) for x in ab]

    # Positions of the first (at most two) grid points whose value exceeds 0.499.
    # Fewer than two points can qualify, so every use below is guarded.
    middle = np.where(np.asarray(E) > 0.499)[0][:2]

    if len(middle) == 2:
        first, second = middle
        # Straight line through the two points, annotated with its slope
        p1 = [ab[first], E[first]]
        p2 = [ab[second], E[second]]
        newline(p1, p2)
        slope = (E[second] - E[first]) / (ab[second] - ab[first])
        plt.text(p2[0], p2[1], 'slope = ' + str(round(slope, 3)), fontsize=8)

    plt.plot(ab, E)

    if len(middle) >= 1:
        first = middle[0]
        # Dashed guides from the axes to the first qualifying point
        plt.plot([ab[first], ab[first]], [0, E[first]], '--r')
        plt.plot([0, ab[first]], [E[first], E[first]], '--r')

    # Observed responses of this instance plotted against the abilities
    # (last row of the abilities table is left out)
    plt.scatter(abilities['ability'].values[:-1], irt.loc[irt.index == i].values[0], marker='x', c='red')
    plt.ylabel('Response')
    plt.xlabel('Ability')
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.title('Instance ' + str(i))
plt.show()

In [None]:
plt.figure(figsize=(8, 6))
for i in chosen_i:
    # Parameters of this instance: first value difficulty, second discrimination
    par = parameters.loc[i, :].values
    diff = par[0]
    disc = par[1]

    # Curve values over the ability grid, converted to (1 - E) / E
    E = np.array([beta_irt(x, diff, disc) for x in ab])
    Error = (1 - E) / E
    plt.plot(ab, Error, label='Instance ' + str(i))

# Axis decoration is the same for every curve, so it is set once
plt.ylabel('Exp. Error')
plt.xlabel('Ability')
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 10.01])
plt.legend()
plt.show()

## MAE x Ability

In [None]:
# Names in the first column of the abilities table, last row left out
model_names = abilities.iloc[:-1, 0]
n_items = len(y_test)

# Per-instance weight: absolute deviation of the test target from its mean
err_rel = np.absolute(y_test - y_test.mean())
err_rel_values = np.asarray(err_rel)

# One accumulator slot per name
mae = np.zeros(len(abilities) - 1)
for i, mod in enumerate(model_names):
    # 'Worst' is not scored; its slot stays at zero
    if mod == 'Worst':
        continue
    # Responses in column i for the test instances. A dedicated name is used
    # so the target variable `y` defined earlier is not overwritten.
    response = irt.iloc[:n_items, i].values
    mae[i] = np.sum(((1 - response) / response) * err_rel_values)

# Average over the test instances, then drop the last slot
mae = mae / n_items
mae = np.delete(mae, -1)

In [None]:
# Display labels, in the same order as the plotted points
mdls = [
    '  LR', 'Bayes', 'SVR - Linear', 'SVR - Rbf', 'KNR',
    'DT', 'RF', 'AdaB', 'MLP (100)', 'MLP (50-50)',
    'Optimal', 'Medium',
]

# Point colours: the first ten labels share one colour, the last two another
c_n = 'C0'
c_e = 'red'
color = [c_n] * 10 + [c_e] * 2

In [None]:
# Ability (second column of the abilities table) against the MAE computed above
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(abilities.iloc[:-2, 1], mae, c=color)

# Write each label next to its point
for idx, label in enumerate(mdls):
    ax.text(abilities.iloc[idx, 1], mae[idx], ' ' + label, fontsize=8)

ax.set_ylabel('Mean Absolute Error (MAE)')
ax.set_xlabel('Ability')
ax.set_title('Ability x MAE')
plt.show()