In [1]:
import warnings
# Suppress every warning for the rest of the session: the filter is installed
# without a category or module restriction, so nothing is exempt.
warnings.filterwarnings('ignore')

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from irt import IRTModel
from sklearn import svm
from sklearn.linear_model import SGDRegressor, LinearRegression, BayesianRidge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from beta_irt.visualization.plots import newline
from beta_irt.visualization.plots import plot_parameters
from irt import beta_irt
from sklearn.decomposition import PCA
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
from matplotlib import gridspec
from sklearn.preprocessing import StandardScaler
import edward as ed

## Preprocessing

In [11]:
# Locations of the input data
path_data = './data/'
path_uci = './data/UCI - 45/'

# Data set to analyse (CSV file name without its extension)
name = 'polynomial'

# Load the CSV and discard every row that contains a missing value
data = pd.read_csv(f'{path_uci}{name}.csv').dropna()

# Experiment settings: random seed and the grid of noise standard deviations
rd = 42
noise_std = np.linspace(0, 0.4, 20)
max_std = np.max(noise_std)

# First column is the single feature (kept 2-D), second column is the target
X = data.iloc[:, 0].to_numpy()[:, np.newaxis]
y = data.iloc[:, 1]

# 80/20 hold-out split; keep the original row labels of the training targets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=rd
)
indexes = y_train.index.tolist()

# Standardise the feature using statistics estimated on the training split only
sc_X = StandardScaler().fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

# # Principal component analysis
# pca = PCA(n_components= 1)
# X_train = pca.fit_transform(X_train)
# X_test = pca.transform(X_test)

# Pool of regression models handed to the IRT model (ten estimators, one per line)
models = [
    LinearRegression(),
    BayesianRidge(),
    svm.SVR(kernel='linear'),
    svm.SVR(kernel='rbf', gamma='scale', C=5),
    KNeighborsRegressor(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    AdaBoostRegressor(),
    MLPRegressor(max_iter=1000, solver='lbfgs'),
    MLPRegressor(hidden_layer_sizes=(50, 50), solver='lbfgs', max_iter=500, activation='logistic'),
]

# Build the IRT helper around the model pool and fit it on the training split.
# NOTE(review): presumably this trains every model in `models`; confirm in irt.IRTModel.
Irt = IRTModel(models=models)
Irt.fit(X_train=X_train, y_train=y_train)

# Plot limits, stored as plain scalars.
# X_test is a 2-D (n, 1) array, so the builtin min()/max() would iterate over
# its rows and return 1-element arrays; the array/Series reductions return
# scalars instead. The x-range is widened by two times the largest noise std.
xlim = [X_test.min() - 2*max_std, X_test.max() + 2*max_std]
ylim = [y_test.min(), y_test.max()]

# Edward - set seed
ed.set_seed(rd)

# Folders: root of the beta-IRT results and the sub-folder of this data set
path = './beta_irt/results/'
folder = name + '/'

In [6]:
# Zero-filled containers for the noise experiment, sized from the number of
# noise levels and the number of test instances.
_n_levels, _n_test = len(noise_std), len(X_test)
_n_cols = len(models) + 3  # width of each per-level response matrix

noises = np.zeros((_n_test, _n_levels))
responses = np.zeros((_n_levels, _n_test, _n_cols))
abilities = np.zeros((_n_levels, _n_cols))
parameters = np.zeros((_n_levels, _n_test, 2))

In [None]:
for i, noise in enumerate(noise_std):
    # Generate noise to feature in test set
    noise_test = np.random.normal(loc=0.0, scale= noise, size= len(X_test))
    noises[:, i] = noise_test
    X_test_ = X_test + noise_test.reshape(-1,1)
    
    # Generate IRT matrix
    Irt.irtMatrix(X_test= X_test_, y_test= y_test, noise_std = i, normalize= True, base_models= True, name= name, rd= rd)
    responses[i] = Irt.irt_matrix
#     print('Noise ' + str(i))
#     print(responses[i].reshape(1,-1).mean())
#     print(name_)
#     print('----------------------------------------------------------------__')
    name_ = name + '_s' + str(len(y_test)) + '_f' + str(i) + '_sd' + str(rd)
    
    # Generate Items' parameters and Respondents' abilities
    os.chdir('./beta_irt/')
    %run -i betairt_test.py {'irt_data_'+ name_ +'.csv'}
    os.chdir('..')
    
    error = pd.read_csv('./beta_irt/errors_' + name_ + '.csv')
    abilities = pd.read_csv(path + folder + 'irt_ability_vi_'+ name_ +'_am1@0_as1@0.csv')
    ind = list(y_test.index)
    parameters = pd.read_csv(path + folder + 'irt_parameters_vi_'+ name_ +'_am1@0_as1@0.csv').iloc[:,:].values
    
    # Move files to folder    
    if !os.path.isdir('./Results_IRT/'+ folder):
        !mkdir {'./Results_IRT/'+ folder}
    if !os.path.isdir('./Results_IRT/'+ folder + 'noise_' + str(i)):
        !mkdir 
    output = './Results_IRT/'+ folder +

In [None]:
# Scatter of the training set against the (noisy) test set left over from the
# last iteration of the noise loop.
fig, ax = plt.subplots(figsize=(9, 6))
ax.scatter(X_train, y_train, label='Train set', s=24, edgecolor='k')
ax.scatter(X_test_, y_test, label='Test set', c='red', s=24, edgecolor='k')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_xlim(xlim)
ax.set_title('Train/Test Split')
ax.legend()
# plt.savefig('./Results_IRT/' + name + '/dataset.png')

# ICC

The cells below require that 'betairt_test.py' has already been run on the same data generated above.

Item-Response Matrix:

Parameters:

In [None]:
# Column 0 of the parameter table is read as difficulty, column 1 as discrimination
difficulty, discrimination = (parameters.iloc[:, col].values for col in (0, 1))

In [None]:
# Grid of 200 ability values strictly inside (0, 1), used to draw the curves below
ab = np.linspace(start=0.0001, stop=0.9999, num=200)

## Plot parameters

In [None]:
# Two-column array: the (noise-free) test feature next to its target value
concat = np.hstack((X_test, y_test.to_numpy().reshape(-1, 1)))

In [None]:
# Scatter of the test instances with the hand-picked rows labelled as outliers
plt.scatter(concat[:, 0], concat[:, 1])
plt.xlabel('x')
plt.ylabel('y')
plt.ylim(ylim)

# Row positions in `concat` to annotate (negative values count from the end)
ind = [-4, -3, -2, -1, 22, 35]
for i, txt in enumerate(ind):
    plt.text(concat[txt][0], concat[txt][1], ' Outlier ' + str(i+1), fontsize=8)

# `noise_std` is an array of noise levels, so `noise_std == 0` cannot be used
# directly as a condition (ambiguous truth value). Save the figure only when
# no noise at all is configured; this also works if `noise_std` is a scalar.
# NOTE(review): confirm that "all levels are zero" is the intended condition.
if np.all(np.asarray(noise_std) == 0):
    plt.savefig('./Results_IRT/' + name.split('_')[0] + '/original.png')

In [None]:
# Show the five items with the highest difficulty, then the five with the
# lowest discrimination.
for column, ascending in (('difficulty', False), ('discrimination', True)):
    print(parameters.sort_values(by=column, ascending=ascending).head())

In [None]:
# `noise_std` is an array of noise levels, so `noise_std > 0` cannot be used
# directly as a condition (ambiguous truth value). Treat the run as noisy when
# any level is positive; this also works if `noise_std` is a scalar.
has_noise = np.any(np.asarray(noise_std) > 0)

# One figure per group of five models (models 0-4, then models 5-9)
for i, l in enumerate([slice(0, 5, 1), slice(5, 10, 1)]):
    if has_noise:
        # `noise` is whatever the noise loop left behind (its last level)
        f = plot_parameters(concat, delta=difficulty, a=discrimination, noise=noise,
                            models=Irt.models[l], ylim=ylim)
    else:
        f = plot_parameters(concat, delta=difficulty, a=discrimination,
                            models=Irt.models[l], ylim=ylim)
    f.savefig('./Results_IRT/' + name.split('_')[0] + '/params_'+str(i+1)+'.png')

In [None]:
# Positions of the last four items (counted from the end) to inspect in detail
chosen_i = list(range(-4, 0))

In [None]:
# One panel per chosen item: the curve beta_irt(ability, difficulty,
# discrimination) over the ability grid, a slope estimate where the curve first
# exceeds 0.499, and the observed responses plotted against the abilities.
plt.figure(figsize=(12, 12))
for panel, item in enumerate(chosen_i, start=1):
    plt.subplot(2, 2, panel)

    # First entry of the row is used as difficulty, second as discrimination
    item_params = parameters.iloc[item, :].values
    diff, disc = item_params[0], item_params[1]
    curve = [beta_irt(ability, diff, disc) for ability in ab]

    # Indices of the first two grid points where the curve exceeds 0.499
    above = np.where(np.asarray(curve) > 0.499)[0]
    lo, hi = above[0], above[1]
    p1 = [ab[lo], curve[lo]]
    p2 = [ab[hi], curve[hi]]
    newline(p1, p2)

    # Finite-difference slope between those two points
    slope = (curve[hi] - curve[lo]) / (ab[hi] - ab[lo])
    plt.text(p2[0], p2[1], 'slope = '+str(round(slope, 3)), fontsize=8)

    plt.plot(ab, curve)
    # Dashed red guides from the axes to the first point above the threshold
    plt.plot([ab[lo], ab[lo]], [0, curve[lo]], '--r')
    plt.plot([0, ab[lo]], [curve[lo], curve[lo]], '--r')

    # NOTE(review): `irt` is not defined anywhere in the visible notebook;
    # presumably a DataFrame holding the item-response matrix - confirm.
    plt.scatter(abilities['ability'].values[:-1], irt.iloc[item].values, marker='x', c='red')

    plt.ylabel('Response')
    plt.xlabel('Ability')
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.title('Outlier ' + str(panel))
plt.savefig('./Results_IRT/' + name.split('_')[0] + '/instances.png')

In [None]:
# Single figure with one line per chosen item: (1 - E) / E over the ability
# grid, where E is the beta_irt curve of that item.
plt.figure(figsize=(8, 6))
for rank, item in enumerate(chosen_i, start=1):
    # First entry of the row is used as difficulty, second as discrimination
    item_params = parameters.iloc[item, :].values
    diff, disc = item_params[0], item_params[1]

    expected = np.array([beta_irt(ability, diff, disc) for ability in ab])
    expected_error = (1 - expected) / expected
    plt.plot(ab, expected_error, label='Outlier ' + str(rank))

    # NOTE(review): `irt` is not defined anywhere in the visible notebook, and
    # `err` is only referenced by a disabled scatter call - confirm it is needed.
    err = irt.iloc[item].values

    plt.ylabel('Exp. Error')
    plt.xlabel('Ability')
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 10.01])
plt.legend()
plt.savefig('./Results_IRT/' + name.split('_')[0] + '/error_ability.png')

Moving IRT files to 'Results_IRT' folder:

In [None]:
!mv {'./beta_irt/irt_data_' + name + '.csv'} {'./Results_IRT/' + name.split('_')[0] + '/'}
!mv {'./beta_irt/xtest_' + name + '.csv'} {'./Results_IRT/' + name.split('_')[0] + '/'}
!mv {'./beta_irt/errors_' + name + '.txt'} {'./Results_IRT/' + name.split('_')[0] + '/'}
!mv {path + folder + 'irt_ability_vi_'+ name +'_am1@0_as1@0.csv'} {'./Results_IRT/' + name.split('_')[0] + '/'}
!mv {path + folder + 'irt_parameters_vi_'+ name +'_am1@0_as1@0.csv'} {'./Results_IRT/' + name.split('_')[0] + '/'}