<a href="https://colab.research.google.com/github/shaon11579/VAE-2021-/blob/main/simulation_2021_Hasan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- We include a data set in the ML2Pvae package for demonstrative use.
- The data is from a simulated 30 item exam which assesses 3 latent traits. The latent abilities for 5000 students, found in the data frame theta_true, were sampled from N(0,Σ). Here, Σ specifies the correlations between the 3 abilities, and is found in the data frame correlation_matrix. 
- Discrimination and difficulty paramters were sampled uniformly from [0.25, 1.75] and [−3, 3] respectively, and entries in the Q-matrix were sampled from Bern(0.35). These values can be found in the data frames disc_true, diff_true, and q_matrix. Probabilities for each student answering each question correctly were calculated with the ML2P model [5]. These probabilities were sampled from to generate a response to each item on the assessment for each student. This is the main piece of data used for training, and is found in the data frame responses.

In [31]:

import numpy as np
import pandas as pd

###############################################################################
#Create_data: simulates data for student assessment.
#INPUTS:
    #num_students: (int) # of students taking the assessment
    #num_questions: (int) # of quesions in the assessment 
    #num_tests: (int) # of times the student has taken a test
#OUTPUTS:
    #Q_mat: the expert estimation of which skills pertain to which question
    #A: how much a skill effects a question
    #B: difficulty of each question
    #Theta: the student's hidden knowledge of a subject
    #data: the student responses for each question for each test
###############################################################################
def Create_data(num_students, num_questions, num_tests, num_skills):
    J = num_skills #number of hidden skills
    K = num_students #number of students
    I = num_questions #number of questions in the assessment
    
    #Q matrix is expert prepared matrix of whether a item i requires skill j
    Q_mat = np.random.binomial(n=1,p=.35, size = [J,I]) #bernoli0.35
    
    #Discrimination parameters: how important is skill j for item i 
    A = np.random.uniform(low=0.25, high = 1.75, size = [J,I])
    
    #Theta: hidden skills for each student
    Theta = np.random.normal(loc = 0.0, scale=1.0, size = [K,J])
    np.savetxt('Theta.csv',Theta, delimiter=',')
    
    #B: the difficulty of each question
    B= np.random.uniform(low=-3.0, high = 3.0, size = [1, I])
    
    hidden = -1 * np.dot(Theta, (Q_mat * A)) + B# Equation 1 from the paper
    
    def sigmoid(x):
        return pow((1 + np.exp(x)), -1)
    
    prob_answers = sigmoid(hidden)#the probability a question is answered correctly
    
    data_rows = [] #[student, test #, q1, q2,...,qnum_questions]
    col_names = ['student','test_num']
    for question in range(I):
        col_names.append('Q{}'.format(question+1))
    for student in range(prob_answers.shape[0]):
        for test_num in range(num_tests):
            row = [None]*(num_questions + 2)#[student, test #, q1,q2,...,qnum_questions]
            row[0] = student
            row[1] = test_num
            for question in range(prob_answers.shape[1]):
                row[question+2] = np.random.binomial(n=1,p=prob_answers[student, question], size = None)
            data_rows.append(row)    
            
    data = pd.DataFrame(data = data_rows, columns = col_names)
    
    data = data.values.astype('float32')
    #data.to_csv("/content/q/data1.csv", index=False, header=False)

    return (Q_mat, A, B, Theta, data)





In [23]:
Q_mat,A,B,Theta,data= Create_data(num_students=5000, num_questions=30, num_tests=1, num_skills=7)

  

  
  


In [None]:
Q_mat,A,B,Theta,data= Create_data(num_students=5000, num_questions=30, num_tests=1, num_skills=7)

  


In [27]:
Q_mat
np.shape(Q_mat)

(7, 30)

In [34]:

np.shape(A)

(7, 30)

In [30]:
data=pd.DataFrame(Q_mat)
data.head()
data.to_csv('/content/q/data.csv')

In [12]:
#create a csv file
data.to_csv("/content/q/data.csv", index=False, header=False)

In [7]:
import csv

In [None]:
#3 latent traits 

In [None]:
# 7 latent trait 

In [None]:
# 14 latent trait 

In [13]:


# open the file in the write mode
#f = open('/content/q')

# create the csv writer
writer = csv.writer("/content/q/data.csv")

TypeError: ignored

In [10]:
import pandas as pd
import numpy as np
#get the flipcount
flipcount = int(input("How many times should I flip a coin?\n###:"))
samples = np.random.randint(0, 2, size = flipcount)
#create a pandas dataframe
data = pd.DataFrame(["heads" if i == 1 else "Tails" for i in samples])
#create a csv file
data.to_csv("/content/q/data.csv", index=False, header=False)

How many times should I flip a coin?
###:10


NameError: ignored