In [1]:
%matplotlib inline
import csv

import numpy as np
import matplotlib.pyplot as plt

In [2]:
def loadCSV(file):
    """
    Load a CSV file
    
    Parameters
    ----------
    file: string (path to the file, containing .csv extension)
    
    Returns
    -------
    data: numpy array built from the parsed rows, where entries are strings
          (2-D, rows x columns, when every row has the same number of fields)
    """
    # newline='' passes line endings to the csv module untouched, so a quoted
    # field that contains a line break is read back exactly as it was written
    # instead of being altered by universal-newline translation.
    with open(file, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        data = list(csvreader)
    return np.array(data)

def cleanData(data, dataStart = 3):
    """
    Turn the raw rows imported from a csv file into one dictionary per response
    
    Parameters
    ----------
    data:                 indexable rows of strings (e.g. the numpy array from loadCSV). Row 0 holds the question titles,
                          row 2 holds the question IDs, and rows from dataStart onwards are responses.
    dataStart (optional): integer index of the first response row, default = 3, assuming the rows before it are headers
    
    Returns
    -------
    dataList: list of dictionaries, one per response row, where key = (question ID, question title)
              and keyval = the cell of that row in the matching column, stored as-is.
    """
    
    questionIDs = data[2]
    questionTitles = data[0]
    
    dataList = []
    for rowIndex in range(dataStart, len(data)):
        # pair every cell with the ID/title found in the same column of the header rows
        answersByQuestion = {
            (questionIDs[col], questionTitles[col]): cell
            for col, cell in enumerate(data[rowIndex])
        }
        dataList.append(answersByQuestion)
    return dataList

def findQuestions(data):
    """
    Build a lookup of question texts from the three header rows of the input data
    
    Parameters
    ----------
    data: indexable rows of strings (e.g. the numpy array from loadCSV), where
          row 0 = question titles, row 1 = question texts, row 2 = question IDs
    
    Returns
    -------
    questions: dictionary, keys = (question ID, question title), keyvals = question text
    """
    
    questions = {}
    for col, title in enumerate(data[0]):
        # same column in rows 1 and 2 gives the text and ID belonging to this title
        text = data[1][col]
        key = (data[2][col], title)
        questions[key] = text
    return questions

In [3]:
class SurveySet:
    """
    Common class for all imported data sets.
    
    Constructing a SurveySet loads one CSV file and keeps:
        - useable responses (ignoring incomplete, no consent, underage)
        - questions for the data set (dictionary, where keys = (ID, title), keyvals = question text)
        - an added "Expert" field on every kept response and in the questions
    
    Class attributes
    ----------------
    importantFields: list of [question text fragment, required answer] pairs. A response is kept only
                     when it gives the required answer to every question whose text contains the fragment.
    allQuestions:    dictionary shared by every SurveySet instance, keys = (ID, title),
                     keyvals = list of the distinct answers seen so far among kept responses.
    
    NOTE: splitting answers that are separated by ',' rather than '- ' is not performed by __init__.
    """
    
    importantFields = [
        ['Finished','True'],
        ['I have read and understood the above information and give consent for my work to be analyzed for research purposes.','Yes'],
        ['What is your age?','18 years or older'],
    ]
    
    allQuestions = {}
    
    def __init__(self,file):
        """
        Load a survey file and keep only the responses that pass every important field
        
        Parameters
        ----------
        file: string, path of the csv file to load. If it contains 'expert', every kept
              response is tagged with Expert = 'True', otherwise with Expert = 'False'.
        """
        self.file = file
        expertKey = ('{"ImportId":"expert"}','Expert')
        expertFlag = 'True' if 'expert' in self.file else 'False'
        
        #loading and cleaning up initial responses from the file
        rawData = loadCSV(file)
        responses = cleanData(rawData)
        self.questions = findQuestions(rawData)
        
        #checking to see what are the keys of the questions matching the important fields
        fieldIDS = []
        for field in SurveySet.importantFields:
            for question in self.questions:
                if field[0] in self.questions[question]:
                    fieldIDS.append([question,field[1]])
        
        useableResponses = []
        for response in responses:
            #seeing if response can be used: each answer is a plain string, so the
            #whole answer is compared (indexing it with [0] would only look at its
            #first character and reject every response)
            if not all(response[key] == required for key, required in fieldIDS):
                continue
            
            #the response can be used: add the expert field and record its answers
            response.update({expertKey : expertFlag})
            useableResponses.append(response)
            for question in response:
                answer = response[question]
                seenAnswers = SurveySet.allQuestions.setdefault(question, [])
                if answer not in seenAnswers:
                    seenAnswers.append(answer)
        
        self.questions.update({expertKey : 'Expert'})
        self.responses = useableResponses