# Basic workflow:
1. Loop through each row (subject) in a column (question)

2. Use three different "if" statements to check what type of question it is (oe = open-ended; tf = true/false; mc = multiple-choice) by reading the first two letters in the name of the question (i.e. 'oe_29').

3. Each "if" statement will use the proper scoring procedure depending on the question type. A correct answer earns +1 point:
    * Open-Ended Question: If answer contains at least 2 "keywords," +1 point is awarded.
    * Multiple-Choice Question: Subject must select all correct answers and none of the wrong answers (+1 point).
    * True/False Question: Correct answer results in +1 point.

# Setup

In [1]:
import pandas as pd

# Location of the exported questionnaire / final-exam CSV.
data_path = "/Users/lindseywilliams/Documents/Research/Gab Lab/Workplace Learning/Data Analysis Round 1/questionnaireAndFinalExam.csv"

df = pd.read_csv(data_path)

# Keep only the cells that contain subject responses (row label 2 onward,
# question columns 'oe_29' through 'mc_28').
# NOTE(review): presumably rows 0-1 hold export metadata rather than subjects - confirm.
# .copy() makes df_responses an independent frame, so the column assignments
# below (and the *_scored columns added later) do not operate on a slice of
# `df` and do not trigger pandas' SettingWithCopyWarning.
df_responses = df.loc[2:, 'oe_29':'mc_28'].copy()
df_responses['subjectID'] = df.loc[2:, 'ResponseId']
df_responses.set_index('subjectID', inplace=True)
df_responses.head()


Unnamed: 0_level_0,oe_29,mc_18,tf_19,mc_20,mc_21,oe_30,oe_31,mc_22,mc_23,oe_32,tf_24,mc_25,mc_26,mc_27,mc_28
subjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
R_1DvWWpfJYOYwPV8,,"One-way communication (such as simply ""informi...",False,Listening to the other person's logic and reas...,"Seek mutual understanding,Gain respect from th...",,,you learn to talk about yourself in ways that ...,Exhibit charisma,,False,Asking an easy opening question to ease into t...,Consider what you are most interested in knowi...,Relevant to the matter at hand,Prompt a response and allow the other person t...
R_1IueviCXfiUPLOb,you need to be clear on your part but also be ...,Mutual understanding,False,Allowing your authentic self to be revealed,"Seek mutual understanding,Gain respect from th...",because it makes you sound like you don't thin...,focusing on the task at hand and what informat...,you speak less about yourself and show interes...,"Exhibit a level of curiosity,Exhibit charisma,...",so you don't assume anything about the person ...,True,Asking an easy opening question to ease into t...,Consider what you are most interested in knowi...,"Relevant to the matter at hand,Properly paced,...",Prompt a response and allow the other person t...
R_2E6tDMc9NeYxPdO,"For communication to be effective, it needs to...","One-way communication (such as simply ""informi...",False,Allowing your authentic self to be revealed,"Seek mutual understanding,Gain respect from th...","When we not talk about ourselves, we invest ou...",I will focus on the problem at hand and be cur...,you speak less about yourself and show interes...,"Exhibit a level of curiosity,Avoid making judg...","That way, we are providing a channel for the o...",False,Asking an easy opening question to ease into t...,Frame the question so that the person will rep...,"Relevant to the matter at hand,Properly paced,...",Prompt a response and allow the other person t...


### Enter the answer key below

In [2]:
# Answer key for open-ended (oe) questions.
# A response earns the point when it contains at least two distinct keywords.
answerKey_oe = {
    # questionName: tuple of keywords (each keyword listed once)
    'oe_29': ('mutual understanding', 'both', 'two-way', 'each person gains', 'interpersonal', 'receptive', 'receive'),
    'oe_30': ('two-way', 'receptive', 'receive', 'mutual', 'both'),
    'oe_31': ('relevant', 'focus', 'selective', 'relevance'),
    'oe_32': ('assume', 'share', 'two-way', 'receptive', 'receive', 'assumption', 'express', 'freedom', 'freely'),
}


# Answer key for multiple-choice (mc) questions.
# Multi-select answers are stored as one comma-joined string and are compared
# to the response by exact string equality.
answerKey_mc = {
    # questionName: correct answer
    'mc_18': 'Mutual understanding',
    'mc_20': 'Allowing your authentic self to be revealed',
    'mc_21': 'Seek mutual understanding,Connect with another person through an interpersonal exchange,Minimize the risk of misunderstanding,Enhance your communication with body language',
    'mc_22': 'you speak less about yourself and show interest in others',
    # NOTE(review): the key for mc_23 is empty, so with exact-match scoring only
    # an empty-string response can score here - fill in the intended answer.
    'mc_23': '',
    'mc_25': 'Asking an easy opening question to ease into the conversation',
    'mc_26': 'Consider your intentions',
    'mc_27': 'Relevant to the matter at hand,Properly paced, as it sometimes takes a series of questions to acquire the information in which you are interested,Mutually beneficial',
    'mc_28': 'Prompt a response and allow the other person to confirm you have understood them,Open-ended, such that they require more than simply a "yes" or "no" answer',
}


# Answer key for true/false (tf) questions.
answerKey_tf = {
    # questionName: correct answer
    'tf_19': 'FALSE',
    'tf_24': 'TRUE',
}

In [3]:
def scoreOE(response, keywords):
    """Score one open-ended answer by keyword matching.

    The answer earns the point when it contains at least 2 distinct keywords.
    Matching is a case-insensitive substring test, so a keyword that starts a
    sentence ("Both ...") still counts, and a keyword listed more than once is
    only counted once.

    Parameters
    ----------
    response : str
        The subject's free-text answer. Missing answers (None, NaN, empty
        string) score 0.
    keywords : iterable of str
        Keywords that count toward the score for this question.

    Returns
    -------
    int
        1 if at least two distinct keywords occur in the answer, otherwise 0.
    """
    if not response or pd.isnull(response):  # No answer given
        return 0
    # str() guards against a non-string cell (e.g. a number read from the CSV).
    answer_text = str(response).lower()
    matched = {word.lower() for word in keywords if word.lower() in answer_text}
    return int(len(matched) >= 2)

# Score each open-ended question and store the result in "<question>_scored".
for colname, keywords in answerKey_oe.items():
    oe_scores = df_responses.loc[:, colname].apply(scoreOE, keywords=keywords)
    df_responses.loc[:, colname + "_scored"] = oe_scores

In [4]:
def scoreMC(response, question=None, answer_key=None):
    """Score one multiple-choice answer by exact match against the key.

    Parameters
    ----------
    response : str
        The subject's answer. Multi-select answers are expected as a single
        string that must equal the key exactly.
    question : str, optional
        Name of the question being scored (e.g. 'mc_18'). When omitted, the
        module-level variable ``colname`` is used, which is how the scoring
        loop has always called this function via ``Series.apply(scoreMC)``.
    answer_key : dict, optional
        Mapping of question name to correct answer. Defaults to the
        module-level ``answerKey_mc``.

    Returns
    -------
    int
        1 (correct) or 0 (incorrect or missing).

    Raises
    ------
    KeyError
        If the question is not present in the answer key.
    """
    if question is None:
        question = colname  # Legacy behaviour: rely on the caller's loop variable
    if answer_key is None:
        answer_key = answerKey_mc
    correctAns = answer_key[question]
    if pd.isnull(response):  # Unanswered question
        return 0
    return int(response == correctAns)
    

In [5]:
def scoreTF(response, question=None, answer_key=None):
    """Score one true/false answer against the key.

    The comparison ignores case and surrounding whitespace, so 'FALSE',
    'False' and a boolean ``False`` are all treated as the same answer.

    Parameters
    ----------
    response : str or bool
        The subject's answer.
    question : str, optional
        Name of the question being scored (e.g. 'tf_19'). When omitted, the
        module-level variable ``colname`` is used, which is how the scoring
        loop has always called this function via ``Series.apply(scoreTF)``.
    answer_key : dict, optional
        Mapping of question name to correct answer. Defaults to the
        module-level ``answerKey_tf``.

    Returns
    -------
    int
        1 (correct) or 0 (incorrect or missing).

    Raises
    ------
    KeyError
        If the question is not present in the answer key.
    """
    if question is None:
        question = colname  # Legacy behaviour: rely on the caller's loop variable
    if answer_key is None:
        answer_key = answerKey_tf
    correctAns = str(answer_key[question]).strip().upper()
    if pd.isnull(response):  # Unanswered question
        return 0
    return int(str(response).strip().upper() == correctAns)

In [8]:
# Open-ended questions: keyword-based scoring, one "<question>_scored" column each.
for colname, keywords in answerKey_oe.items():
    oe_scores = df_responses.loc[:, colname].apply(scoreOE, keywords=keywords)
    df_responses.loc[:, colname + "_scored"] = oe_scores

# Multiple-choice questions first, then true/false questions.
# The loop variable is deliberately named `colname`: scoreMC and scoreTF look
# up the question being scored through that module-level name.
for answer_key, scorer in ((answerKey_mc, scoreMC), (answerKey_tf, scoreTF)):
    for colname in answer_key:
        df_responses.loc[:, colname + "_scored"] = df_responses.loc[:, colname].apply(scorer)

# Total score per subject: row-wise sum over the *_scored columns, selected as
# the label slice from 'oe_29_scored' through 'tf_24_scored'.
scored_columns = df_responses.loc[:, 'oe_29_scored':'tf_24_scored']
total_score = scored_columns.sum(axis=1)
df_responses['total_score'] = total_score
df_responses.head()

Unnamed: 0_level_0,oe_29,mc_18,tf_19,mc_20,mc_21,oe_30,oe_31,mc_22,mc_23,oe_32,...,mc_21_scored,mc_22_scored,mc_23_scored,mc_25_scored,mc_26_scored,mc_27_scored,mc_28_scored,tf_19_scored,tf_24_scored,total_score
subjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
R_1DvWWpfJYOYwPV8,,"One-way communication (such as simply ""informi...",False,Listening to the other person's logic and reas...,"Seek mutual understanding,Gain respect from th...",,,you learn to talk about yourself in ways that ...,Exhibit charisma,,...,0,0,0,1,0,0,0,1,0,2
R_1IueviCXfiUPLOb,you need to be clear on your part but also be ...,Mutual understanding,False,Allowing your authentic self to be revealed,"Seek mutual understanding,Gain respect from th...",because it makes you sound like you don't thin...,focusing on the task at hand and what informat...,you speak less about yourself and show interes...,"Exhibit a level of curiosity,Exhibit charisma,...",so you don't assume anything about the person ...,...,0,1,0,1,0,0,1,1,1,9
R_2E6tDMc9NeYxPdO,"For communication to be effective, it needs to...","One-way communication (such as simply ""informi...",False,Allowing your authentic self to be revealed,"Seek mutual understanding,Gain respect from th...","When we not talk about ourselves, we invest ou...",I will focus on the problem at hand and be cur...,you speak less about yourself and show interes...,"Exhibit a level of curiosity,Avoid making judg...","That way, we are providing a channel for the o...",...,0,0,0,1,0,0,0,1,0,4


# Old Code

In [None]:
'''
response = series
'''
def scoreMC_test(response):
    """Debug helper: compare one response against every multiple-choice key.

    For each question in ``answerKey_mc`` the question name, the response, the
    expected answer and the resulting 0/1 score are printed. Nothing is
    returned.
    """
    for question, expected in answerKey_mc.items():
        ques_score = int(bool(response == expected))
        print('colname:', question)
        print('response:', response)
        print('answerKey_mc[colname]:', expected)
        print('ques_score:', ques_score)
        
# Run the debug helper over a single column. `colname` is whatever value the
# module-level name currently holds (left over from the last loop that ran).
# scoreMC_test only prints and has no return statement, so the resulting
# Series contains None for every row.
df_responses[colname].apply(scoreMC_test)
    

    
# response = 

# def scoreMC(response, keywords):
#     if response == answerKey_mc[colname]

In [None]:
# Early draft of the open-ended scoring loop, kept under "Old Code".
# NOTE(review): `answer_key` is not defined anywhere in the visible notebook
# (the open-ended key above is named `answerKey_oe`) - confirm before running.
for col in df_responses:
    if col.startswith('oe'): # for open-ended questions
        for response in df_responses[str(col)]: # Try using .apply here instead
            # Counters are reset for every response.
            count_num_keywords = 0
            score_keeper = 0
            # NOTE(review): this inner loop rebinds the outer loop variable `col`
            # and tests the response against the keywords of every question in
            # `answer_key`, not only those of the column being iterated.
            for col, keywords in answer_key.items():
                for word in keywords:
                    if word in str(response):
                        count_num_keywords += 1
                        # Once two matches have been seen, every further match
                        # increments score_keeper again.
                        if count_num_keywords >= 2:
                            score_keeper += 1
                        print(col, response, word, score_keeper)
#     elif cols.startswith('mc'): # for multiple-choice questions
#         print(col)
#     elif cols.startswith('tf'): # for true/false questions
#         print(col)

In [None]:
# Scratch example: count how many entries of df.rowName appear in `strings`.
strings = ['string1', 'string2', 'string3']
sum(int(sr in strings) for sr in df.rowName)

In [None]:
# Early draft of open-ended scoring that takes the whole DataFrame.
# NOTE(review): running this cell rebinds the name `scoreOE`, replacing the
# (response, keywords) version defined earlier in the notebook.
# NOTE(review): `col` is assigned by the inner `for col, keywords` loop, so
# Python treats it as a local variable; reading it in `df[str(col)]` before
# that loop has run raises UnboundLocalError.
# NOTE(review): `answer_key` is not defined in the visible notebook.
def scoreOE(df):
    for response in df[str(col)]: # Try using .apply here instead
        # Counters are reset for every response.
        count_num_keywords = 0
        score_keeper = 0
        for col, keywords in answer_key.items():
            for word in keywords:
                if word in str(response):
                    count_num_keywords += 1
                    if count_num_keywords >= 2:
                        score_keeper += 1
                        # Assigns the scalar to the entire 'totalScore' column,
                        # overwriting the value written for earlier responses.
                        df['totalScore'] = score_keeper

In [None]:
# Print the keyword collection of every question in the answer key.
for keywords in answer_key.values():
    print(keywords)

In [None]:
# Scratch example: iterate over a dict and print each key with its value.
# Uses the print() function; the Python 2 print statement is a SyntaxError in
# Python 3, which the rest of this notebook is written for.
d = {'x': 1, 'y': 2, 'z': 3}
for key in d:
    print(key, 'corresponds to', d[key])

# Cool Ideas for Later

In [None]:
# How to count the number of unique words in a string. Could produce an output of the most common words used in answers.
# https://stackoverflow.com/questions/18936957/count-distinct-words-from-a-pandas-data-frame