In [None]:
%run "0.0 Data preparation.ipynb"

# Dataset selection

In [None]:
# Select the working dataset by name; selectDataset returns four values that are
# kept as notebook-level defaults for the cells below.
# NOTE(review): judging by the names, the last two are the SUS-normalized
# counterpart of the first two - confirm in "0.0 Data preparation.ipynb".
_dataName, _inputData, _dataNameSUSNormalized, _inputDataSUSNormalized = selectDataset("data20190703")

In [None]:
# Output folders for the two families of graphs saved by this notebook,
# both located under graphsSavePathStem.
stackedBarPlotsFilenamePathStem = graphsSavePathStem + "/stacked-bar-plots"
barPlotsErrorBarsFilenamePathStem = graphsSavePathStem + "/bar-plots-with-error-bars"

for _graphFolder in (stackedBarPlotsFilenamePathStem, barPlotsErrorBarsFilenamePathStem):
    tryCreateFolder(_graphFolder)

In [None]:
def plotStackedBar(question
                   , dataName=_dataName
                   , saveFig=False
                   , filename=None
                   , fig=None
                   , ax=None
                   , title=None
                   , displayedGameNames=identityGameNames
                   , showLegend=True
                   , printDebug=True
                   , tight_layout=True
                   , constrained_layout=False
                  ):
    """Plot, for one question, one stacked bar per game counting the answers on each Likert step.

    Parameters:
        question: column of the dataset to plot; also used as the axes title
            when `title` is empty.
        dataName: key of the dataset in `datasets`.
        saveFig: when true, the figure is written with `fig.savefig`.
        filename: target of `fig.savefig`; when None it is built as
            stackedBarPlotsFilenamePathStem/<dataName>/<label of `question`
            in the index of shortQuestions>.
        fig: figure to draw on and to save. Created here when both `fig` and
            `ax` are None; taken from `ax` when only `ax` is given.
        ax: axes to draw on. When None, a single subplot is added to `fig` and
            the figure is shown with `plt.show()` before returning.
        title: axes title; falls back to `question` when empty.
        displayedGameNames: indexed with `games` to obtain the x tick labels.
        showLegend: when true, a legend labelled with likert5StepDescriptions
            is placed to the right of the axes.
        printDebug: when true, prints the per-game counts.
        tight_layout: when true, calls `fig.tight_layout()`.
        constrained_layout: forwarded to `plt.figure` when the figure is
            created here.

    Returns:
        A list with one entry per game (same order as `games`); each entry is
        the list of the five bar containers returned by `ax.bar`, ordered from
        the smallest to the largest Likert value.

    Raises:
        AssertionError: if `dataName` is not in `datasets`, or if the Likert
            columns of the dataset do not span exactly five steps.

    NOTE(review): getMaxAnswers is defined in a later cell of this notebook; on
    a fresh kernel the call placed right after this definition only works if it
    is also provided by the data preparation notebook - confirm.
    """

    assert (dataName in datasets), ("Not found in datasets: '" + dataName + "'")

    dataset = datasets[dataName]

    allPossibleValues = np.unique(dataset.loc[:, shortLikertQuestions.values].values)
    minLikertValue = min(allPossibleValues)
    maxLikertValue = max(allPossibleValues)
    assert ((maxLikertValue - minLikertValue) == 4), ("Function designed for 5-step Likert scale")
    likertValues = range(minLikertValue, maxLikertValue + 1)

    # the figure is only shown from here when this function created the axes
    callShow = ax is None

    if ax is None:
        if fig is None:
            fig = plt.figure(constrained_layout=constrained_layout)
            fig.patch.set_facecolor('white')
        # add the subplot to `fig` itself, not to whatever figure is current
        ax = fig.add_subplot(111)
        ax.patch.set_facecolor('white')
    elif fig is None:
        # needed below for tight_layout and savefig when only `ax` was given
        fig = ax.figure

    ind = np.arange(len(games))    # the x locations for the groups
    width = 0.35       # the width of the bars: can also be len(x) sequence
    colors = cm.jet(np.linspace(1, 0, 5))

    # number of answers per (game, answer value) pair
    commonData = dataset.loc[:, [question, gameQuestion]].groupby([gameQuestion, question]).size()

    if printDebug:
        print("-------------------------------------------------------------------------------------")
        print(question)

    #stacked bar plots of games
    #gameStackedBarPlots[gameIndex][plottedValue] contains:
    # the bar plot for game gameIndex of Likert/refined value plottedValue
    gameStackedBarPlots = []

    for gameIndex in range(len(games)):
        game = games[gameIndex]

        # a game without any answer in this dataset is absent from the
        # grouped counts: plot it as an empty bar instead of raising KeyError
        data = commonData.get(game)
        if data is None:
            gameLikertCounts = [0 for _ in likertValues]
        else:
            gameLikertCounts = [data.get(value, 0) for value in likertValues]

        if printDebug:
            print("  " + game + ": " + str(gameLikertCounts))
            print(str(data))

        # original author's note - agreement scale: 0 == 100% agree, 4 == 100% disagree
        # Segment i sits on top of the segments of all the higher values, so
        # the highest value is at the bottom of the stack.
        gameBars = []
        for i in range(len(gameLikertCounts)):
            gameBars.append(
                ax.bar(
                    ind[gameIndex]
                    , gameLikertCounts[i]
                    , width
                    , color=colors[i]
                    , bottom=sum(gameLikertCounts[i+1:])
                )
            )
        gameStackedBarPlots.append(gameBars)

    ax.set_ylabel('Stacked answers')
    ax.set_ylim(0, getMaxAnswers(dataName))
    if not title:
        ax.set_title(question)
    else:
        ax.set_title(title)

    ax.set_xticks(ind)
    ax.set_xticklabels(displayedGameNames[games])

    if showLegend and gameStackedBarPlots:
        # colors are the same for every game: take the handles from the first one
        ax.legend(
            [bars[0] for bars in gameStackedBarPlots[0]]
            , likert5StepDescriptions
            , loc='center left', bbox_to_anchor=(1, 0.5)
        )

    if tight_layout:
        fig.tight_layout()

    # save before showing: on non-inline backends the figure may be destroyed
    # once plt.show() returns, which would produce an empty file
    if saveFig:
        if filename is None:
            shortQuestion = shortQuestions.index[shortQuestions.values==question].values[0]
            path = stackedBarPlotsFilenamePathStem + "/" + dataName
            tryCreateFolder(path)
            filename = path + "/" + shortQuestion
        fig.savefig(filename)

    if callShow:
        plt.show()

    return gameStackedBarPlots

# Quick visual check of plotStackedBar on a single question of the default dataset.
# The trailing ';' keeps the returned list of bar containers out of the cell output.
plotStackedBar(
    indexedQuestions[7],
    dataName=_dataName,
    saveFig=False,
    printDebug=False,
);



In [None]:
# Debug scratch cell: the body of plotStackedBar unrolled at notebook level so
# that its intermediate values (minLikertValue, commonData, ...) can be
# inspected from other cells. The assignments below stand in for the
# function's parameters.
# NOTE(review): this duplicates plotStackedBar and overwrites notebook-level
# names such as fig, ax, data, dataName and saveFig; keep it in sync with the
# function or remove it once debugging is over.

question=indexedQuestions[7]
dataName=_dataName
saveFig=False
filename=None
fig=None
ax=None
title=None
displayedGameNames=identityGameNames
showLegend=True
printDebug=True
tight_layout=True
constrained_layout=False

assert (dataName in datasets), ("Not found in datasets: '" + dataName + "'")

data = datasets[dataName]

allPossibleValues = np.unique(datasets[dataName].loc[:, shortLikertQuestions.values].values)
minLikertValue = min(allPossibleValues)
maxLikertValue = max(allPossibleValues)
assert ((maxLikertValue - minLikertValue) == 4), ("Function designed for 5-step Likert scale")

# the figure is only shown from here when this cell created the axes
callShow = ax is None

if ax is None:
    if fig is None:
        fig = plt.figure(constrained_layout=constrained_layout)
        fig.patch.set_facecolor('white')
    # add the subplot to `fig` itself, not to whatever figure is current
    ax = fig.add_subplot(111)
    ax.patch.set_facecolor('white')
elif fig is None:
    fig = ax.figure

ind = np.arange(len(games))    # the x locations for the groups
width = 0.35       # the width of the bars: can also be len(x) sequence
colors = cm.jet(np.linspace(1, 0, 5))

#stacked bar plots of games
#gameStackedBarPlots[gameIndex][plottedValue] contains:
# the bar plot for game gameIndex of Likert/refined value plottedValue
gameStackedBarPlots = [[] for i in range(len(games))]

# number of answers per (game, answer value) pair
commonData = datasets[dataName]
commonData = commonData.loc[:, [question, gameQuestion]].groupby([gameQuestion, question]).size()

if printDebug:
    print("-------------------------------------------------------------------------------------")
    print(question)

for gameIndex in range(len(games)):

    game = games[gameIndex]

    # a game without any answer in this dataset is absent from the grouped
    # counts: plot it as an empty bar instead of raising KeyError
    data = commonData.get(game)
    if data is None:
        gameLikertCounts = [0 for i in range(minLikertValue, maxLikertValue+1)]
    else:
        gameLikertCounts = [data.get(i,0) for i in range(minLikertValue, maxLikertValue+1)]

    if printDebug:
        print("  " + game + ": " + str(gameLikertCounts))
        print(str(data))

    # original author's note - agreement scale: 0 == 100% agree, 4 == 100% disagree
    # Segment i sits on top of the segments of all the higher values.
    gameStackedBarPlots[gameIndex] = [[] for i in range(5)]
    for i in range(5):
        _bottom = sum(gameLikertCounts[i+1:])
        gameStackedBarPlots[gameIndex][i] = ax.bar(
            ind[gameIndex]
            , gameLikertCounts[i]
            , width
            , color=colors[i]
            , bottom=_bottom
        )

ax.set_ylabel('Stacked answers')
ax.set_ylim(0, getMaxAnswers(dataName))
if not title:
    ax.set_title(question)
else:
    ax.set_title(title)

ax.set_xticks(ind)
ax.set_xticklabels(displayedGameNames[games])

if showLegend and gameStackedBarPlots:
    # colors are the same for every game: take the handles from the first one
    ax.legend(
        [gameStackedBarPlots[0][i][0] for i in range(5)]
        , likert5StepDescriptions
        , loc='center left', bbox_to_anchor=(1, 0.5)
    )

if tight_layout:
    fig.tight_layout()

# save before showing: on non-inline backends the figure may be destroyed once
# plt.show() returns, which would produce an empty file
if saveFig:
    if filename is None:
        shortQuestion = shortQuestions.index[shortQuestions.values==question].values[0]
        path = stackedBarPlotsFilenamePathStem + "/" + dataName
        tryCreateFolder(path)
        filename = path + "/" + shortQuestion
    fig.savefig(filename)

if callShow:
    plt.show()

gameStackedBarPlots



In [None]:
# Display the smallest Likert value found in the dataset; this notebook-level
# name is assigned by the unrolled debug cell above, which must be run first.
minLikertValue 

In [None]:
def getMaxAnswers(dataName):
    """Return the highest number of answers recorded for a single game.

    For every title in `games`, counts the rows of `datasets[dataName]` whose
    `gameQuestion` column equals that title, and returns the largest count.
    Rows are counted as they are: answers, not unique respondents.
    """
    answersPerGame = []
    for gameTitle in games:
        gameAnswers = datasets[dataName][datasets[dataName][gameQuestion] == gameTitle]
        answersPerGame.append(len(gameAnswers))
    return max(answersPerGame)

In [None]:
#for dataName in datasets.keys():
#    print("\n"+dataName+":\n\t"+str(getMaxAnswers(dataName)))

In [None]:
def getStackedBarPlotsMatrix(
        dataName=_dataName
        , saveFig=False
        , suptitle=None
        , tight_layout=True
        , constrained_layout=False
    ):
    """Draw the stacked bar plots of all questions of shortLikertQuestions on one figure.

    Each question gets its own subplot, titled with its entry in
    shortDescQuestions and labelled with shortGameNames; a single legend is
    attached to the last subplot.

    Parameters:
        dataName: key of the dataset in `datasets`, forwarded to plotStackedBar.
        saveFig: when true, the figure is saved as
            stackedBarPlotsFilenamePathStem + "/matrixStackedBars" + dataName.
        suptitle: figure title. None uses `dataName`; an empty value (such as
            '' or []) draws no title; any other value is used as the title.
        tight_layout: forwarded to plotStackedBar for every subplot.
        constrained_layout: forwarded to `plt.figure`.

    Returns:
        None.
    """
    fig = plt.figure(figsize=(15,8), constrained_layout=constrained_layout)
    fig.patch.set_facecolor('white')

    # historical 3x4 layout; rows are added instead of failing when there are
    # more than 12 questions
    columnCount = 4
    rowCount = max(3, -(-len(shortLikertQuestions) // columnCount))

    ax = None
    gameStackedBarPlots = []

    for graphIndex, question in enumerate(shortLikertQuestions, start=1):
        ax = fig.add_subplot(rowCount, columnCount, graphIndex)
        # format 1-word description
        shortQuestion = shortDescQuestions[question]
        gameStackedBarPlots = plotStackedBar(
            question
           , dataName=dataName
           , saveFig=False
           , fig=fig
           , ax=ax
           , title=shortQuestion
           , displayedGameNames=shortGameNames
           , showLegend=False
           , printDebug=False
           , tight_layout=tight_layout
          )

    # one legend for the whole matrix, anchored to the right of the last
    # subplot; skipped when nothing was plotted
    if ax is not None and gameStackedBarPlots:
        ax.legend(
            [bars[0] for bars in gameStackedBarPlots[0]]
            , likert5StepDescriptions
            , loc='center left', bbox_to_anchor=(1.52, 0.5)
        )

    if suptitle is None:
        fig.suptitle(dataName, fontsize=16)
    elif suptitle:
        # an explicit title used to be silently ignored
        fig.suptitle(suptitle, fontsize=16)

    if saveFig:
        fig.savefig(stackedBarPlotsFilenamePathStem + "/matrixStackedBars" + dataName)
    
# Render and save the stacked bar matrix of every loaded dataset.
# suptitle=[] is an empty, non-None value: the function's default
# dataset-name title is therefore not applied.
for dataSet in datasets.keys():
    getStackedBarPlotsMatrix(dataName=dataSet, saveFig=True, suptitle=[])

In [None]:
# Show the currently selected dataset name and every dataset name available in `datasets`
print(_dataName)
print(datasets.keys())

In [None]:
# Number of rows per value of the gameQuestion column in the data20190603 dataset
datasets["data20190603"][gameQuestion].value_counts()

In [None]:
# Number of rows per value of the gameQuestion column in the data20190703 dataset
datasets["data20190703"][gameQuestion].value_counts()

In [None]:
# Number of rows per value of the gameQuestion column in the data20190828 dataset
datasets["data20190828"][gameQuestion].value_counts()

In [None]:
# One stand-alone stacked bar plot per Likert question for the data20190828
# dataset, labelled with the short game names; nothing is saved.
for question in shortLikertQuestions:
    plotStackedBar(
        question,
        dataName="data20190828",
        displayedGameNames=shortGameNames,
        saveFig=False,
        printDebug=False,
    )

In [None]:
# Manual check of the values displayed in the bar plots: count the answers of
# the selected input data per (game, answer to indexedQuestions[4]) pair, then
# display the counts of one game.
data = (
    _inputData
    .loc[:, [indexedQuestions[4], gameQuestion]]
    .groupby([gameQuestion, indexedQuestions[4]])
    .size()
)
data['Dr Bug: Microbe Mayhem']

### SUS Likert scale score variance

In [None]:
saveFig = True

dataName = _dataName

# One figure per Likert question: a seaborn bar plot of the answers, one bar per game.
for question in indexedLikertQuestions:
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Fix the bar order explicitly: without `order`, seaborn infers it from the
    # data, and the short names set below could end up under the wrong bars.
    sns.barplot(x=gameQuestion, y=question, data=datasets[dataName], order=games, ax = ax)
    ax.set_xticklabels(shortGameNames[games])
    ax.set_xlabel(question)
    ax.set_ylabel('Agreement - Likert scale')
    ax.set_ylim(1, 5)

    if saveFig:
        # file named after the label of the question in the index of shortQuestions
        shortQuestion = shortQuestions.index[shortQuestions.values==question].values[0]
        path = barPlotsErrorBarsFilenamePathStem + "/" + dataName
        tryCreateFolder(path)
        fig.savefig(path + "/" + shortQuestion)

In [None]:
saveFig = True

fig = plt.figure(figsize=(15,8))

dataName = _dataName

# All Likert questions on one figure, one subplot per question (3x4 grid).
for graphIndex, question in enumerate(indexedLikertQuestions, start=1):

    ax = fig.add_subplot(3,4,graphIndex)
    sns.barplot(x=gameQuestion, y=question, data=datasets[dataName], ax = ax)
    shortQuestion = shortQuestions.index[shortQuestions.values==question].values[0]
    # Name the subplot with the x axis label and hide the per-game tick labels.
    # The previous ['', shortQuestion, ''] tick labels only worked with exactly
    # three games.
    ax.set_xlabel(shortQuestion)
    ax.set_ylabel('')
    ax.tick_params(axis='x', labelbottom=False)
    ax.set_ylim(1, 5)

if saveFig:
    path = barPlotsErrorBarsFilenamePathStem + "/" + dataName
    tryCreateFolder(path)
    fig.savefig(path + "/SUS-11-bar-graphs-3x4-matrix")

In [None]:
saveFig = True

fig = plt.figure(figsize=(15,8))

dataName = _dataNameSUSNormalized

# Same 3x4 matrix as the previous cell, for the dataset named by
# _dataNameSUSNormalized and with a 0-4 y range.
for graphIndex, question in enumerate(indexedLikertQuestions, start=1):

    ax = fig.add_subplot(3,4,graphIndex)
    sns.barplot(x=gameQuestion, y=question, data=datasets[dataName], ax = ax)
    shortQuestion = shortQuestions.index[shortQuestions.values==question].values[0]
    # Name the subplot with the x axis label and hide the per-game tick labels.
    # The previous ['', shortQuestion, ''] tick labels only worked with exactly
    # three games.
    ax.set_xlabel(shortQuestion)
    ax.set_ylabel('')
    ax.tick_params(axis='x', labelbottom=False)
    ax.set_ylim(0, 4)

if saveFig:
    path = barPlotsErrorBarsFilenamePathStem + "/" + dataName
    tryCreateFolder(path)
    fig.savefig(path + "/SUS-normalized-11-bar-graphs-3x4-matrix")

## Negative vs Positive questions comparison

Let's find out whether the answers to the negative questions and to the positive questions are indistinguishable.

In [None]:
# Display the fourth value returned by selectDataset at the top of the notebook
# NOTE(review): presumably the SUS-normalized answers - confirm in the data preparation notebook
_inputDataSUSNormalized