# Analyzing survey data from Qualtrics

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Render every figure at 100 dots per inch by default.
mpl.rcParams['figure.dpi'] = 100


In [None]:
# Load the raw survey export from the Excel workbook.
data = pd.read_excel("106e_eval_20220427.xlsx")

In [None]:
# Inspect the column labels of the loaded data.
data.columns

In [None]:
# Columns removed from `data` before the analysis starts.
columns_to_drop = [
    "StartDate",
    "EndDate",
    "Status",
    "Progress",
    "Duration (in seconds)",
    "Finished",
    "RecordedDate",
    "ResponseId",
    "DistributionChannel",
    "UserLanguage",
    "feedback",
]

In [None]:
# Remove the columns listed in columns_to_drop.
data = data.drop(columns_to_drop, axis="columns")

In [None]:
# Skip the first row of the sheet and keep everything after it.
# NOTE(review): presumably that row holds the question text rather than a
# response -- confirm against the export.
data = data.iloc[1:]

## Find and replace on all values

In [None]:
# Lookup table applied to every cell of the survey data.
rename_answers = {
    # Agreement answers get a numeric prefix; both wordings of the midpoint
    # map to the same label.
    "Strongly disagree": "1: Strongly disagree",
    "Somewhat disagree": "2: Somewhat disagree",
    "In the middle": "3: In the middle",
    "Neither agree nor disagree": "3: In the middle",
    "Somewhat agree": "4: Somewhat agree",
    "Strongly agree": "5: Strongly agree",
    # These two values are turned into missing data (NaN).
    "I don't know or don't want to say": np.nan,
    -99: np.nan,
}


In [None]:
# Apply the rename_answers lookup to every value in the table.
data = data.replace(to_replace=rename_answers)

### Export to Excel

In [None]:
# Save the cleaned data to a new Excel workbook.
data.to_excel("106e_eval_cleaned.xlsx")

## Calculating and visualizing results

In [None]:
# Count how many times each answer occurs in the zoom_quality column.
data['zoom_quality'].value_counts()

In [None]:
# Same tally, expressed as proportions instead of counts (normalize=True).
data['zoom_quality'].value_counts(normalize=True)

In [None]:
# Draw the zoom_quality proportions as a horizontal bar chart.
data['zoom_quality'].value_counts(normalize=True).plot(kind='barh')

In [None]:
# Horizontal bar chart of the zoom_quality proportions (this cell repeats the
# plot from the cell above).
data['zoom_quality'].value_counts(normalize=True).plot(kind='barh')

In [None]:
import matplotlib.ticker as mtick

In [None]:
# Horizontal bar chart of the zoom_quality answer proportions.
ax = data['zoom_quality'].value_counts(normalize=True).plot.barh()
ax.set(title="zoom_quality", xlabel="Percent responding")
# Proportions run from 0 to 1, so xmax=1 makes the ticks read 0%-100%.
ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))

# Write the finished chart to disk.
plt.savefig("zoom_quality_percent.png", bbox_inches='tight')

In [None]:
# Histogram of the hours_studying column using 25 bins.
ax = data['hours_studying'].plot.hist(bins=25)
ax.set(title="Hours Studying")

# Write the finished chart to disk.
plt.savefig("hours_studying.png", bbox_inches='tight')

## Automating plots

### Cleaning up the results summaries from value_counts()

In [None]:
def cleanup_result(result):
    """Return a response summary with every scale answer present and sorted.

    Args:
        result: A pandas Series indexed by answer label, such as the output
            of ``value_counts()``. A NaN label may be present for missing
            answers.

    Returns:
        A new Series in which each of the five scale answers appears
        (answers absent from ``result`` are added with a value of 0), a NaN
        label is renamed to "No response / don't know", and the rows are
        sorted by label. The Series passed in is never modified.
    """
    all_responses = ['1: Strongly disagree',
                     '2: Somewhat disagree',
                     '3: In the middle',
                     '4: Somewhat agree',
                     '5: Strongly agree']

    missing = [response for response in all_responses
               if response not in result.index]

    if missing:
        # pd.concat builds a new Series, so the caller's object is untouched.
        result = pd.concat([result, pd.Series(0, index=missing)])
    else:
        # Nothing to append: work on a copy so that relabelling the index
        # below cannot change the caller's Series in place.
        result = result.copy()

    if result.index.hasnans:
        result.index = result.index.fillna("No response / don't know")

    return result.sort_index()

In [None]:
# Proportion of each answer in interesting_topics; dropna=False keeps missing
# answers as their own NaN entry instead of leaving them out.
result = data['interesting_topics'].value_counts(normalize=True, dropna=False)

In [None]:
# Display the raw summary.
result

In [None]:
# Display the same summary after it has been passed through cleanup_result.
cleanup_result(result)

### Looping through all the columns except the first and last and plotting each graph

In [None]:
# Preview the columns selected by the [1:-1] slice: every column except the
# first and the last.
for column in data.columns[1:-1]:
    print(column)

In [None]:
import os

# Create the output folder for the plots. exist_ok=True lets this cell be
# re-run when the folder is already there, and no external shell is needed.
os.makedirs("figures", exist_ok=True)

In [None]:
# Plot and save one horizontal bar chart per survey column, skipping the
# first and the last column.
for column in data.columns[1:-1]:
    # Answer proportions, with missing answers kept as their own entry.
    result = data[column].value_counts(normalize=True, dropna=False)
    result = cleanup_result(result)

    # Draw on a fresh figure so nothing from a previous column carries over.
    fig, ax = plt.subplots()
    result.plot(kind='barh', ax=ax)

    ax.set_title(column)
    ax.set_xlabel("Percent responding")
    # The values are proportions (0-1); show them as percentages to match
    # the axis label.
    ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))

    # Save BEFORE showing: once plt.show() has displayed the figure it may
    # be closed, and a later savefig would then write an empty image.
    fig.savefig("figures/" + column + "_percent.png", bbox_inches='tight')

    plt.show()

    # Release the figure so open figures do not pile up across the loop.
    plt.close(fig)