# Convert raw data from Qualtrics into a usable dataframe

In [2]:
import pandas as pd

In [3]:
# Load the raw Qualtrics export into a dataframe.
raw_csv_path = '../data/qualtrics_2013.02.13.csv'
df = pd.read_csv(raw_csv_path)

In [4]:
# Keep only the rows whose 'Finished' value equals True, then show the
# available columns.
finished_mask = df['Finished'].eq(True)
df = df.loc[finished_mask]
df.columns

Index(['Finished', 'RecordedDate', 'ResponseId', 'Agreement', 'Prolific Id',
       'Education', 'Ethnicity', 'Gender', 'Native Language', 'Birth Year',
       'Summary 1.1', 'Summary 1.2', 'Summary 1.3', 'Summary 1.4',
       'PROLIFIC_PID'],
      dtype='object')

### Convert the raw data to long format

In [5]:
# Columns carried unchanged onto every long-format row.
id_columns = ['PROLIFIC_PID', 'RecordedDate', 'ResponseId', 'Agreement', 'Prolific Id',
              'Education', 'Ethnicity', 'Gender', 'Native Language', 'Birth Year']
# Wide columns holding one summary each; the text after the '.' is used
# below as the section identifier.
summary_columns = ['Summary 1.1', 'Summary 1.2', 'Summary 1.3', 'Summary 1.4']

# Unpivot: one row per (respondent, summary question). Columns not listed in
# id_vars/value_vars (e.g. 'Finished') are dropped, as in the original subset.
df_long = pd.melt(df, id_vars=id_columns, value_vars=summary_columns,
                  var_name='question', value_name='summary')

# Drop unanswered questions. The explicit .copy() makes the filtered frame
# independent, so the column assignments below do not raise
# SettingWithCopyWarning.
df_long = df_long[df_long['summary'].notna()].copy()

# 'Summary 1.3' -> '3' (kept as a string).
df_long['section'] = df_long['question'].str.split('.').str[1]
df_long['summary'] = df_long['summary'].str.lower()
# Key used later to join the API scores back onto these rows.
df_long['id'] = df_long['PROLIFIC_PID'] + df_long['section']
len(df_long)

123

In [8]:
# Persist the long-format survey responses.
long_csv_path = '../data/qualtrics_survey_results.csv'
df_long.to_csv(long_csv_path)

### Function that sends a scoring request to the API

In [13]:
import requests

# Endpoint that scores a summary.
api_url = "https://textbook-summary-api-zoghmeioka-ue.a.run.app/score"


def get_results(summary, section, timeout=60):
    """Request scores for one summary from the scoring API.

    Args:
        summary: Text of the summary to score.
        section: Section identifier sent as ``section_index``
            (``chapter_index`` is always 1).
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds),
            so a stalled server cannot hang the caller indefinitely.

    Returns:
        A 4-tuple ``(content, wording, containment, similarity)`` taken from
        the keys of the same names in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        KeyError: If the JSON response lacks one of the expected keys.
    """
    response = requests.post(
        api_url,
        json={
            "chapter_index": 1,
            "section_index": section,
            "summary": summary,
        },
        timeout=timeout,
    )
    # Fail with the real HTTP error instead of a confusing KeyError or JSON
    # decode error further down.
    response.raise_for_status()
    result = response.json()
    return result['content'], result['wording'], result['containment'], result['similarity']

### Build a dictionary of scores using the API

In [14]:
# Score every long-format row through the API and collect the results
# column-wise, keyed by the row's 'id'.
score_fields = ('content', 'wording', 'containment', 'similarity')
output_dict = {key: [] for key in ('id',) + score_fields}
counter = 0
for _, record in df_long.iterrows():
    # Progress indicator, overwritten in place on each iteration.
    print(counter, end='\r')
    scores = get_results(record['summary'], record['section'])
    output_dict['id'].append(record['id'])
    for field, value in zip(score_fields, scores):
        output_dict[field].append(value)
    counter += 1

0

### Merge the results with the original dataframe

In [16]:
def _count_words(text):
    """Return the number of whitespace-separated tokens in *text*."""
    return len(text.split())


# Turn the collected scores into a frame and join them onto the survey rows
# through the shared 'id' column.
df_output = pd.DataFrame(output_dict)
final_df = pd.merge(df_output, df_long, on='id')
final_df['word_count'] = final_df['summary'].apply(_count_words)

### Save to file

In [17]:
# Persist the scored survey data.
final_csv_path = '../data/qualtrics_survey.csv'
final_df.to_csv(final_csv_path)