In [1]:
import os
import pandas as pd
import json

from typing import Dict

# Interactive form analysis

In [2]:
output_dir = './output'

In [3]:
response_filenames = os.listdir(output_dir)

In [4]:
response_paths = map(lambda file: os.path.join(output_dir, file), response_filenames)

### 1. Retrieval of the responses

Schema for the response JSON (in Golang)

```
type annotationType struct {
	Source              string   `json:"source"`
	Words               []int    `json:"words"`
	UserCategories      []string `json:"userCategories"`
	ProvidedhCategories []string `json:"providedhCategories"`
}

type userType struct {
	Age        int    `json:"age"`
	Gender     string `json:"gender"`
	Education  string `json:"education"`
	Field      string `json:"field"`
	PreviousDH string `json:"previousDH"`
	Researcher string `json:"researcher"`
}

type taxonomyVersion []string

type taxonomyType struct {
	NewCategory     string            `json:"newCategory"`
	DraggedCategory string            `json:"draggedCategory"`
	Categories      taxonomyVersion   `json:"categories"`
	Historic        []taxonomyVersion `json:"historic"`
}

type Response struct {
	Annotations []annotationType `json:"annotations"`
	User        userType         `json:"user"`
	Taxonomy    taxonomyType     `json:"taxonomy"`
}
```

In [5]:
def dict_from_json(filepath: str) -> Dict:
    try:
        with open(filepath, 'r') as f:
            contents = f.read()
    except Exception as e:
        print(e)
        return dict()
    
    try:
        dictionary = json.loads(contents)
        return dictionary
    except Exception as e:
        print(e)
        return dict()

In [6]:
response_dicts = map(dict_from_json, response_paths)
responses = zip(response_filenames, response_dicts)

### 2. Create the Pandas dataframe

In [7]:
annotations_df = pd.DataFrame(columns=[
    'file', 
    'source', 
    'user_categories', 
    'providedh_categories', 
    'annotated_text',
    'start_index',
    'end_index',
])

users_df = pd.DataFrame(columns=[
    'file',
    'age',
    'gender',
    'education',
    'field',
    'previous_dh',
    'researcher'
])

taxonomy_df = pd.DataFrame(columns=[
    'file',
    'name'
])

taxonomy_versions = dict()

In [8]:
for [file, response_dict] in responses:
    # annotations
    for annotation in response_dict['annotations']:
        annotations_df.loc[len(annotations_df), :] = {
            'file': file, 
            'source': annotation['source'], 
            'user_categories': ','.join(annotation['userCategories']),
            'providedh_categories': ','.join(annotation['providedhCategories']),
            'annotated_text': '',#annotation[''],
            'start_index': annotation['words'][0],
            'end_index': annotation['words'][1]
        }
    
    # user details
    users_df.loc[len(users_df), :] = {
        'file': file,
        'age': response_dict['user']['age'],
        'gender': response_dict['user']['gender'],
        'education': response_dict['user']['education'],
        'field': response_dict['user']['field'],
        'previous_dh': response_dict['user']['previousDH'],
        'researcher': response_dict['user']['researcher']
    }
    
    # taxonomy
    for category in response_dict['taxonomy']['categories']:
        taxonomy_df.loc[len(taxonomy_df), :] = {
            'file': file,
            'name': category
        }
        
    #historic
    taxonomy_versions[file] = response_dict['taxonomy']['historic']

In [9]:
annotations_df

Unnamed: 0,file,source,user_categories,providedh_categories,annotated_text,start_index,end_index
0,reponse_8.json,3,Ambiguity,Ignorance,,33,36


In [10]:
users_df

Unnamed: 0,file,age,gender,education,field,previous_dh,researcher
0,reponse_8.json,40,Unspecified,,,no,no
1,reponse_7.json,40,Unspecified,,,no,no


In [12]:
taxonomy_df

Unnamed: 0,file,name
0,reponse_8.json,Ambiguity
1,reponse_8.json,gaps
2,reponse_7.json,Ambiguity
3,reponse_7.json,gaps


In [13]:
taxonomy_versions

{'reponse_8.json': [['Ambiguity'], ['Ambiguity', 'gaps']],
 'reponse_7.json': [['Ambiguity'], ['Ambiguity', 'gaps']]}