In [1]:
%load_ext rpy2.ipython
%load_ext autoreload
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import regex as re
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import os
import sys
from datetime import datetime
# Lift pandas' display limits: with None, no cap is applied to the number of columns or rows
# rendered when a frame is displayed (large frames will therefore print in full).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
from data_cleaning import *

# Merge raw data files

In [3]:
# Rename columns in the old CSV file to facilitate concatenation
# old_cols = set(mp_raw_1.columns).difference(set(mp_raw_2.columns)) # Create list of columns that needs to be renamed

# new_cols = [re.search('^.+(?=\.\w+$)', col).group(0) for col in old_cols] # Create list of new names for the columns

# rename_cols = {} # Initialize dictionary to store old and new column names

# for old_col, new_col in zip(old_cols, new_cols):
#     rename_cols[old_col] = new_col

# mp_raw_1.rename(columns=rename_cols, inplace=True)

In [4]:
# mp_fix_labels = {'school_details.UDISE_cd_label': {23310607203: 23310607101, 23310607802: 23310601105, 23310613802: 23310601104}, \
#                         'school_details.School_label': {'NMS SANKLA JAGTHAR': 'PS JAGTHAR', 'MS SANOTI':'EGS SALAIYAPURA', 'EGS TAPRA KACHHI': 'EGS KHARI TAPRA'}}

# mp_mask_idx = mp_raw.index[mp_raw.assessment_date == '2022-10-10']
# mp_raw.loc[mask_idx, :] = mp_raw.loc[mp_mask_idx, :].replace(mp_fix_labels)

# up_fix_labels = {'school_details.UDISE_cd_label': {9691406101: 9691405001}, \
#                  'school_details.School_label': {'P.S. SANDWA': 'P.S. DHAURUPUR'}}

# up_mask = ((up_raw.assessment_date == '2022-10-21') & ((up_raw.tabletUserName == 'Munish')  | (up_raw.tabletUserName == 'Poonamkumari')))
# up_mask_idx = up_raw.index[up_mask]

# up_raw.loc[up_mask_idx, :] = up_raw.loc[up_mask_idx, :].replace(up_fix_labels)

# Data Cleaning Functions

In [5]:
def outlier_treatment(dataframe, score_col):
    """Return the rows of ``dataframe`` whose ``score_col`` value is an outlier.

    Outliers are identified with Tukey's fences: a value is an outlier when it
    lies below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR``, where ``Q1`` and
    ``Q3`` are the 25th and 75th percentiles of the non-missing scores and
    ``IQR = Q3 - Q1``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing the scores.
    score_col : str
        Name of the numeric column to screen for outliers.

    Returns
    -------
    pandas.DataFrame
        The subset of rows (original index preserved) whose score falls outside
        the fences. Rows with a missing score are never reported. An empty
        frame is returned when there are no non-missing scores.
    """
    scores = dataframe[score_col]

    # Percentiles are computed on non-missing values only; a single NaN would
    # otherwise turn both percentiles (and therefore the fences) into NaN.
    valid_scores = scores.dropna()
    if valid_scores.empty:
        return dataframe.iloc[0:0]

    q1, q3 = np.percentile(valid_scores, [25, 75])
    iqr = q3 - q1
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr

    # Comparisons against NaN are False, so rows with missing scores drop out here.
    outliers = dataframe[(scores < lower_fence) | (scores > upper_fence)]
    return outliers

# Import merged raw data file

In [6]:
%autoreload
# Change working directory
os.chdir(r'/home/rohitdaniel/Documents/CSF_FLN_Evaluation/SLO_Baseline/rawData')

# Import concatenated raw dataframe for cleaning
mp_raw = pd.read_pickle("./mp_raw_full.pkl")
mp_raw.name = 'mp_raw'

up_raw = pd.read_pickle("./up_raw_full.pkl")
up_raw.name = 'up_raw'

## A. Literacy Sub-tasks Data Cleaning

In [7]:
%autoreload
# Run the project's clean_data helper on the MP raw frame. It returns two objects, unpacked
# here as the literacy frame and the numeracy frame (the cleaning rules live in data_cleaning
# and are not visible in this notebook).
mp_raw_lit, mp_raw_num = data_cleaning.clean_data(mp_raw)
# Attach a human-readable label to each frame as a plain attribute (not a column).
mp_raw_lit.name = 'MP literacy'
mp_raw_num.name = 'MP numeracy'

In [8]:
%autoreload
# Run the project's clean_data helper on the UP raw frame. It returns two objects, unpacked
# here as the literacy frame and the numeracy frame (the cleaning rules live in data_cleaning
# and are not visible in this notebook).
up_raw_lit, up_raw_num = data_cleaning.clean_data(up_raw)
# Attach a human-readable label to each frame as a plain attribute (not a column).
up_raw_lit.name = 'UP literacy'
up_raw_num.name = 'UP numeracy'

In [9]:
# Pass every column whose name ends in "time_remaining" through the shared score-cleaning
# helper, for both literacy frames.
for dataframe in (mp_raw_lit, up_raw_lit):
    duration_cols = list(filter(lambda name: re.search(r'.+time_remaining$', name), dataframe.columns))
    data_cleaning.clean_scores(dataframe, duration_cols)

In [10]:
# Identifier columns copied from the cleaned literacy frames into the per-student score tables.
pid_cols = [
    'tabletUserName',
    'assessment_date',
    'school_details.State_label',
    'school_details.District_label',
    'school_details.Block_label',
    'school_details.School_label',
    'school_details.UDISE_cd_label',
    'SI_std_name',
    'GI_std_name',
    'student_age',
    'student_gender',
]

# Shorter names for the identifier columns; columns not listed here keep their original name.
rename_cols = {
    'tabletUserName': 'evaluator_name',
    'school_details.State_label': 'state',
    'school_details.District_label': 'district',
    'school_details.Block_label': 'block',
    'school_details.School_label': 'school_name',
    'school_details.UDISE_cd_label': 'udise',
    'SI_std_name': 'student_name',
    'GI_std_name': 'student_roll',
}

# One score table per state, starting with just the renamed identifier columns.
mp_lit_scores, up_lit_scores = (
    frame[pid_cols].rename(columns=rename_cols) for frame in (mp_raw_lit, up_raw_lit)
)

### Literacy 1: Listening Comprehension

In [11]:
%autoreload
lit1_or_sheets_mp = pd.ExcelFile(r"../cleaned_data/mp_literacy1_other_responses_vf.xlsx")
data_cleaning.correct_or(mp_raw_lit, lit1_or_sheets_mp)

In [12]:
%autoreload
lit1_or_sheets_up = pd.ExcelFile(r"../cleaned_data/up_literacy1_other_responses_vf.xlsx")
data_cleaning.correct_or(up_raw_lit, lit1_or_sheets_up)

In [13]:
# Literacy 1 (listening comprehension): clean the item columns of each state's raw frame and
# add the per-student total and percentage correct to the matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Item columns: names ending in "literacy1_q<digit>".
    lit1_raw = [name for name in raw_frame.columns if re.search(r'literacy1_q\d$', name)]
    data_cleaning.clean_scores(raw_frame, lit1_raw)

    # Total score per student, computed row by row from the item columns.
    lit1_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit1_raw]), axis=1)
    score_frame.loc[:, 'listening_comprehension_total'] = lit1_totals

    # Percentage correct = total as a share of the number of item columns.
    lit1_percent = score_frame.apply(lambda row: 100*row['listening_comprehension_total']/len(lit1_raw), axis=1)
    score_frame.loc[:, 'listening_comprehension_%_correct'] = lit1_percent
    
#     # Extract other responses to listening comprehension questions
#     lit1_or = [col for col in dataframe.columns if re.search(r'^literacy1_\S*or$', col)]
#     file_name = dataframe.name + '_lit1_or.xlsx'
#     with pd.ExcelWriter(file_name) as writer: 
#         for col in lit1_or:
#             dataframe[col].value_counts().reset_index().rename(columns = {'index':"Response", col:'Frequency'}).to_excel(writer, sheet_name=col)

<h3>
    Literacy 2: Oral Vocabulary
</h3>

In [14]:
%autoreload
lit2_or_sheets_mp = pd.ExcelFile(r"../cleaned_data/mp_literacy2_other_responses_vf.xlsx")
data_cleaning.correct_or(mp_raw_lit, lit2_or_sheets_mp)

In [15]:
%autoreload
lit2_or_sheets_up = pd.ExcelFile(r"../cleaned_data/up_literacy2_other_responses_vf.xlsx")
data_cleaning.correct_or(up_raw_lit, lit2_or_sheets_up)

In [16]:
# Literacy 2 (oral vocabulary): run the item columns through clean_scores (the project helper
# that is meant to deal with 'UNDEFINED' / 'SKIPPED' entries), then add the per-student total
# and percentage correct to the matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Item columns: names ending in "literacy2_q<number>".
    lit2_raw = [name for name in raw_frame.columns if re.search(r'literacy2_q\d+$', name)]
    data_cleaning.clean_scores(raw_frame, lit2_raw)

    # Total score per student, computed row by row from the item columns.
    lit2_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit2_raw]), axis=1)
    score_frame.loc[:, 'oral_vocabulary_total'] = lit2_totals

    # Percentage correct = total as a share of the number of item columns.
    lit2_percent = score_frame.apply(lambda row: 100*row['oral_vocabulary_total']/len(lit2_raw), axis=1)
    score_frame.loc[:, 'oral_vocabulary_%_correct'] = lit2_percent
    
#     # Extract other responses to oral vocabulary questions
#     lit2_or = [col for col in dataframe.columns if re.search(r'literacy2\S*or$', col)]
#     file_name = dataframe.name + '_lit2_or.xlsx'
#     with pd.ExcelWriter(file_name) as writer: 
#         for col in lit2_or:
#             dataframe[col].value_counts().reset_index().rename(columns = {'index':"Response", col:'Frequency'}).to_excel(writer, sheet_name=col)

<h3>
    Literacy 3: Initial Sound Identification
</h3>

In [17]:
# Literacy 3 (initial sound identification): run the item columns through clean_scores (the
# project helper that is meant to deal with 'UNDEFINED' / 'SKIPPED' entries), then add the
# per-student total and percentage correct to the matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Item columns: names ending in "literacy3_q<number>".
    lit3_raw = [name for name in raw_frame.columns if re.search(r'literacy3_q\d+$', name)]
    data_cleaning.clean_scores(raw_frame, lit3_raw)

    # Total score per student, computed row by row from the item columns.
    lit3_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit3_raw]), axis=1)
    score_frame.loc[:, 'initial_sound_identification_total'] = lit3_totals

    # Percentage correct = total as a share of the number of item columns.
    lit3_percent = score_frame.apply(lambda row: 100*row['initial_sound_identification_total']/len(lit3_raw), axis=1)
    score_frame.loc[:, 'initial_sound_identification_%_correct'] = lit3_percent

<h3>
    Literacy 4: Letter Naming (Untimed)
</h3>

In [18]:
# Literacy 4 (letter naming, untimed): clean the grid item columns, pass each student's row
# of grid scores through fix_scores, then add the total and percentage correct to the
# matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Grid item columns: names ending in "literacy4_ut_grid_<digits>".
    lit4_ut_raw = [name for name in raw_frame.columns if re.search(r'literacy4_ut_grid_\d*$', name)]
    data_cleaning.clean_scores(raw_frame, lit4_ut_raw)

    # fix_scores gets one student's grid scores as a list; its per-row results are rebuilt
    # into a frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit4_ut_raw]), axis=1)
    raw_frame.loc[:, lit4_ut_raw] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit4_ut_raw)

    # Total score on the letter naming (untimed) sub-task.
    lit4_ut_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit4_ut_raw]), axis=1)
    score_frame.loc[:, 'letter_naming_untimed_total'] = lit4_ut_totals

    # Percentage correct = total as a share of the number of grid items.
    lit4_ut_percent = score_frame.apply(lambda row: 100*row['letter_naming_untimed_total']/len(lit4_ut_raw), axis=1)
    score_frame.loc[:, 'letter_naming_untimed_%_correct'] = lit4_ut_percent

### Literacy 4: Letter Naming (Timed)

In [19]:
%autoreload
# Check data for 'UNDEFINED' or 'SKIPPED' values
for dataframe in {'mp': [mp_raw_lit, mp_lit_scores], 'up': [up_raw_lit, up_lit_scores]}.values():
    lit4_tt_raw = [col for col in dataframe[0].columns if re.search(r'literacy4_tt.+', col)]
    lit4_tt_scores = [col for col in dataframe[0].columns if re.search(r'literacy4_tt_grid_\d+$', col)]

    # print(f'\033[1mCleaning literacy {dataframe[0].name} subtask 4 (timed) scores\033[0m')
    data_cleaning.clean_scores(dataframe[0], lit4_tt_scores)
    dataframe[0].loc[:, lit4_tt_scores] = pd.DataFrame((dataframe[0].apply(lambda x: data_cleaning.fix_scores([x[col] for col in lit4_tt_scores]), axis=1)).to_list(), index=dataframe[0].index, columns=lit4_tt_scores)
    # Calculate automaticity on letter naming (timed) sub-task
    dataframe[1].loc[:, 'letter_naming_timed_fluency'] = dataframe[0].apply(lambda x: calculations.fluency_score(x, lit4_tt_raw), axis=1)

<h3>
    Literacy 5: Familiar Words Reading (Untimed)
</h3>

In [20]:
# Literacy 5 (familiar words, untimed): clean the grid item columns, pass each student's row
# of grid scores through fix_scores, then add the total and percentage correct to the
# matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Grid item columns: names ending in "literacy5_ut_grid_<digits>".
    lit5_ut_raw = [name for name in raw_frame.columns if re.search(r'literacy5_ut_grid_\d*$', name)]
    data_cleaning.clean_scores(raw_frame, lit5_ut_raw)

    # fix_scores gets one student's grid scores as a list; its per-row results are rebuilt
    # into a frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit5_ut_raw]), axis=1)
    raw_frame.loc[:, lit5_ut_raw] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit5_ut_raw)

    # Total score on the familiar words (untimed) sub-task.
    lit5_ut_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit5_ut_raw]), axis=1)
    score_frame.loc[:, 'familiar_words_untimed_total'] = lit5_ut_totals

    # Percentage correct = total as a share of the number of grid items.
    lit5_ut_percent = score_frame.apply(lambda row: 100*row['familiar_words_untimed_total']/len(lit5_ut_raw), axis=1)
    score_frame.loc[:, 'familiar_words_untimed_%_correct'] = lit5_ut_percent

<h3>
    Literacy 5: Familiar Words Reading (Timed)
</h3>

In [21]:
# Literacy 5 (familiar words, timed): clean the grid item columns, pass each student's row of
# grid scores through fix_scores, then add the fluency score to the matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # All columns of the timed sub-task (grid items plus the other literacy5_tt* columns)...
    lit5_tt_raw = [name for name in raw_frame.columns if re.search(r'literacy5_tt.+', name)]
    # ...and only the grid item columns, "literacy5_tt_grid_<number>".
    lit5_tt_scores = [name for name in raw_frame.columns if re.search(r'literacy5_tt_grid_\d+$', name)]

    data_cleaning.clean_scores(raw_frame, lit5_tt_scores)

    # fix_scores gets one student's grid scores as a list; its per-row results are rebuilt
    # into a frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit5_tt_scores]), axis=1)
    raw_frame.loc[:, lit5_tt_scores] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit5_tt_scores)

    # Fluency on the familiar words (timed) sub-task; fluency_score receives the whole row
    # together with the names of all literacy5_tt* columns.
    lit5_tt_fluency = raw_frame.apply(lambda row: calculations.fluency_score(row, lit5_tt_raw), axis=1)
    score_frame.loc[:, 'familiar_words_timed_fluency'] = lit5_tt_fluency

### Literacy 6: Non-word Reading

In [22]:
# Literacy 6 (non-word reading, timed): clean the grid item columns, pass each student's row
# of grid scores through fix_scores, then add the fluency score to the matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # All columns of the timed sub-task (grid items plus the other literacy6_tt* columns)...
    lit6_tt_raw = [name for name in raw_frame.columns if re.search(r'literacy6_tt.+', name)]
    # ...and only the grid item columns, "literacy6_tt_grid_<number>".
    lit6_tt_scores = [name for name in raw_frame.columns if re.search(r'literacy6_tt_grid_\d+$', name)]

    data_cleaning.clean_scores(raw_frame, lit6_tt_scores)

    # fix_scores gets one student's grid scores as a list; its per-row results are rebuilt
    # into a frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit6_tt_scores]), axis=1)
    raw_frame.loc[:, lit6_tt_scores] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit6_tt_scores)

    # Fluency on the non-word reading (timed) sub-task; fluency_score receives the whole row
    # together with the names of all literacy6_tt* columns.
    lit6_tt_fluency = raw_frame.apply(lambda row: calculations.fluency_score(row, lit6_tt_raw), axis=1)
    score_frame.loc[:, 'non_words_timed_fluency'] = lit6_tt_fluency

### Literacy 7: Oral Reading Fluency (Timed)

In [23]:
# Literacy 7 (oral reading fluency, timed): clean the grid item columns, pass each student's
# row of grid scores through fix_scores, then add the fluency score to the matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # All columns of the timed sub-task (grid items plus the other literacy7_tt* columns)...
    lit7_raw = [name for name in raw_frame.columns if re.search(r'literacy7_tt.+', name)]
    # ...and only the grid item columns, "literacy7_tt_grid_<number>".
    lit7_scores = [name for name in raw_frame.columns if re.search(r'literacy7_tt_grid_\d+$', name)]

    data_cleaning.clean_scores(raw_frame, lit7_scores)

    # fix_scores gets one student's grid scores as a list; its per-row results are rebuilt
    # into a frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit7_scores]), axis=1)
    raw_frame.loc[:, lit7_scores] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit7_scores)

    # Fluency on the oral reading (timed) sub-task; fluency_score receives the whole row
    # together with the names of all literacy7_tt* columns.
    lit7_fluency = raw_frame.apply(lambda row: calculations.fluency_score(row, lit7_raw), axis=1)
    score_frame.loc[:, 'oral_reading_fluency'] = lit7_fluency

### Literacy 8: Reading Comprehension (Untimed)

In [24]:
# Literacy 8 (reading + reading comprehension, untimed): clean both sets of item columns,
# pass each student's row of reading-grid scores through fix_scores, then add totals and
# percentages correct for both parts to the matching score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Reading grid items ("literacy8_ut_grid_<digits>") and comprehension questions
    # ("literacy8_ut_q<digits>").
    lit8_raw_reading = [name for name in raw_frame.columns if re.search(r'literacy8_ut_grid_\d*$', name)]
    lit8_raw_comprehension = [name for name in raw_frame.columns if re.search(r'literacy8_ut_q\d*$', name)]

    for scores in (lit8_raw_reading, lit8_raw_comprehension):
        data_cleaning.clean_scores(raw_frame, scores)

    # Only the reading grid goes through fix_scores; its per-row results are rebuilt into a
    # frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit8_raw_reading]), axis=1)
    raw_frame.loc[:, lit8_raw_reading] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit8_raw_reading)

    # Reading: total and percentage of grid items correct.
    reading_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit8_raw_reading]), axis=1)
    score_frame.loc[:, 'reading_untimed_total'] = reading_totals
    reading_percent = score_frame.apply(lambda row: 100*row['reading_untimed_total']/len(lit8_raw_reading), axis=1)
    score_frame.loc[:, 'reading_untimed_%_correct'] = reading_percent

    # Comprehension: total and percentage of questions correct.
    comprehension_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit8_raw_comprehension]), axis=1)
    score_frame.loc[:, 'reading_comprehension_untimed_total'] = comprehension_totals
    comprehension_percent = score_frame.apply(lambda row: 100*row['reading_comprehension_untimed_total']/len(lit8_raw_comprehension), axis=1)
    score_frame.loc[:, 'reading_comprehension_untimed_%_correct'] = comprehension_percent

### Literacy 9a: Dictation (Letters)

In [25]:
# Literacy 9a (dictation, letters): clean the grid item columns, pass each student's row of
# grid scores through fix_scores, then add the total and percentage correct to the matching
# score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Grid item columns only: "literacy9a_ut_grid_<number>". The pattern is anchored at the
    # end (like the other sub-tasks) so that a column which merely starts with this prefix
    # cannot be picked up as an item and inflate the percentage denominator.
    lit9a_raw = [name for name in raw_frame.columns if re.search(r'literacy9a_ut_grid_\d+$', name)]
    data_cleaning.clean_scores(raw_frame, lit9a_raw)

    # fix_scores gets one student's grid scores as a list; its per-row results are rebuilt
    # into a frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit9a_raw]), axis=1)
    raw_frame.loc[:, lit9a_raw] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit9a_raw)

    # Total score on the dictation (letters) sub-task.
    lit9a_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit9a_raw]), axis=1)
    score_frame.loc[:, 'dictation_letters_total'] = lit9a_totals

    # Percentage correct = total as a share of the number of grid items.
    lit9a_percent = score_frame.apply(lambda row: 100*row['dictation_letters_total']/len(lit9a_raw), axis=1)
    score_frame.loc[:, 'dictation_letters_%_correct'] = lit9a_percent

In [26]:
# mp_dictation_letters_validation = mp_raw_lit[mp_raw_lit.literacy9a_total >= 5][['assessment_date', 'school_details.State_label', 'school_details.District_label', \
#                                               'school_details.Block_label', 'school_details.School_label', \
#                                               'school_details.UDISE_cd_label', 'SI_std_name', 'GI_std_name', 'student_age', \
#                                               'student_gender', 'literacy9a_total']]

In [27]:
# up_dictation_letters_validation = up_raw_lit[up_raw_lit.literacy9a_total >= 5][['assessment_date', 'school_details.State_label', 'school_details.District_label', \
#                                               'school_details.Block_label', 'school_details.School_label', \
#                                               'school_details.UDISE_cd_label', 'SI_std_name', 'GI_std_name', 'student_age', \
#                                               'student_gender', 'literacy9a_total']]

### Literacy 9b: Dictation (Words)

In [28]:
# Literacy 9b (dictation, words): clean the grid item columns, pass each student's row of
# grid scores through fix_scores, then add the total and percentage correct to the matching
# score table.
for dataframe in ([mp_raw_lit, mp_lit_scores], [up_raw_lit, up_lit_scores]):
    raw_frame, score_frame = dataframe  # [cleaned raw responses, per-student score table]

    # Grid item columns only: "literacy9b_ut_grid_<number>". The pattern is anchored at the
    # end (like the other sub-tasks) so that a column which merely starts with this prefix
    # cannot be picked up as an item and inflate the percentage denominator.
    lit9b_raw = [name for name in raw_frame.columns if re.search(r'literacy9b_ut_grid_\d+$', name)]
    data_cleaning.clean_scores(raw_frame, lit9b_raw)

    # fix_scores gets one student's grid scores as a list; its per-row results are rebuilt
    # into a frame aligned on the original index and written back over the grid columns.
    fixed_rows = raw_frame.apply(lambda row: data_cleaning.fix_scores([row[name] for name in lit9b_raw]), axis=1)
    raw_frame.loc[:, lit9b_raw] = pd.DataFrame(fixed_rows.to_list(), index=raw_frame.index, columns=lit9b_raw)

    # Total score on the dictation (words) sub-task.
    lit9b_totals = raw_frame.apply(lambda row: calculations.total_score([row[name] for name in lit9b_raw]), axis=1)
    score_frame.loc[:, 'dictation_words_total'] = lit9b_totals

    # Percentage correct = total as a share of the number of grid items.
    lit9b_percent = score_frame.apply(lambda row: 100*row['dictation_words_total']/len(lit9b_raw), axis=1)
    score_frame.loc[:, 'dictation_words_%_correct'] = lit9b_percent


In [29]:
# mp_dictation_words_validation = mp_raw_lit[mp_raw_lit.literacy9b_total >= 5][['assessment_date', 'school_details.State_label', 'school_details.District_label', \
#                                               'school_details.Block_label', 'school_details.School_label', \
#                                               'school_details.UDISE_cd_label', 'SI_std_name', 'GI_std_name', 'student_age', \
#                                               'student_gender', 'literacy9b_total']]

In [30]:
# up_dictation_words_validation = up_raw_lit[up_raw_lit.literacy9b_total >= 5][['assessment_date', 'school_details.State_label', 'school_details.District_label', \
#                                               'school_details.Block_label', 'school_details.School_label', \
#                                               'school_details.UDISE_cd_label', 'SI_std_name', 'GI_std_name', 'student_age', \
#                                               'student_gender', 'literacy9b_total']]

In [31]:
# mp_dictation_validation = pd.merge(left=mp_dictation_letters_validation, right=mp_dictation_words_validation, how='outer')
# mp_dictation_validation.to_excel('mp_dictation_validation.xlsx')

In [32]:
# up_dictation_validation = pd.merge(left=up_dictation_letters_validation, right=up_dictation_words_validation, how='outer')
# up_dictation_validation.to_excel('up_dictation_validation.xlsx')

In [33]:
cols_renamed = {'tabletUserName': 'evaluator_name',\
 'assessment_date': 'assessment_date',\
 'school_details.State_label': 'state',\
 'school_details.District_label': 'district',\
 'school_details.Block_label': 'block',\
 'school_details.School_label': 'school_name',\
 'school_details.UDISE_cd_label': 'udise',\
 'SI_std_name': 'student_name',\
 'GI_std_name': 'student_roll',\
 'student_age': 'student_age',\
 'student_gender': 'student_gender',\
 'literacy1_q1': 'listening_comprehension: 1. इस कहानी में कौन-कौन से जानवर है?',\
 'literacy1_q1_or': 'listening_comprehension: 1. other_responses',\
 'literacy1_q2': 'listening_comprehension: 2. बिल्ली कहाँ रहती है?',\
 'literacy1_q2_or': 'listening_comprehension: 2. other_responses',\
 'literacy1_q3': 'listening_comprehension: 3. दूध पीते-पीते बिल्ली ने क्या देखा?',\
 'literacy1_q3_or': 'listening_comprehension: 3. other_responses',\
 'literacy1_q4': 'listening_comprehension: 4. चूहे को देखकर बिल्ली ने क्या किया?',\
 'literacy1_q4_or': 'listening_comprehension: 4. other_responses',\
 'literacy1_end': 'listening_comprehension: end',\
 'literacy2_p_q': 'oral_vocabulary_practice: बिल्ली',\
 'literacy2_q1': 'oral_vocabulary: 1. घर',\
 'literacy2_q1_or': 'oral_vocabulary: 1. other_responses',\
 'literacy2_q2': 'oral_vocabulary: 2. पतंग',\
 'literacy2_q2_or': 'oral_vocabulary: 2. other_responses',\
 'literacy2_q3': 'oral_vocabulary: 3. बक्सा/ खोखा',\
 'literacy2_q3_or': 'oral_vocabulary: 3. other_responses',\
 'literacy2_q4': 'oral_vocabulary: 4. किताब/पुस्तक/बुक',\
 'literacy2_q4_or': 'oral_vocabulary: 4. other_responses',\
 'literacy2_q5': 'oral_vocabulary: 5. छाता/छतरी',\
 'literacy2_q5_or': 'oral_vocabulary: 5. other_responses',\
 'literacy2_q6': 'oral_vocabulary: 6. मटका/घड़ा/टोकनी',\
 'literacy2_q6_or': 'oral_vocabulary: 6. other_responses',\
 'literacy2_q7': 'oral_vocabulary: 7. लड़का सो रहा है/नींद/सो जाना',\
 'literacy2_q7_or': 'oral_vocabulary: 7. other_responses',\
 'literacy2_q8': 'oral_vocabulary: 8. लड़की खाना खा रही है/खाना',\
 'literacy2_q8_or': 'oral_vocabulary: 8. other_responses',\
 'literacy2_q9': 'oral_vocabulary: 9. लड़की किताब पढ़ रही है\पुस्तक पढ़ रही है\पढ़ना',\
 'literacy2_q9_or': 'oral_vocabulary: 9. other_responses',\
 'literacy2_q10': 'oral_vocabulary: 10. लड़का साइकिल चला रहा है',\
 'literacy2_q10_or': 'oral_vocabulary: 10. other_responses',\
 'literacy2_end': 'oral_vocabulary: end',\
 'literacy3_p_q1': 'initial_sound_recognition_practice: चल',\
 'literacy3_p_q2': 'initial_sound_recognition_practice: सब',\
 'literacy3_q1': 'initial_sound_recognition: आम',\
 'literacy3_q2': 'initial_sound_recognition: 2. रथ',\
 'literacy3_q3': 'initial_sound_recognition: 3. एक',\
 'literacy3_q4': 'initial_sound_recognition: 4. पल',\
 'literacy3_q5': 'initial_sound_recognition: 5. ऊँट',\
 'literacy3_q6': 'initial_sound_recognition: 6. ईंट',\
 'literacy3_q7': 'initial_sound_recognition: 7. घात',\
 'literacy3_q8': 'initial_sound_recognition: 8. कप',\
 'literacy3_q9': 'initial_sound_recognition: 9. तोता',\
 'literacy3_q10': 'initial_sound_recognition: 10. झरना',\
 'literacy3_end': 'initial_sound_identification: end',
 'literacy4_p_q1': 'letter_naming_practice: प',\
 'literacy4_p_q2': 'letter_naming_practice: भ',\
 'literacy4_ut_grid_1': 'letter_naming_untimed: 1. ह',\
 'literacy4_ut_grid_2': 'letter_naming_untimed: 2. स',\
 'literacy4_ut_grid_3': 'letter_naming_untimed: 3. य',\
 'literacy4_ut_grid_4': 'letter_naming_untimed: 4. ध',\
 'literacy4_ut_grid_5': 'letter_naming_untimed: 5. आ',\
 'literacy4_ut_grid_6': 'letter_naming_untimed: 6. फ',\
 'literacy4_ut_grid_7': 'letter_naming_untimed: 7. त्र',\
 'literacy4_ut_grid_8': 'letter_naming_untimed: 8. छ',\
 'literacy4_ut_grid_9': 'letter_naming_untimed: 9. उ',\
 'literacy4_ut_grid_10': 'letter_naming_untimed: 10. ख',\
 'literacy4_ut_grid_11': 'letter_naming_untimed: 11. क्ष',\
 'literacy4_ut_grid_12': 'letter_naming_untimed: 12. ज्ञ',\
 'literacy4_ut_grid_13': 'letter_naming_untimed: 13. ई',\
 'literacy4_ut_grid_14': 'letter_naming_untimed: 14. वु',\
 'literacy4_ut_grid_15': 'letter_naming_untimed: 15. मे',\
 'literacy4_ut_grid.number_of_items_correct': 'letter_naming_untimed: number_of_items_correct',\
 'literacy4_ut_grid.number_of_items_attempted': 'letter_naming_untimed: number_of_items_attempted',\
 'literacy4_ut_grid.gridAutoStopped': 'letter_naming_untimed: gridAutoStopped',\
 'literacy4_ut_grid.autoStop': 'letter_naming_untimed: autoStop',\
 'literacy4_ut_end': 'letter_naming_untimed: end',\
 'literacy4_tt_grid_1': 'letter_naming_timed: 1. म',\
 'literacy4_tt_grid_2': 'letter_naming_timed: 2. न',\
 'literacy4_tt_grid_3': 'letter_naming_timed: 3. प',\
 'literacy4_tt_grid_4': 'letter_naming_timed: 4. ट',\
 'literacy4_tt_grid_5': 'letter_naming_timed: 5. क',\
 'literacy4_tt_grid_6': 'letter_naming_timed: 6. त',\
 'literacy4_tt_grid_7': 'letter_naming_timed: 7. प',\
 'literacy4_tt_grid_8': 'letter_naming_timed: 8. ब',\
 'literacy4_tt_grid_9': 'letter_naming_timed: 9. ल',\
 'literacy4_tt_grid_10': 'letter_naming_timed: 10. य',\
 'literacy4_tt_grid_11': 'letter_naming_timed: 11. ट',\
 'literacy4_tt_grid_12': 'letter_naming_timed: 12. ड',\
 'literacy4_tt_grid_13': 'letter_naming_timed: 13. ए',\
 'literacy4_tt_grid_14': 'letter_naming_timed: 14. त',\
 'literacy4_tt_grid_15': 'letter_naming_timed: 15. छ',\
 'literacy4_tt_grid_16': 'letter_naming_timed: 16. द',\
 'literacy4_tt_grid_17': 'letter_naming_timed: 17. अ',\
 'literacy4_tt_grid_18': 'letter_naming_timed: 18. ए',\
 'literacy4_tt_grid_19': 'letter_naming_timed: 19. र',\
 'literacy4_tt_grid_20': 'letter_naming_timed: 20. ह',\
 'literacy4_tt_grid_21': 'letter_naming_timed: 21. प',\
 'literacy4_tt_grid_22': 'letter_naming_timed: 22. ई',\
 'literacy4_tt_grid_23': 'letter_naming_timed: 23. आ',\
 'literacy4_tt_grid_24': 'letter_naming_timed: 24. न',\
 'literacy4_tt_grid_25': 'letter_naming_timed: 25. र',\
 'literacy4_tt_grid_26': 'letter_naming_timed: 26. घ',\
 'literacy4_tt_grid_27': 'letter_naming_timed: 27. अ',\
 'literacy4_tt_grid_28': 'letter_naming_timed: 28. भ',\
 'literacy4_tt_grid_29': 'letter_naming_timed: 29. व',\
 'literacy4_tt_grid_30': 'letter_naming_timed: 30. ब',\
 'literacy4_tt_grid_31': 'letter_naming_timed: 31. क',\
 'literacy4_tt_grid_32': 'letter_naming_timed: 32. उ',\
 'literacy4_tt_grid_33': 'letter_naming_timed: 33. च',\
 'literacy4_tt_grid_34': 'letter_naming_timed: 34. ऊ',\
 'literacy4_tt_grid_35': 'letter_naming_timed: 35. ट',\
 'literacy4_tt_grid_36': 'letter_naming_timed: 36. स',\
 'literacy4_tt_grid_37': 'letter_naming_timed: 37. ग',\
 'literacy4_tt_grid_38': 'letter_naming_timed: 38. ढ',\
 'literacy4_tt_grid_39': 'letter_naming_timed: 39. न',\
 'literacy4_tt_grid_40': 'letter_naming_timed: 40. ख',\
 'literacy4_tt_grid_41': 'letter_naming_timed: 41. ए',\
 'literacy4_tt_grid_42': 'letter_naming_timed: 42. ज',\
 'literacy4_tt_grid_43': 'letter_naming_timed: 43. ध',\
 'literacy4_tt_grid_44': 'letter_naming_timed: 44. इ',\
 'literacy4_tt_grid_45': 'letter_naming_timed: 45. ल',\
 'literacy4_tt_grid_46': 'letter_naming_timed: 46. आ',\
 'literacy4_tt_grid_47': 'letter_naming_timed: 47. ठ',\
 'literacy4_tt_grid_48': 'letter_naming_timed: 48. म',\
 'literacy4_tt_grid_49': 'letter_naming_timed: 49. न',\
 'literacy4_tt_grid_50': 'letter_naming_timed: 50. इ',\
 'literacy4_tt_grid_51': 'letter_naming_timed: 51. थ',\
 'literacy4_tt_grid_52': 'letter_naming_timed: 52. श',\
 'literacy4_tt_grid_53': 'letter_naming_timed: 53. अ',\
 'literacy4_tt_grid_54': 'letter_naming_timed: 54. फ',\
 'literacy4_tt_grid_55': 'letter_naming_timed: 55. त',\
 'literacy4_tt_grid_56': 'letter_naming_timed: 56. ऊ',\
 'literacy4_tt_grid_57': 'letter_naming_timed: 57. ऐ',\
 'literacy4_tt_grid_58': 'letter_naming_timed: 58. का',\
 'literacy4_tt_grid_59': 'letter_naming_timed: 59. मौ',\
 'literacy4_tt_grid_60': 'letter_naming_timed: 60. तू',\
 'literacy4_tt_grid_61': 'letter_naming_timed: 61. बि',\
 'literacy4_tt_grid_62': 'letter_naming_timed: 62. वू',\
 'literacy4_tt_grid_63': 'letter_naming_timed: 63. ते',\
 'literacy4_tt_grid_64': 'letter_naming_timed: 64. पो',\
 'literacy4_tt_grid_65': 'letter_naming_timed: 65. की',\
 'literacy4_tt_grid_66': 'letter_naming_timed: 66. तु',\
 'literacy4_tt_grid_67': 'letter_naming_timed: 67. रो',\
 'literacy4_tt_grid_68': 'letter_naming_timed: 68. हो',\
 'literacy4_tt_grid_69': 'letter_naming_timed: 69. नै',\
 'literacy4_tt_grid_70': 'letter_naming_timed: 70. नी',\
 'literacy4_tt_grid_71': 'letter_naming_timed: 71. गा',\
 'literacy4_tt_grid_72': 'letter_naming_timed: 72. मि',\
 'literacy4_tt_grid_73': 'letter_naming_timed: 73. ण',\
 'literacy4_tt_grid_74': 'letter_naming_timed: 74. औ',\
 'literacy4_tt_grid_75': 'letter_naming_timed: 75. त्र',\
 'literacy4_tt_grid_76': 'letter_naming_timed: 76. ज्ञ',\
 'literacy4_tt_grid_77': 'letter_naming_timed: 77. क्ष',\
 'literacy4_tt_grid_78': 'letter_naming_timed: 78. झ',\
 'literacy4_tt_grid_79': 'letter_naming_timed: 79. ऋ',\
 'literacy4_tt_grid_80': 'letter_naming_timed: 80. ण',\
 'literacy4_tt_grid.duration': 'letter_naming_timed: duration',\
 'literacy4_tt_grid.time_remaining': 'letter_naming_timed: time_remaining',\
 'literacy4_tt_grid.gridAutoStopped': 'letter_naming_timed: gridAutoStopped',\
 'literacy4_tt_grid.autoStop': 'letter_naming_timed: autoStop',\
 'literacy4_tt_grid.item_at_time': 'letter_naming_timed: item_at_time',\
 'literacy4_tt_grid.time_intermediate_captured': 'letter_naming_timed: time_intermediate_captured',\
 'literacy4_tt_grid.number_of_items_correct': 'letter_naming_timed: number_of_items_correct',\
 'literacy4_tt_grid.number_of_items_attempted': 'letter_naming_timed: number_of_items_attempted',\
 'literacy4_tt_grid.items_per_minute': 'letter_naming_timed: items_per_minute',\
 'literacy4_tt_end': 'letter_naming_timed: end',\
 'literacy5_p_q1': 'familiar_words_practice: मन',\
 'literacy5_p_q2': 'familiar_words_practice: कप',\
 'literacy5_ut_grid_1': 'familiar_words_untimed: 1. एक',\
 'literacy5_ut_grid_2': 'familiar_words_untimed: 2. चल',\
 'literacy5_ut_grid_3': 'familiar_words_untimed: 3. बाल',\
 'literacy5_ut_grid_4': 'familiar_words_untimed: 4. बतख',\
 'literacy5_ut_grid_5': 'familiar_words_untimed: 5. उछल',\
 'literacy5_ut_grid_6': 'familiar_words_untimed: 6. रंग',\
 'literacy5_ut_grid_7': 'familiar_words_untimed: 7. छाता',\
 'literacy5_ut_grid_8': 'familiar_words_untimed: 8. मेरा',\
 'literacy5_ut_grid_9': 'familiar_words_untimed: 9. जूठी',\
 'literacy5_ut_grid_10': 'familiar_words_untimed: 10. लड़का',\
 'literacy5_ut_grid_11': 'familiar_words_untimed: 11. बेटी',\
 'literacy5_ut_grid_12': 'familiar_words_untimed: 12. छड़ी',\
 'literacy5_ut_grid_13': 'familiar_words_untimed: 13. बादल',\
 'literacy5_ut_grid_14': 'familiar_words_untimed: 14. पपीता',\
 'literacy5_ut_grid_15': 'familiar_words_untimed: 15. अम्मा',\
 'literacy5_ut_grid.number_of_items_correct': 'familiar_words_untimed: number_of_items_correct',\
 'literacy5_ut_grid.number_of_items_attempted': 'familiar_words_untimed: number_of_items_attempted',\
 'literacy5_ut_grid.gridAutoStopped': 'familiar_words_untimed: gridAutoStopped',\
 'literacy5_ut_grid.autoStop': 'familiar_words_untimed: autoStop',\
 'literacy5_ut_end': 'familiar_words_untimed: end',\
 'literacy5_tt_grid_1': 'familiar_words_timed: 1. घर',\
 'literacy5_tt_grid_2': 'familiar_words_timed: 2. पल',\
 'literacy5_tt_grid_3': 'familiar_words_timed: 3. कल',\
 'literacy5_tt_grid_4': 'familiar_words_timed: 4. एक',\
 'literacy5_tt_grid_5': 'familiar_words_timed: 5. तब',\
 'literacy5_tt_grid_6': 'familiar_words_timed: 6. हम',\
 'literacy5_tt_grid_7': 'familiar_words_timed: 7. बस',\
 'literacy5_tt_grid_8': 'familiar_words_timed: 8. उस',\
 'literacy5_tt_grid_9': 'familiar_words_timed: 9. मन',\
 'literacy5_tt_grid_10': 'familiar_words_timed: 10. रथ',\
 'literacy5_tt_grid_11': 'familiar_words_timed: 11. अब',\
 'literacy5_tt_grid_12': 'familiar_words_timed: 12. छत',\
 'literacy5_tt_grid_13': 'familiar_words_timed: 13. वन',\
 'literacy5_tt_grid_14': 'familiar_words_timed: 14. नल',\
 'literacy5_tt_grid_15': 'familiar_words_timed: 15. चख',\
 'literacy5_tt_grid_16': 'familiar_words_timed: 16. आम',\
 'literacy5_tt_grid_17': 'familiar_words_timed: 17. सात',\
 'literacy5_tt_grid_18': 'familiar_words_timed: 18. गाल',\
 'literacy5_tt_grid_19': 'familiar_words_timed: 19. काम',\
 'literacy5_tt_grid_20': 'familiar_words_timed: 20. हाथ',\
 'literacy5_tt_grid_21': 'familiar_words_timed: 21. हरा',\
 'literacy5_tt_grid_22': 'familiar_words_timed: 22. बीज',\
 'literacy5_tt_grid_23': 'familiar_words_timed: 23. सिर',\
 'literacy5_tt_grid_24': 'familiar_words_timed: 24.तारा',\
 'literacy5_tt_grid_25': 'familiar_words_timed: 25. गिन',\
 'literacy5_tt_grid_26': 'familiar_words_timed: 26. सेब',\
 'literacy5_tt_grid_27': 'familiar_words_timed: 27. दिन',\
 'literacy5_tt_grid_28': 'familiar_words_timed: 28. खाना',\
 'literacy5_tt_grid_29': 'familiar_words_timed: 29. नदी',\
 'literacy5_tt_grid_30': 'familiar_words_timed: 30. नीम',\
 'literacy5_tt_grid_31': 'familiar_words_timed: 31. काला',\
 'literacy5_tt_grid_32': 'familiar_words_timed: 32. खेल',\
 'literacy5_tt_grid_33': 'familiar_words_timed: 33. मेरा',\
 'literacy5_tt_grid_34': 'familiar_words_timed: 34. पानी',\
 'literacy5_tt_grid_35': 'familiar_words_timed: 35. डाली',\
 'literacy5_tt_grid_36': 'familiar_words_timed: 36. नमन',\
 'literacy5_tt_grid_37': 'familiar_words_timed: 37. हिंदी',\
 'literacy5_tt_grid_38': 'familiar_words_timed: 38. उसका',\
 'literacy5_tt_grid_39': 'familiar_words_timed: 39. गाँव',\
 'literacy5_tt_grid_40': 'familiar_words_timed: 40. अमर',\
 'literacy5_tt_grid_41': 'familiar_words_timed: 41. उछल',\
 'literacy5_tt_grid_42': 'familiar_words_timed: 42. पपीता',\
 'literacy5_tt_grid_43': 'familiar_words_timed: 43. हमारा ',\
 'literacy5_tt_grid_44': 'familiar_words_timed: 44. अंदर',\
 'literacy5_tt_grid_45': 'familiar_words_timed: 45. जगह',\
 'literacy5_tt_grid_46': 'familiar_words_timed: 46. पतंग',\
 'literacy5_tt_grid_47': 'familiar_words_timed: 47. लड़का',\
 'literacy5_tt_grid_48': 'familiar_words_timed: 48. गणित',\
 'literacy5_tt_grid_49': 'familiar_words_timed: 49. हिस्सा',\
 'literacy5_tt_grid_50': 'familiar_words_timed: 50. तलवार',\
 'literacy5_tt_grid.duration': 'familiar_words_timed: duration',\
 'literacy5_tt_grid.time_remaining': 'familiar_words_timed: time_remaining',\
 'literacy5_tt_grid.gridAutoStopped': 'familiar_words_timed: gridAutoStopped',\
 'literacy5_tt_grid.autoStop': 'familiar_words_timed: autoStop',\
 'literacy5_tt_grid.item_at_time': 'familiar_words_timed: item_at_time',\
 'literacy5_tt_grid.time_intermediate_captured': 'familiar_words_timed: time_intermediate_captured',\
 'literacy5_tt_grid.number_of_items_correct': 'familiar_words_timed: number_of_items_correct',\
 'literacy5_tt_grid.number_of_items_attempted': 'familiar_words_timed: number_of_items_attempted',\
 'literacy5_tt_grid.items_per_minute': 'familiar_words_timed: items_per_minute',\
 'literacy5_tt_end': 'familiar_words_timed: end',\
 'literacy6_p_q1': 'non_words_practice: मब',\
 'literacy6_p_q2': 'non_words_practice: कग',\
 'literacy6_tt_grid_1': 'non_words: 1. टेब',\
 'literacy6_tt_grid_2': 'non_words: 2. रज',\
 'literacy6_tt_grid_3': 'non_words: 3. एट',\
 'literacy6_tt_grid_4': 'non_words: 4. कग',\
 'literacy6_tt_grid_5': 'non_words: 5. अन',\
 'literacy6_tt_grid_6': 'non_words: 6. नुप',\
 'literacy6_tt_grid_7': 'non_words: 7. डिट',\
 'literacy6_tt_grid_8': 'non_words: 8. लेब',\
 'literacy6_tt_grid_9': 'non_words: 9. गक',\
 'literacy6_tt_grid_10': 'non_words: 10. ईल',\
 'literacy6_tt_grid_11': 'non_words: 11. मक',\
 'literacy6_tt_grid_12': 'non_words: 12. रल',\
 'literacy6_tt_grid_13': 'non_words: 13. नेप',\
 'literacy6_tt_grid_14': 'non_words: 14. जक',\
 'literacy6_tt_grid_15': 'non_words: 15. मुप',\
 'literacy6_tt_grid_16': 'non_words: 16. तग',\
 'literacy6_tt_grid_17': 'non_words: 17. वुज',\
 'literacy6_tt_grid_18': 'non_words: 18. टर',\
 'literacy6_tt_grid_19': 'non_words: 19. मप',\
 'literacy6_tt_grid_20': 'non_words: 20. इम',\
 'literacy6_tt_grid_21': 'non_words: 21. सान',\
 'literacy6_tt_grid_22': 'non_words: 22. दोव',\
 'literacy6_tt_grid_23': 'non_words: 23. याग',\
 'literacy6_tt_grid_24': 'non_words: 24. टिक',\
 'literacy6_tt_grid_25': 'non_words: 25. किब',\
 'literacy6_tt_grid_26': 'non_words: 26. माब',\
 'literacy6_tt_grid_27': 'non_words: 27. गाक',\
 'literacy6_tt_grid_28': 'non_words: 28. टोब',\
 'literacy6_tt_grid_29': 'non_words: 29. हिग',\
 'literacy6_tt_grid_30': 'non_words: 30. नास',\
 'literacy6_tt_grid_31': 'non_words: 31. किब',\
 'literacy6_tt_grid_32': 'non_words: 32. अप',\
 'literacy6_tt_grid_33': 'non_words: 33. ऊग',\
 'literacy6_tt_grid_34': 'non_words: 34. तिग',\
 'literacy6_tt_grid_35': 'non_words: 35. कन',\
 'literacy6_tt_grid_36': 'non_words: 36. मिक',\
 'literacy6_tt_grid_37': 'non_words: 37. मोक',\
 'literacy6_tt_grid_38': 'non_words: 38. अला',\
 'literacy6_tt_grid_39': 'non_words: 39. वता',\
 'literacy6_tt_grid_40': 'non_words: 40. टोस',\
 'literacy6_tt_grid_41': 'non_words: 41. वानाको',\
 'literacy6_tt_grid_42': 'non_words: 42. इमारि',\
 'literacy6_tt_grid_43': 'non_words: 43. उलापू',\
 'literacy6_tt_grid_44': 'non_words: 44. औपसी',\
 'literacy6_tt_grid_45': 'non_words: 45. हितोसु',\
 'literacy6_tt_grid_46': 'non_words: 46. कीगामी',\
 'literacy6_tt_grid_47': 'non_words: 47. ओकमे',\
 'literacy6_tt_grid_48': 'non_words: 48. तरजम',\
 'literacy6_tt_grid_49': 'non_words: 49. प्रनार',\
 'literacy6_tt_grid_50': 'non_words: 50. मलकोप',\
 'literacy6_tt_grid.duration': 'non_words: duration',\
 'literacy6_tt_grid.time_remaining': 'non_words: time_remaining',\
 'literacy6_tt_grid.gridAutoStopped': 'non_words: gridAutoStopped',\
 'literacy6_tt_grid.autoStop': 'non_words: autoStop',\
 'literacy6_tt_grid.item_at_time': 'non_words: item_at_time',\
 'literacy6_tt_grid.time_intermediate_captured': 'non_words: time_intermediate_captured',\
 'literacy6_tt_grid.number_of_items_correct': 'non_words: number_of_items_correct',\
 'literacy6_tt_grid.number_of_items_attempted': 'non_words: number_of_items_attempted',\
 'literacy6_tt_grid.items_per_minute': 'non_words: items_per_minute',\
 'literacy6_end': 'non_words: end',\
 'literacy7_p_q1': 'orf_timed_practice: अब सुबह होगी',\
 'literacy7_tt_grid_1': 'orf_timed: 1. एक',\
 'literacy7_tt_grid_2': 'orf_timed: 2. चींटी',\
 'literacy7_tt_grid_3': 'orf_timed: 3. और',\
 'literacy7_tt_grid_4': 'orf_timed: 4. एक',\
 'literacy7_tt_grid_5': 'orf_timed: 5. कबूतर',\
 'literacy7_tt_grid_6': 'orf_timed: 6. दोस्त',\
 'literacy7_tt_grid_7': 'orf_timed: 7. थे',\
 'literacy7_tt_grid_8': 'orf_timed: 8. एक',\
 'literacy7_tt_grid_9': 'orf_timed: 9. दिन',\
 'literacy7_tt_grid_10': 'orf_timed: 10. चींटी',\
 'literacy7_tt_grid_11': 'orf_timed: 11. नदी',\
 'literacy7_tt_grid_12': 'orf_timed: 12. के',\
 'literacy7_tt_grid_13': 'orf_timed: 13. किनारे',\
 'literacy7_tt_grid_14': 'orf_timed: 14. खेल',\
 'literacy7_tt_grid_15': 'orf_timed: 15. रही',\
 'literacy7_tt_grid_16': 'orf_timed: 16. थी',\
 'literacy7_tt_grid_17': 'orf_timed: 17. तभी',\
 'literacy7_tt_grid_18': 'orf_timed: 18. वह',\
 'literacy7_tt_grid_19': 'orf_timed: 19. फिसल',\
 'literacy7_tt_grid_20': 'orf_timed: 20. कर',\
 'literacy7_tt_grid_21': 'orf_timed: 21. नदी',\
 'literacy7_tt_grid_22': 'orf_timed: 22. में',\
 'literacy7_tt_grid_23': 'orf_timed: 23. गिर',\
 'literacy7_tt_grid_24': 'orf_timed: 24. गई',\
 'literacy7_tt_grid_25': 'orf_timed: 25. कबूतर',\
 'literacy7_tt_grid_26': 'orf_timed: 26. ने',\
 'literacy7_tt_grid_27': 'orf_timed: 27. उसे',\
 'literacy7_tt_grid_28': 'orf_timed: 28. बचाने',\
 'literacy7_tt_grid_29': 'orf_timed: 29. के',\
 'literacy7_tt_grid_30': 'orf_timed: 30. लिए',\
 'literacy7_tt_grid_31': 'orf_timed: 31. एक',\
 'literacy7_tt_grid_32': 'orf_timed: 32. पत्ती',\
 'literacy7_tt_grid_33': 'orf_timed: 33. नदी',\
 'literacy7_tt_grid_34': 'orf_timed: 34. में',\
 'literacy7_tt_grid_35': 'orf_timed: 35. डाली',\
 'literacy7_tt_grid_36': 'orf_timed: 36. चींटी',\
 'literacy7_tt_grid_37': 'orf_timed: 37. उस',\
 'literacy7_tt_grid_38': 'orf_timed: 38. पत्ती',\
 'literacy7_tt_grid_39': 'orf_timed: 39. को',\
 'literacy7_tt_grid_40': 'orf_timed: 40. पकड़',\
 'literacy7_tt_grid_41': 'orf_timed: 41. कर',\
 'literacy7_tt_grid_42': 'orf_timed: 42. बच',\
 'literacy7_tt_grid_43': 'orf_timed: 43. गयी',\
 'literacy7_tt_grid.duration': 'ord_timed: duration',\
 'literacy7_tt_grid.time_remaining': 'orf_timed: time_remaining',\
 'literacy7_tt_grid.gridAutoStopped': 'orf_timed: gridAutoStopped',\
 'literacy7_tt_grid.autoStop': 'orf_timed: autoStop',\
 'literacy7_tt_grid.item_at_time': 'orf_timed: item_at_time',\
 'literacy7_tt_grid.time_intermediate_captured': 'orf_timed: time_intermediate_captured',\
 'literacy7_tt_grid.number_of_items_correct': 'orf_timed: number_of_items_correct',\
 'literacy7_tt_grid.number_of_items_attempted': 'orf_timed: number_of_items_attempted',\
 'literacy7_tt_grid.items_per_minute': 'orf_timed: items_per_minute',\
 'literacy7_tt_q1': 'orf_timed: शब्दों को अक्षर में तोड़कर पढ़ा',\
 'literacy7_tt_q2': 'orf_timed: कुछ शब्दों को जोड़कर पढ़ा लेकिन पूरा वाक्य नहीं बना पाया',\
 'literacy7_tt_q3': 'orf_timed: शब्दों को जोड़कर पूरा वाक्य पढ़ा',\
 'literacy7_tt_q4': 'orf_timed: विराम चिन्ह समझकर पढ़ा',\
 'literacy7_end': 'orf_timed: end',\
 'literacy8_ut_grid_1': 'orf_untimed: 1. मिनी',\
 'literacy8_ut_grid_2': 'orf_untimed: 2. एक',\
 'literacy8_ut_grid_3': 'orf_untimed: 3. बिल्ली',\
 'literacy8_ut_grid_4': 'orf_untimed: 4. है।',\
 'literacy8_ut_grid_5': 'orf_untimed: 5. मिनी',\
 'literacy8_ut_grid_6': 'orf_untimed: 6. रोज़',\
 'literacy8_ut_grid_7': 'orf_untimed: 7. चूहा',\
 'literacy8_ut_grid_8': 'orf_untimed: 8. पकड़ती',\
 'literacy8_ut_grid_9': 'orf_untimed: 9. है।',\
 'literacy8_ut_grid_10': 'orf_untimed: 10. एक',\
 'literacy8_ut_grid_11': 'orf_untimed: 11. दिन',\
 'literacy8_ut_grid_12': 'orf_untimed: 12. मिनी',\
 'literacy8_ut_grid_13': 'orf_untimed: 13. चूहा',\
 'literacy8_ut_grid_14': 'orf_untimed: 14. नहीं',\
 'literacy8_ut_grid_15': 'orf_untimed: 15. पकड़',\
 'literacy8_ut_grid_16': 'orf_untimed: 16. पाई।',\
 'literacy8_ut_grid_17': 'orf_untimed: 17. वह',\
 'literacy8_ut_grid_18': 'orf_untimed: 18. भूखी',\
 'literacy8_ut_grid_19': 'orf_untimed: 19. थी।',\
 'literacy8_ut_grid_20': 'orf_untimed: 20. वह',\
 'literacy8_ut_grid_21': 'orf_untimed: 21. दूध',\
 'literacy8_ut_grid_22': 'orf_untimed: 22. पीने',\
 'literacy8_ut_grid_23': 'orf_untimed: 23. गई।',\
 'literacy8_ut_grid_24': 'orf_untimed: 24. रसोई',\
 'literacy8_ut_grid_25': 'orf_untimed: 25. में',\
 'literacy8_ut_grid_26': 'orf_untimed: 26. दूध',\
 'literacy8_ut_grid_27': 'orf_untimed: 27. मिला।',\
 'literacy8_ut_grid_28': 'orf_untimed: 28. मिनी',\
 'literacy8_ut_grid_29': 'orf_untimed: 29. सारा',\
 'literacy8_ut_grid_30': 'orf_untimed: 30. दूध',\
 'literacy8_ut_grid_31': 'orf_untimed: 31. पी',\
 'literacy8_ut_grid_32': 'orf_untimed: 32. गयी',\
 'literacy8_ut_grid_33': 'orf_untimed: 33. और',\
 'literacy8_ut_grid_34': 'orf_untimed: 34. ख़ुशी',\
 'literacy8_ut_grid_35': 'orf_untimed: 35. से',\
 'literacy8_ut_grid_36': 'orf_untimed: 36. चली',\
 'literacy8_ut_grid_37': 'orf_untimed: 37. गई।',\
 'literacy8_ut_grid.number_of_items_correct': 'orf_untimed: number_of_items_correct',\
 'literacy8_ut_grid.number_of_items_attempted': 'orf_untimed: number_of_items_attempted',\
 'literacy8_ut_grid.gridAutoStopped': 'orf_untimed: gridAutoStopped',\
 'literacy8_ut_grid.autoStop': 'orf_untimed: autoStop',\
 'literacy8_ut_q1': 'orf_untimed: 1. मिनी कौन है?',\
 'literacy8_ut_q2': 'orf_untimed: 2. मिनी को क्या अच्छा लगता है?',\
 'literacy8_ut_q3': 'orf_untimed: 3. वह दूध क्यों पीने लगी?',\
 'literacy8_ut_q4': 'orf_untimed: 4. क्या मिनी को दूध अच्छा लगता है?',\
 'literacy8_ut_q5': 'orf_untimed: 5. मिनी को दूध पीकर कैसा लगा?',\
 'literacy8_ut_q6': 'orf_untimed: 6. मिनी को दूध कहाँ मिला?',\
 'literacy8_end': 'orf_untimed: end',\
 'literacy9a_ut_grid_1': 'dictation_letters: 1. फ',\
 'literacy9a_ut_grid_2': 'dictation_letters: 2. छ',\
 'literacy9a_ut_grid_3': 'dictation_letters: 3. ह',\
 'literacy9a_ut_grid_4': 'dictation_letters: 4. थ',\
 'literacy9a_ut_grid_5': 'dictation_letters: 5. श',\
 'literacy9a_ut_grid_6': 'dictation_letters: 6. ज',\
 'literacy9a_ut_grid_7': 'dictation_letters: 7. य',\
 'literacy9a_ut_grid_8': 'dictation_letters: 8. व',\
 'literacy9a_ut_grid_9': 'dictation_letters: 9. भ',\
 'literacy9a_ut_grid_10': 'dictation_letters: 10. झ',\
 'literacy9a_ut_grid.number_of_items_correct': 'dictation_letters: number_of_items_correct',\
 'literacy9a_ut_grid.number_of_items_attempted': 'dictation_letters: number_of_items_attempted',\
 'literacy9a_ut_grid.gridAutoStopped': 'dictation_letters: gridAutoStopped',\
 'literacy9a_ut_grid.autoStop': 'dictation_letters: autoStop',\
 'literacy9b_ut_grid_1': 'dictation_words: 1. नल',\
 'literacy9b_ut_grid_2': 'dictation_words: 2. चख',\
 'literacy9b_ut_grid_3': 'dictation_words: 3. घर',\
 'literacy9b_ut_grid_4': 'dictation_words: 4. कम',\
 'literacy9b_ut_grid_5': 'dictation_words: 5. नाम',\
 'literacy9b_ut_grid_6': 'dictation_words: 6. चीनी',\
 'literacy9b_ut_grid_7': 'dictation_words: 7. कलम',\
 'literacy9b_ut_grid_8': 'dictation_words: 8. मचल',\
 'literacy9b_ut_grid_9': 'dictation_words: 9. दरी',\
 'literacy9b_ut_grid_10': 'dictation_words: 10. मकान',\
 'literacy9b_ut_grid.number_of_items_correct': 'dictation_words: number_of_items_correct',\
 'literacy9b_ut_grid.number_of_items_attempted': 'dictation_words: number_of_items_attempted',\
 'literacy9b_ut_grid.gridAutoStopped': 'dictation_words: gridAutoStopped',\
 'literacy9b_ut_grid.autoStop': 'dictation_words: autoStop',\
 'literacy9_end': 'dictation: end'}

In [34]:
# Apply the raw-name -> readable-name mapping to both state frames, in place.
for lit_frame in (mp_raw_lit, up_raw_lit):
    lit_frame.rename(columns=cols_renamed, inplace=True)

In [35]:
# Target column order: the renamed labels, in the insertion order of the mapping.
reordered_cols = list(cols_renamed.values())

In [36]:
# Restrict both frames to the renamed columns, in mapping order.
# A label missing from either frame raises KeyError.
mp_raw_lit, up_raw_lit = (
    lit_frame.loc[:, reordered_cols] for lit_frame in (mp_raw_lit, up_raw_lit)
)

In [37]:
# Number of records per `udise` value; "count" skips rows whose student_name is missing.
mp_grouped = mp_raw_lit.groupby('udise').agg(cluster_size=('student_name', 'count'))

up_grouped = up_raw_lit.groupby('udise').agg(cluster_size=('student_name', 'count'))

In [38]:
# MP: a cluster is kept when it has at least 3 records; everything else is set aside.
mp_included_udise = mp_grouped.loc[mp_grouped['cluster_size'].ge(3)].index.to_list()
mp_raw_lit_excluded = mp_raw_lit.loc[~mp_raw_lit['udise'].isin(mp_included_udise)]

# UP: same rule with a minimum of 5 records per cluster.
up_included_udise = up_grouped.loc[up_grouped['cluster_size'].ge(5)].index.to_list()
up_raw_lit_excluded = up_raw_lit.loc[~up_raw_lit['udise'].isin(up_included_udise)]

In [39]:
# Keep only the records that belong to a cluster that met the minimum size.
mp_raw_lit = mp_raw_lit.loc[mp_raw_lit['udise'].isin(mp_included_udise)]

up_raw_lit = up_raw_lit.loc[up_raw_lit['udise'].isin(up_included_udise)]

In [42]:
# Evaluator names treated as supervisors for MP.
supervisors = ['Manohar', 'Jeevan', 'Vikram']

# True for every record whose (udise, student_name, student_roll) occurs more than once.
mp_irr_filter = mp_raw_lit.duplicated(subset=['udise', 'student_name', 'student_roll'], keep=False)

# Repeated records entered by a supervisor are set aside as the "irr" assessments
# (presumably inter-rater reliability re-assessments -- confirm).
mp_by_supervisor = mp_raw_lit['evaluator_name'].isin(supervisors)
mp_irr_assessments = mp_raw_lit.loc[mp_irr_filter & mp_by_supervisor]

# ...and removed from the main frame by index label, in place.
mp_raw_lit.drop(index=mp_irr_assessments.index, inplace=True)

In [45]:
# Evaluator names treated as supervisors for UP.
up_supervisors = ['Ambrish', 'Ashfak', 'Hina', 'Prrmshankar', 'Saurabh', 'Kuldeep']

# True for every record whose (udise, student_name, student_roll) occurs more than once.
up_irr_filter = up_raw_lit.duplicated(subset=['udise', 'student_name', 'student_roll'], keep=False)

# Repeated records entered by a supervisor are set aside as the "irr" assessments
# (presumably inter-rater reliability re-assessments -- confirm).
up_by_supervisor = up_raw_lit['evaluator_name'].isin(up_supervisors)
up_irr_assessments = up_raw_lit.loc[up_irr_filter & up_by_supervisor]

# ...and removed from the main frame by index label, in place.
up_raw_lit.drop(index=up_irr_assessments.index, inplace=True)

In [52]:
# One workbook for MP literacy: sheet name -> frame, written in this order.
mp_sheets = {
    'raw_data': mp_raw_lit,
    'total_scores': mp_lit_scores,
    'irr_assessments': mp_irr_assessments,
    '<3_cluster_size': mp_raw_lit_excluded,
}
with pd.ExcelWriter('mp_lit_cleaned_scored.xlsx') as writer:
    for sheet_name, sheet_frame in mp_sheets.items():
        sheet_frame.to_excel(writer, sheet_name=sheet_name)

In [53]:
# One workbook for UP literacy: sheet name -> frame, written in this order.
up_sheets = {
    'raw_data': up_raw_lit,
    'total_scores': up_lit_scores,
    'irr_assessments': up_irr_assessments,
    '<5_cluster_size': up_raw_lit_excluded,
}
with pd.ExcelWriter('up_lit_cleaned_scored.xlsx') as writer:
    for sheet_name, sheet_frame in up_sheets.items():
        sheet_frame.to_excel(writer, sheet_name=sheet_name)

In [49]:
# for dataframe in [mp_raw_lit, up_raw_lit]:
#     file_name = dataframe.name + '_cleaned_v2.xlsx'
#     dataframe.loc[:, cols].to_excel(file_name)

<h2>
    B. Numeracy Sub-tasks Data Cleaning
</h2>

<h3>
    Numeracy 1: Counting
</h3>

In [50]:
# Numeracy 1 (timed): clean the per-item grid columns of both state frames.
# `clean_scores` and `fix_counting_score` are defined outside this cell.
# NOTE(review): the saved output of this cell is a NameError for `clean_scores` --
# confirm it is actually exported by `data_cleaning`.
num1_pattern = re.compile(r'numeracy1_tt_grid_\d*$')
for dataframe in [mp_raw_num, up_raw_num]:
    # Columns whose name ends in "numeracy1_tt_grid_<digits>".
    num1_raw = [col for col in dataframe.columns if num1_pattern.search(col)]
    print(f'\033[1mCleaning numeracy {dataframe.name} subtask 1 (timed) scores\033[0m')
    clean_scores(num1_raw, dataframe)
    # Pass each row's item values (in column order) through fix_counting_score,
    # then write the returned lists back over the same columns.
    fixed_rows = dataframe.apply(
        lambda row: fix_counting_score([row[col] for col in num1_raw]), axis=1
    )
    dataframe.loc[:, num1_raw] = pd.DataFrame(
        fixed_rows.to_list(), index=dataframe.index, columns=num1_raw
    )
    # NOTE(review): no total column is computed for this subtask; the commented-out
    # line that stood here was a literacy5 leftover -- confirm whether one is needed.

[1mCleaning numeracy MP numeracy subtask 1 (timed) scores[0m


NameError: name 'clean_scores' is not defined

<h3>
    Numeracy 2: Number Recognition (Untimed)
</h3>

In [None]:
# Numeracy 2 (untimed): clean the per-item grid columns of both state frames and
# add a per-row total. `clean_scores`, `fix_score` and `total_score` are defined
# outside this cell.
num2_ut_pattern = re.compile(r'numeracy2_ut_grid_\d*$')
for dataframe in [mp_raw_num, up_raw_num]:
    # Columns whose name ends in "numeracy2_ut_grid_<digits>".
    num2_ut_raw = [col for col in dataframe.columns if num2_ut_pattern.search(col)]
    print(f'\033[1mCleaning numeracy {dataframe.name} subtask 2 (untimed) scores\033[0m')
    clean_scores(num2_ut_raw, dataframe)
    # Pass each row's item values (in column order) through fix_score,
    # then write the returned lists back over the same columns.
    fixed_rows = dataframe.apply(
        lambda row: fix_score([row[col] for col in num2_ut_raw]), axis=1
    )
    dataframe.loc[:, num2_ut_raw] = pd.DataFrame(
        fixed_rows.to_list(), index=dataframe.index, columns=num2_ut_raw
    )

    # Row-wise total over the (now fixed) item columns.
    dataframe.loc[:, 'numeracy2_ut_total'] = dataframe.apply(
        lambda row: total_score([row[score] for score in num2_ut_raw]), axis=1
    )

<h3>
    Numeracy 2: Number Recognition (Timed)
</h3>

In [None]:
# Numeracy 2 (timed): clean the per-item grid columns of both state frames.
# This cell selects the timed (`_tt_`) grid columns, not the untimed (`_ut_`)
# ones handled by the preceding cell, so the untimed items are not processed a
# second time and `numeracy2_ut_total` is not overwritten.
# `clean_scores`, `fix_score` and `total_score` are defined outside this cell.
for dataframe in [mp_raw_num, up_raw_num]:
    # Columns whose name ends in "numeracy2_tt_grid_<digits>".
    num2_tt_raw = [col for col in dataframe.columns if re.search(r'numeracy2_tt_grid_\d*$', col)]
    print(f'\033[1mCleaning numeracy {dataframe.name} subtask 2 (timed) scores\033[0m')
    clean_scores(num2_tt_raw, dataframe)
    # Pass each row's item values (in column order) through fix_score,
    # then write the returned lists back over the same columns.
    dataframe.loc[:, num2_tt_raw] = pd.DataFrame((dataframe.apply(lambda x: fix_score([x[col] for col in num2_tt_raw]), axis=1)).to_list(), index=dataframe.index, columns=num2_tt_raw)

    # NOTE(review): a row-wise total is stored for parity with the untimed cell;
    # confirm that a plain total is the right summary for the timed grid.
    dataframe.loc[:, 'numeracy2_tt_total'] = dataframe.apply(lambda x: total_score([x[score] for score in num2_tt_raw]), axis=1)

# Timed grid columns for the legacy `mp_numeracy` frame, used by the next cell.
# `numeracy2_tt` must already be defined by an earlier cell.
numeracy2_tt_raw = [col for col in numeracy2_tt if re.search(r'numeracy2_tt_grid_', col)]


In [None]:
# Pass each row's timed-grid item values (in column order) through fix_score,
# then write the returned lists back over the same columns of mp_numeracy.
fixed_tt_rows = mp_numeracy.apply(
    lambda row: fix_score([row[col] for col in numeracy2_tt_raw]), axis=1
)
mp_numeracy.loc[:, numeracy2_tt_raw] = pd.DataFrame(
    fixed_tt_rows.to_list(), index=mp_numeracy.index, columns=numeracy2_tt_raw
)

<h3>
    Numeracy 3: Number Comparison
</h3>

In [None]:
# Numeracy 3: report, then recode, missing or invalid responses in mp_numeracy.
for col in numeracy3:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # Recode NaN, 'UNDEFINED', 'SKIPPED' and '.' to the sentinel '999' and store
    # everything as strings. The result is assigned back to the column: calling
    # fillna/replace with inplace=True on a selected column is chained assignment,
    # which pandas warns about and which leaves the frame unchanged under
    # Copy-on-Write.
    mp_numeracy[col] = (
        responses
        .fillna('999')
        .replace(['UNDEFINED', 'SKIPPED', '.'], '999')
        .astype('str')
    )

<h3>
    Numeracy 4: Counting in Bundles
</h3>

In [None]:
# Numeracy 4: report, then recode, missing or invalid responses in mp_numeracy.
# Only columns of `numeracy4` matching "numeracy4...<digit>" at the end are treated.
numeracy4_raw = [col for col in numeracy4 if re.search(r'numeracy4.+\d$', col)]
for col in numeracy4_raw:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # Recode NaN, 'UNDEFINED', 'SKIPPED' and '.' to the sentinel '999' and store
    # everything as strings. The result is assigned back to the column: calling
    # fillna/replace with inplace=True on a selected column is chained assignment,
    # which pandas warns about and which leaves the frame unchanged under
    # Copy-on-Write.
    mp_numeracy[col] = (
        responses
        .fillna('999')
        .replace(['UNDEFINED', 'SKIPPED', '.'], '999')
        .astype('str')
    )

<h3>
    Numeracy 5: Missing Numbers
</h3>

In [None]:
# Numeracy 5: report, then recode, missing or invalid responses in mp_numeracy.
# Only columns of `numeracy5` matching "numeracy5...<digit>" at the end are treated.
numeracy5_raw = [col for col in numeracy5 if re.search(r'numeracy5.+\d$', col)]
for col in numeracy5_raw:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # Recode NaN, 'UNDEFINED', 'SKIPPED' and '.' to the sentinel '999' and store
    # everything as strings. The result is assigned back to the column: calling
    # fillna/replace with inplace=True on a selected column is chained assignment,
    # which pandas warns about and which leaves the frame unchanged under
    # Copy-on-Write.
    mp_numeracy[col] = (
        responses
        .fillna('999')
        .replace(['UNDEFINED', 'SKIPPED', '.'], '999')
        .astype('str')
    )

<h3>
    Numeracy 6: Addition
</h3>

In [None]:
# Numeracy 6: report, then recode, missing or invalid responses in mp_numeracy.
# Only columns of `numeracy6` matching "numeracy6...<digit>" at the end are treated.
numeracy6_raw = [col for col in numeracy6 if re.search(r'numeracy6.+\d$', col)]
for col in numeracy6_raw:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # Recode NaN, 'UNDEFINED', 'SKIPPED' and '.' to the sentinel '999' and store
    # everything as strings. The result is assigned back to the column: calling
    # fillna/replace with inplace=True on a selected column is chained assignment,
    # which pandas warns about and which leaves the frame unchanged under
    # Copy-on-Write.
    mp_numeracy[col] = (
        responses
        .fillna('999')
        .replace(['UNDEFINED', 'SKIPPED', '.'], '999')
        .astype('str')
    )

<h3>
    Numeracy 7: Subtraction
</h3>

In [None]:
# Numeracy 7: report, then recode, missing or invalid responses in mp_numeracy.
# Only columns of `numeracy7` matching "numeracy7...<digit>" at the end are treated.
numeracy7_raw = [col for col in numeracy7 if re.search(r'numeracy7.+\d$', col)]
for col in numeracy7_raw:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # Recode NaN, 'UNDEFINED', 'SKIPPED' and '.' to the sentinel '999' and store
    # everything as strings. The result is assigned back to the column: calling
    # fillna/replace with inplace=True on a selected column is chained assignment,
    # which pandas warns about and which leaves the frame unchanged under
    # Copy-on-Write.
    mp_numeracy[col] = (
        responses
        .fillna('999')
        .replace(['UNDEFINED', 'SKIPPED', '.'], '999')
        .astype('str')
    )

<h3>
    Numeracy 8: Word Problems
</h3>

In [None]:
# Numeracy 8: report, then recode, missing or invalid responses in mp_numeracy.
# Only columns of `numeracy8` matching "numeracy8...<digit>" at the end are treated.
numeracy8_raw = [col for col in numeracy8 if re.search(r'numeracy8.+\d$', col)]
for col in numeracy8_raw:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # Recode NaN, 'UNDEFINED', 'SKIPPED' and '.' to the sentinel '999' and store
    # everything as strings. The result is assigned back to the column: calling
    # fillna/replace with inplace=True on a selected column is chained assignment,
    # which pandas warns about and which leaves the frame unchanged under
    # Copy-on-Write.
    mp_numeracy[col] = (
        responses
        .fillna('999')
        .replace(['UNDEFINED', 'SKIPPED', '.'], '999')
        .astype('str')
    )

<h3>
    Numeracy 9a: Shape Recognition (Circle)
</h3>

In [None]:
# Numeracy 9a: report, then recode, 'UNDEFINED' responses in mp_numeracy.
# Only columns of `numeracy9a` containing "numeracy9a_ut_grid_" are treated.
numeracy9a_raw = [col for col in numeracy9a if re.search(r'numeracy9a_ut_grid_', col)]
for col in numeracy9a_raw:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # 'UNDEFINED' becomes '0' and the column is stored as strings. The result is
    # assigned back to the column: replace(..., inplace=True) on a selected column
    # is chained assignment, which pandas warns about and which leaves the frame
    # unchanged under Copy-on-Write.
    # NOTE(review): NaN and 'SKIPPED' are not recoded here, so NaN ends up as the
    # string 'nan' -- confirm that this is intended.
    mp_numeracy[col] = responses.replace('UNDEFINED', '0').astype('str')

<h3>
    Numeracy 9b: Shape Recognition (Rectangle)
</h3>

In [None]:
# Numeracy 9b: report, then recode, 'UNDEFINED' responses in mp_numeracy.
# Only columns of `numeracy9b` containing "numeracy9b_ut_grid_" are treated.
numeracy9b_raw = [col for col in numeracy9b if re.search(r'numeracy9b_ut_grid_', col)]
for col in numeracy9b_raw:
    responses = mp_numeracy[col]
    # Diagnostics on the column as it is before recoding.
    print(f"Unique values in {col} = {responses.unique()}")
    print(f"No. of NaN values in {col} = {responses.isna().sum()}")
    print(f"No. of UNDEFINED values in {col} = {(responses == 'UNDEFINED').sum()}")
    print(f"No. of SKIPPED values in {col} = {(responses == 'SKIPPED').sum()}\n")
    # 'UNDEFINED' becomes '0' and the column is stored as strings. The result is
    # assigned back to the column: replace(..., inplace=True) on a selected column
    # is chained assignment, which pandas warns about and which leaves the frame
    # unchanged under Copy-on-Write.
    # NOTE(review): NaN and 'SKIPPED' are not recoded here, so NaN ends up as the
    # string 'nan' -- confirm that this is intended.
    mp_numeracy[col] = responses.replace('UNDEFINED', '0').astype('str')

In [None]:
# Write the full MP numeracy frame (index included) to a workbook in the working directory.
mp_numeracy.to_excel(excel_writer="mp_raw_numeracy_full.xlsx")