Import appropriate packages:

In [57]:
import config, MySQLdb, pandas, warnings, numpy

Establishing connection to database:

In [8]:
# Connection settings come from the project's config helper, looked up under 'Chompsky'.
chompsky_authenticator = config.Authenticator('Chompsky')

# One shared connection to the `childes` schema; the test functions in this
# notebook read `chompsky_con` as a module-level global.
chompsky_con = MySQLdb.connect(
    host=chompsky_authenticator.host,
    port=chompsky_authenticator.port,
    user=chompsky_authenticator.user,
    passwd=chompsky_authenticator.passwd,
    db='childes'
)

Test #1: Ensure that the stated speaker of the word is also among the participants in that conversation. 

In [126]:
def speaker_included(num_inst):
    """Spot-check that each sampled word's speaker is among its participants.

    Draws ``num_inst`` random rows from the ``words`` table and, for each row,
    checks that ``speaker`` is contained in ``participants``. One warning is
    emitted per failing row; a PASS line is printed only if no row fails.

    Parameters
    ----------
    num_inst : int
        Number of rows to sample (bound to the SQL ``LIMIT``).

    Returns
    -------
    None
        Results are reported through ``warnings.warn`` and ``print``.
    """
    people = pandas.read_sql("SELECT speaker, participants FROM words ORDER BY RAND() LIMIT %(num)s",
                             chompsky_con, params={"num": num_inst})

    failed = False
    # Walk the two columns directly instead of indexing with the `.ix`
    # accessor, which was removed in pandas 1.0.
    for speaker, participants in zip(people['speaker'], people['participants']):
        # A missing (NULL) value on either side cannot satisfy the check, and
        # would otherwise crash the `in` test or the message concatenation.
        missing = pandas.isnull(speaker) or pandas.isnull(participants)
        # NOTE(review): if `participants` is stored as one string, `in` is a
        # substring match rather than a match on whole speaker codes - confirm.
        if missing or speaker not in participants:
            warnings.warn("Test 1: FAIL - Recorded speaker " + str(speaker) + " is not amongst conversation participants " + str(participants) + ".")
            failed = True

    if not failed:
        print("Test 1: PASS - All speakers are accounted for amongst conversation participants.")

# Run Test 1 against 20 randomly sampled rows of `words`.
speaker_included(20)


Test 1: PASS - All speakers are accounted for amongst conversation participants.


Test #2: Verify that the information recorded for each child's utterance in "words" is consistent with the child's information in "children".

In [124]:
# To be resolved: Handling equality between Bloom and Bloom70.
# To be resolved: Handling empty cells in database table. 

def child_verify(num_inst):
    """Spot-check that child details in ``words`` agree with ``children``.

    Samples up to ``num_inst`` rows spoken by the child ('CHI') from ``words``
    joined to ``children`` on child name and corpus, then checks that

    * the gender recorded in ``words`` equals the gender in ``children``, and
    * the age recorded in ``words`` lies inside the ``children`` age range,
      which is parsed as ``"<low>-<high>"``.

    Rows with an empty or missing value in any selected column are dropped
    before checking. Failures are reported with ``warnings.warn``; a PASS line
    is printed only if nothing failed.

    Parameters
    ----------
    num_inst : int
        Maximum number of joined rows to sample (bound to the SQL ``LIMIT``).

    Returns
    -------
    None
    """
    child_info = pandas.read_sql("""SELECT w.child AS 'W_Child', 
                                            w.age AS 'W_Age', 
                                            w.gender AS 'W_Gender', 
                                            w.corpus AS 'W_Corpus', 
                                            c.Gender AS 'C_Gender', 
                                            c.age AS 'C_Age', 
                                            c.Corpus AS 'C_Corpus' 
                                            FROM words AS w, children AS c 
                                            WHERE w.child = c.Child AND 
                                                    w.speaker = 'CHI' AND 
                                                    w.corpus = c.corpus AND 
                                                    w.gender != '' ORDER BY RAND() LIMIT %(num)s""",
                                 chompsky_con, params={"num": num_inst})

    failed = False
    # Treat empty strings as missing, then drop incomplete rows. This builds a
    # new frame rather than mutating columns in place.
    child_info = child_info.replace('', numpy.nan).dropna()

    if not (child_info['W_Gender'] == child_info['C_Gender']).all():
        warnings.warn("Test 2: FAIL - The gender of a child has been misrepresented.")
        failed = True

    # Iterate over values, not index labels: dropna() leaves gaps in the
    # index, so label lookups by range(len(...)) would miss or mismatch rows.
    for words_age, child_age in zip(child_info['W_Age'], child_info['C_Age']):
        age_range = str(child_age).split('-')
        # A range with no '-' is treated as a single age (low == high).
        lower, upper = float(age_range[0]), float(age_range[-1])
        # Parenthesised on purpose: `not a >= lo and a <= hi` negates only the
        # first comparison, so ages above the upper bound were never flagged.
        if not (lower <= float(words_age) <= upper):
            warnings.warn("Test 2: FAIL - The child's age " + str(words_age) + " is not in the appropriate range: " + str(age_range))
            failed = True

    if not failed:
        print("Test 2: PASS - The children's information is consistent across tables.")

# Run Test 2 against up to 50 randomly sampled child ('CHI') rows.
child_verify(50)

Test 2: PASS - The children's information is consistent across tables.


Test #3: Ensure that the word being spoken appears at some point within the gloss sentence / utterance.

In [123]:
def word_in_sent(num_inst):
    """Spot-check that each sampled word occurs inside its own utterance.

    Draws ``num_inst`` random rows from ``words`` and checks that ``gloss``
    is contained in ``sentgloss`` for every row. One warning is emitted per
    failing row; a PASS line is printed only if no row fails.

    Parameters
    ----------
    num_inst : int
        Number of rows to sample (bound to the SQL ``LIMIT``).

    Returns
    -------
    None
        Results are reported through ``warnings.warn`` and ``print``.
    """
    spoken_words = pandas.read_sql("SELECT gloss, sentgloss FROM words ORDER BY RAND() LIMIT %(num)s",
                                   chompsky_con, params={"num": num_inst})

    failed = False
    # Walk the two columns directly instead of indexing with the `.ix`
    # accessor, which was removed in pandas 1.0.
    for word, sent in zip(spoken_words['gloss'], spoken_words['sentgloss']):
        # A missing (NULL) word or utterance counts as a failure rather than
        # crashing the `in` test or the message concatenation.
        missing = pandas.isnull(word) or pandas.isnull(sent)
        if missing or word not in sent:
            warnings.warn("Test 3: FAIL - Word " + str(word) + ' not found within utterance ' + str(sent) + '.')
            failed = True

    if not failed:
        print("Test 3: PASS - Words verified within utterances.")

# Run Test 3 against 20 randomly sampled rows of `words`.
word_in_sent(20)

Test 3: PASS - Words verified within utterances.


Test #4: Check that searching the same utterance number returns the same utterance for a child.

In [127]:
def num_to_utter(id, child, age):
    """Check that one (utterance number, child, age) key maps to one utterance.

    Fetches ``sentgloss`` for every ``words`` row matching the three values and
    verifies that all fetched values are equal. On a mismatch a warning is
    emitted and the fetched list is printed; otherwise a PASS line is printed.

    Parameters
    ----------
    id : value compared against ``utt_number``
    child : value compared against ``child``
    age : value compared against ``age``

    Returns
    -------
    None
    """
    query = "SELECT sentgloss FROM words WHERE utt_number = %(id)s AND child = %(child)s AND age = %(age)s"
    bindings = {"id": id, "child": child, "age": age}
    utterances = pandas.read_sql(query, chompsky_con, params=bindings)["sentgloss"].tolist()

    # Comparing the list with itself shifted by one position is true exactly
    # when every neighbouring pair (and therefore every element) is equal.
    if utterances[1:] == utterances[:-1]:
        print("Test 4: PASS - All utterances corresponding to these parameters were identical.")
        return

    warnings.warn("Test 4: FAIL - The given parameters returned the following different utterances:")
    print(utterances)

# Run Test 4 for utterance number 4 of child "Eric" at age 613.
num_to_utter(4, "Eric", 613)

Test 4: PASS - All utterances corresponding to these parameters were identical.


Test #5: Verify that basic "gloss" words have been translated to the proper corresponding "mor" structure.

In [130]:
# To be resolved: Handling Nonetypes amongst "mor" structures.
def gloss_check(dict):
    """Check that known glosses carry the expected "mor" structure.

    Fetches every ``words`` row whose ``gloss`` is one of the mapping's keys
    and compares the row's ``mor`` value with the mapped value. On any mismatch
    a warning is emitted and one line per mismatching row is printed; otherwise
    a PASS line is printed.

    Parameters
    ----------
    dict : mapping of str to str
        Expected "mor" structure for each gloss, e.g. ``{"it": "pro|it"}``.
        (The parameter name shadows the builtin; it is kept so existing
        keyword callers continue to work.)

    Returns
    -------
    None
    """
    keys = tuple(dict.keys())
    incorrect_words = []

    # With no keys the query would contain an empty `IN ()` list, which is
    # not valid SQL, so the database is only queried when there is work to do.
    if keys:
        basic_words = pandas.read_sql("SELECT gloss, mor FROM words WHERE gloss IN %(keys)s",
                                      chompsky_con, params={"keys": keys})
        for word, structure in zip(basic_words["gloss"], basic_words["mor"]):
            # NOTE(review): presumably a case-insensitive column collation can
            # return glosses that differ from the requested keys only by case;
            # rows with no entry in the mapping are skipped - confirm.
            if word not in dict:
                continue
            expected = dict[word]
            if structure != expected:
                # A missing (NULL) structure is reported as an empty string.
                found = "" if pandas.isnull(structure) else structure
                incorrect_words.append([word, found, expected])

    if incorrect_words:
        warnings.warn("Test 5: FAIL - The following word, structure pairing failed to match the provided mapping:")
        for word, found, expected in incorrect_words:
            print("Word: " + str(word) + ", Structure: " + str(found) + ", Correct Structure: " + str(expected))
    else:
        print("Test 5: PASS - The basic set of words provided all matched the required structure.")

# gloss_check({"it":"pro|it", "It":"n:prop|It"})
# gloss_check({"a":"art|a", "A":"n:prop|A"})

Final Test Suite

In [None]:
# Test 1: recorded speaker is among the conversation participants (20 random rows).
speaker_included(20)

# Test 2: child details in `words` agree with `children` (up to 50 random rows).
child_verify(50)

# Test 3: each word occurs within its own utterance (20 random rows).
word_in_sent(20)

# Test 4: utterance number 4 of child "Eric" at age 613 maps to a single utterance.
num_to_utter(4, "Eric", 613)

# Test 5: the listed glosses carry the expected "mor" structure.
gloss_check({"a":"art|a", "A":"n:prop|A"})