# Data augmentation

## 1 Current data statistics

### We read in the files: 
of queries, logical forms, and schema, and categorize them by length; within the same length, there would be subcategories

In [1]:
import numpy as np
import random
import math

# Bucket the parallel corpus by logical-form length (number of whitespace
# separated tokens).  The three dictionaries share the same keys and stay
# index-aligned: entry i of each list comes from the same line number.
logic_category_len = {}
query_len = {}
schema_len = {}
with open('./rand.lo') as f_lo, open('./rand.qu') as f_qu, open('./rand.fi') as f_fi:
    # zip stops as soon as any of the three files runs out of lines
    for logic_line, query_line, schema_line in zip(f_lo, f_qu, f_fi):
        length = len(logic_line.split())
        logic_category_len.setdefault(length, []).append(logic_line)
        query_len.setdefault(length, []).append(query_line)
        schema_len.setdefault(length, []).append(schema_line)
for key in logic_category_len:
    print('length = %d, total examples: %d' % (key, len(logic_category_len[key])))

length = 2, total examples: 2
length = 4, total examples: 156
length = 6, total examples: 1253
length = 7, total examples: 4
length = 8, total examples: 624
length = 10, total examples: 687
length = 11, total examples: 697
length = 12, total examples: 488


Have a look at the data:

In [7]:
# Show every query whose logical form has 8 tokens (query_len[8] is filled in
# lockstep with logic_category_len[8], so both lists have the same length).
for query_line in query_len[8]:
    print(query_line)

who has the least apps and greater than 22 goals

which year was the latest year that havana was the 2nd_venue

who has the most field_goals with an average of 50

which year was the most recent year cambridge was the 2nd_venue

who has the least apps and greater than 114 goals

which year was the latest year that boston was the 2nd_venue

when was the first time nassau was 2nd_venue

what is the amount of the most goals before 2000

who has the least runs with an average above 20

who has the most goals but less than 268 apps

when was the first time detroit was 2nd_venue

who has the least matches with an average above 70

who has the most free_throws with an average above 30

who has the least goals and less than 253 apps

what is the amount of the least goals before 2016

what is the amount of the least goals before 2014

which state had the least number_of_candidates in 2015

what is the amount of the least goals after 2001

who has the most innings with an average of 80

what is 

### Now we collect all different schema in a list for later use

In [19]:
# Collect every distinct schema line of the corpus, in first-seen order.
schema_collect = []
with open('./rand.fi') as f_fi:
    for line in f_fi:
        if line not in schema_collect:
            schema_collect.append(line)

# Hand-written replacements for some of the collected schemas, listed by index.
# Each replacement keeps the trailing ' \n' so it prints like the lines read
# from the file.
schema_collect[2] = "State Year_of_Election No._of_candidates No._of_elected Total_no._of_seats_in_Assembly \n"
schema_collect[3] = "Team Years_won County Wins Areas Prices \n"
schema_collect[4] = "Player Matches Innings Runs Average 100s 50s Games_Played Field_Goals Free_Throws Points \n"
schema_collect[6] = "Discipline Amanda Bernie Javine_H Julia Michelle \n"
# Index 7 used to be assigned twice; the first value (the Year/Venue schema)
# was immediately overwritten by this one, so only the effective assignment
# is kept.  The Year/Venue schema lives at index 9.
schema_collect[7] = "Nation Name Position League_Apps League_Goals FA_Cup_Apps FA_Cup_Goals Total_Apps Total_Goals \n"
schema_collect[8] = "Swara Position Short_name Notation Mnemonic \n"
schema_collect[9] = "Year 1st_Venue 2nd_Venue 3rd_Venue 4th_Venue 5th_Venue 6th_Venue \n"

for schema in schema_collect:
    print(schema)

Nation Rank Gold Silver Bronze Total

Name Year_inducted Position Apps Goals

State Year_of_Election No._of_candidates No._of_elected Total_no._of_seats_in_Assembly 

Team Years_won County Wins Areas Prices 

Player Matches Innings Runs Average 100s 50s Games_Played Field_Goals Free_Throws Points 

Country Masters U.S._Open The_Open PGA Total

Discipline Amanda Bernie Javine_H Julia Michelle 

Nation Name Position League_Apps League_Goals FA_Cup_Apps FA_Cup_Goals Total_Apps Total_Goals 

Swara Position Short_name Notation Mnemonic 

Year 1st_Venue 2nd_Venue 3rd_Venue 4th_Venue 5th_Venue 6th_Venue 

Menteri_Besar Took_office Left_office Party



## 2 Data Preparation and Generation

### Next we do some data generation, the first goal is to double our current data size (8k~10k) 

As we previously did some work in the file ./data_prep/categorization.txt, we have several different sentences for a single length category. For each sentence structure, we first see whether it could be applied to all schemas, to several of them, or to just a single one; then we tag each sentence, and for 'field' and 'value' we do data recombination for both query and logical forms; finally we add noise and replace synonyms in the queries to further complicate the sentence structure.

Let's start with the easiest length = 4:

In [3]:
import os,sys,inspect
import random
import numpy as np

import tagger as tg
import tag_utils as tu
from nltk.parse import stanford
from nltk import tree

# Directory of the file that owns the current frame, and its parent directory.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
# Put the parent directory first on the module search path.  This runs after
# the imports at the top of this cell, so it only affects later imports.
sys.path.insert(0,parentdir)
# Both variables point at the same hard-coded, machine-specific directory;
# adjust the path when running on another machine.
os.environ['STANFORD_PARSER'] = '/Users/richard_xiong/Documents/DeepLearningMaster/deep_parser'
os.environ['STANFORD_MODELS'] = '/Users/richard_xiong/Documents/DeepLearningMaster/deep_parser'

#parsequery = "which nation has less than 6 <field:1> but its <field:2> medals are more than 14 "
#parsequery = "when the <field:1> was beijing and <field:2> was dubai , which city was the most recent <field:4>"
#parsequery = "for <field:0> with more than 400 <field:1> and <field:2> less than 14 , <field:0> has the most <field:3>"
# parsequery = "which state had the largest <field:1>, and its <field:2> are within 12 and 15"
# dependency_tree = parser.raw_parse_sents(('Hello, My name is Melroy', parsequery))

# for line in dependency_tree[1]:
#     line.draw()

Importing GloVe pretrained word vectors
Replacing GloVe word vectors as initialization


In order to find which value corresponds to which field, we first need to find:
1. the lowest common ancestor for each (value, field) pair
2. for each value, the ancestors found this way all lie at different levels; the deepest one, which should be a subtree of all the others, contains the corresponding pair

Possible functions:

leaf_treeposition(self, index) ---> return: The tree position of the ``index``-th leaf in this
            tree.  I.e., if ``tp=self.leaf_treeposition(i)``, then
            ``self[tp]==self.leaves()[i]``.

treeposition_spanning_leaves(self, start, end) ---> The tree position of the lowest descendant of this
            tree that dominates ``self.leaves()[start:end]``.

convert(cls, tree) ---> to subtype of Tree, say, ParentTree

e.g.
(0, 0, 1, 0, 0, 1, 0)
(0, 0, 1, 0, 1, 1, 0, 0)
(0, 0, 1, 2, 0, 0, 0)
(0, 0, 1, 2, 1, 1, 0, 0)

In [4]:
#field_corr_dicts = [{'County': ['Louth', 'Dublin', 'Kildare', 'Laois', 'Wicklow'], 'Team': ['Ireland', 'Spain', 'Cyprus', 'Mexico', 'Maynooth']}, \
#                    {'Prices': [28, 62, 72, 9, 40], 'Wins': [52, 80, 42, 76, 29], 'Areas': [38, 1, 7, 83, 98]}]

def isRepetitive(sequence):
    ''' Tell whether the last element of the sequence already occurs earlier in it.
        arguments --- sequence: any sliceable sequence (list, tuple, string, ...)
        return --- True if some element before the last one equals the last one,
                   False otherwise (including for empty and one-element sequences)
    '''
    earlier = sequence[:-1]
    # the generator body never runs for an empty prefix, so sequence[-1] is
    # only touched when there is something to compare against
    return any(element == sequence[-1] for element in earlier)

def generateFieldCombs(field_corr_dicts):
    ''' Enumerate the field sequences obtained by picking one key from each
        dictionary, never using the same key twice in one sequence.
        (Used when only fields are recombinable.)
        arguments --- field_corr_dicts: list of dictionaries, one per field slot;
                      only the keys are used here
        return --- list of sequences (lists of keys), one key per slot.  The
                   ordering is: keys of the last dictionary vary slowest, the
                   earlier slots vary fastest.  An empty input yields an empty
                   list (the former recursive version never terminated on it).
    '''
    if not field_corr_dicts:
        return []
    list_of_seqs = [[key] for key in field_corr_dicts[0]]
    for field_corr_dict in field_corr_dicts[1:]:
        # extend every partial sequence with every key of the next slot,
        # dropping extensions that would repeat a key already chosen
        list_of_seqs = [seq + [key]
                        for key in field_corr_dict
                        for seq in list_of_seqs
                        if key not in seq]
    return list_of_seqs

def generateValueCombs(field_corr_dicts, field_combination, qu_value):
    ''' Enumerate the value sequences for an already chosen field combination
        (both fields and values are recombinable).
        arguments --- field_corr_dicts: list of dictionaries, one per field slot,
                      mapping a field name to its list of candidate values
                      field_combination: the selected field for each slot; the
                      values of these fields are what gets enumerated
                      qu_value: list of (position, idx) pairs, one per value
                      placeholder; only idx (the field slot) is used here
        return --- list of sequences, one value per entry of qu_value, never
                   containing the same value twice.  Candidates of the last
                   placeholder vary slowest.  An empty qu_value yields an empty
                   list (the former recursive version never terminated on it).
    '''
    if not qu_value:
        return []

    def candidates(slot):
        # values offered by the field that was chosen for this placeholder's slot
        _, idx = slot
        return field_corr_dicts[idx][field_combination[idx]]

    list_of_seqs = [[value] for value in candidates(qu_value[0])]
    for slot in qu_value[1:]:
        # extend every partial sequence, dropping extensions that would repeat
        # a value already present
        list_of_seqs = [seq + [value]
                        for value in candidates(slot)
                        for seq in list_of_seqs
                        if value not in seq]
    return list_of_seqs

#print generateFieldCombs(field_corr_dicts)
#print generateValueCombs(field_corr_dicts, ['County', 'Wins'], [(2,0), (4,1)])

In [5]:
def isContainFieldType(schema_type, field_corr):
    ''' Check that the schema offers every value type the query needs, as many
        times as the query needs it.
        e.g. field_corr = ['string', 'string', 'int'] requires a schema with at
        least two 'string' columns and one 'int' column
        arguments --- schema_type: list with the value type of each schema column
                      field_corr: list of value types appearing in the query
        return --- True or False
    '''
    def tally(items):
        # occurrences of each distinct item
        counts = {}
        for item in items:
            counts[item] = counts.get(item, 0) + 1
        return counts

    needed = tally(field_corr)
    available = tally(schema_type)
    return all(available.get(value_type, 0) >= amount
               for value_type, amount in needed.items())

def schemaRecommend(schema_idx, field_corr_old, special_code=False):
    ''' Translate the tagged field names of a query into their value types and
        pick the schemas the query template can be augmented to.
        arguments --- schema_idx: index (in the config's schema_collect) of the
                      schema the original query is based on
                      field_corr_old: whitespace separated field names, as
                      produced by the tagger
                      special_code: if True the template is treated as not
                      transferable and only the original schema is returned;
                      if False (default) every candidate schema containing all
                      the needed value types is returned
        return --- field_corr_new: list of value types, one per field name
                   schemas: list of schemas (PLURAL) usable for augmentation
    '''
    config = tu.Config()  # provides field2word, schema_collect and schema_collect_type
    field_corr_new = [config.field2word[field_name]['value_type']
                      for field_name in field_corr_old.split()]

    if special_code:
        # only the original schema goes into the next stage
        return field_corr_new, [config.schema_collect[schema_idx]]

    schema_limit = 9  # ONLY the first 9 schemas are considered
    schemas = [config.schema_collect[j]
               for j in range(schema_limit)
               if isContainFieldType(config.schema_collect_type[j], field_corr_new)]
    return field_corr_new, schemas
#schemaRecommend(5, 'PGA Country')

In [6]:
# field_corr_new, schema_aug = schemaRecommend(5, 'PGA Country')
# field_corr_new = ['int','string']
# schema_aug = main_config.schema_collect[0:8]

def _collectSlots(tokens):
    ''' Locate the '<field>:k' / '<value>:k' placeholders of a tokenised template.
        Every token containing ':' is treated as a placeholder: the part after
        the first ':' is parsed as the slot index, and anything whose tag is not
        '<field>' counts as a value.  Debug output is printed along the way.
        return --- (fields, values), two lists of (token position, slot index)
    '''
    fields, values = [], []
    for posit, token in enumerate(tokens):
        reference = token.split(':')
        if len(reference) == 1:
            continue
        print(reference)
        idx = int(reference[1])
        if reference[0] == '<field>':
            fields.append((posit, idx))
        else:
            values.append((posit, idx))
    print('%s %s' % (fields, values))
    return fields, values


def _sampleValues(field_info, value_type):
    ''' Draw the candidate values for one field: up to three entries of its
        'value_range' for strings, three random picks from a fixed range/list
        for 'int', 'date' and 'ordinal'.  Returns None for any other type.
    '''
    if value_type == 'string':
        value_range = field_info['value_range']
        return random.sample(value_range, min(3, len(value_range)))
    if value_type == 'int':
        return random.sample(range(1, 100), 3)
    if value_type == 'date':
        return random.sample(range(1970, 2011), 3)
    if value_type == 'ordinal':
        return random.sample(['first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh',
                              'eighth', 'ninth', 'last', '1st', '2nd', '3rd', '4th', '5th',
                              '6th', '7th', '8th', '9th'], 3)
    return None


def augment(quTemp, loTemp, field_corr, schema_aug):
    ''' Data augmentation from a pair of query template and logical template
        arguments --- quTemp: query template, tokens separated by whitespace,
                      with placeholders such as '<field>:0' and '<value>:1'
                      loTemp: logical-form template using the same placeholders
                      field_corr: a list of value types e.g. ['string','ordinal','int'];
                      each index corresponds to the slot index used in the templates
                      schema_aug: PLURAL! the schemas (each a sequence of field
                      names) that the template is augmented to
        return --- collections of queries, logics, and fields (three aligned
                   lists; a query that was already generated is not added again)
        notes --- value placeholders of the logic are filled by order of
                  appearance from the values chosen for the query, so the logic
                  must not contain more value placeholders than the query.
                  Fields named 'Total' or 'Average' are never used as candidates.
    '''
    queryCollect, logicCollect, fieldCollect = [], [], []
    config = tu.Config()

    # Step 1: preparation -- positions of the field/value placeholders
    query = quTemp.split()
    logic = loTemp.split()
    qu_field, qu_value = _collectSlots(query)
    lo_field, lo_value = _collectSlots(logic)

    # queries generated so far; a set keeps duplicate detection cheap
    seen_queries = set()
    # query words that only make sense in special positions and are never
    # picked at random
    skip_words = ('who', 'when', 'city')

    # Step 2: augment to different schemas
    for schema in schema_aug:
        # Step 2.1: one dictionary per slot: {candidate field: sampled values}
        # (several slots may ask for the same type, so we go over field_corr)
        field_corr_dicts = []
        for wanted_type in field_corr:
            field_corr_dict = dict()
            for field in schema:
                if field == 'Total' or field == 'Average':
                    continue
                field_info = config.field2word[field]
                if field_info['value_type'] != wanted_type:
                    continue
                samples = _sampleValues(field_info, wanted_type)
                if samples is not None:
                    field_corr_dict[field] = samples
            field_corr_dicts.append(field_corr_dict)

        # Step 2.2: regenerate the sentences by filling in the placeholders
        for field_combination in generateFieldCombs(field_corr_dicts):
            print(field_combination)
            newquery = list(query)
            newlogic = list(logic)
            # query side: a query word if the field has several, else the
            # lower-cased field name
            for (posit, idx) in qu_field:
                field_name = field_combination[idx]
                query_words = config.field2word[field_name]['query_word']
                if len(query_words) > 1:
                    if posit == 0 and 'who' in query_words:
                        pick = 'who'
                    elif posit == 0 and 'when' in query_words:
                        pick = 'when'
                    else:
                        # Choose among the allowed words only.  Re-drawing until
                        # an allowed word shows up would never finish when the
                        # field has no allowed word at all; in that case fall
                        # back to the plain field name.
                        candidates = [word for word in query_words if word not in skip_words]
                        if candidates:
                            pick = random.choice(candidates)
                        else:
                            pick = field_name.lower()
                    newquery[posit] = pick
                else:
                    newquery[posit] = field_name.lower()
            # logic side: the field name itself
            for (posit, idx) in lo_field:
                newlogic[posit] = field_combination[idx]

            if len(qu_value) > 0:
                filled = []
                for value_combination in generateValueCombs(field_corr_dicts, field_combination, qu_value):
                    morequery = list(newquery)
                    morelogic = list(newlogic)
                    for i in range(len(qu_value)):
                        morequery[qu_value[i][0]] = str(value_combination[i]).lower()
                    for i in range(len(lo_value)):
                        morelogic[lo_value[i][0]] = str(value_combination[i])
                    filled.append((morequery, morelogic))
            else:
                filled = [(newquery, newlogic)]

            for query_tokens, logic_tokens in filled:
                sentence = ' '.join(query_tokens)
                if sentence in seen_queries:
                    continue
                seen_queries.add(sentence)
                queryCollect.append(sentence)
                logicCollect.append(' '.join(logic_tokens))
                fieldCollect.append(' '.join(schema))
    return queryCollect, logicCollect, fieldCollect

# augment(quTemp, lo6select, field_corr_new, schema_aug)

### Conventions 
Each sentence can then be turned into a query template after tagging. Now we have the logical template, the query template, and several available schemas, so combined with the field_corr and value_corr files we should be able to generate multiple sentences according to several schemas.

In [81]:
# Seed queries, one per line, in the form 'query;schema_idx' or
# 'query;schema_idx;t'.  main() splits each line on ';', skips lines with fewer
# than two parts (e.g. the empty first/last line), reads the second part as the
# schema index, and treats the presence of a third part as "only augment on
# the original schema".
# Queries asking for a total.
collect2sum = """
how many elected are there in total;2
what is the total number of gold earned;0;t
the total of gold on the chart;0
what is the total amount of gold combined;0
what is the total gold on the chart;0
the total gold in the table;0
how many gold medals were there all together;0;t
how many total gold medals were awarded;0;t
how many gold were there all together;0
what is the total if you add all of the bronze numbers together;0
""".split('\n')

# Queries asking for an average (same line format).
collect2avg = """
how many elected are there in average;2
what is the average number of gold earned;0;t
what is the average number of gold;0
the average number of gold;0
the average amount of gold earned;0;t
what is the average score on innings;4;t
what is the average of innings;4
how many runs are assigned per player;4;t
how many runs per player;4
""".split('\n')

# Seed queries in the form 'query;schema_idx' or 'query;schema_idx;t' (a third
# part makes main() augment on the original schema only).
# Superlative queries; the collection names match the lo4max / lo4max_1 /
# lo4min / lo4min_1 logical templates (argmax / argmin, 4 tokens each).
collect4max = """which country has the most pga;5
which country has the most pga championships;5;t
which country had the most number of wins;3
which country won the largest haul of bronze medals;0;t
which nation received the largest amount of gold medals;0;t
the team with the most gold medals;0;t
the team with the most gold;0
which state has the top no._of_elected amount;2
who was the top scorer in innings;4;t
the country that won the most silver medals was;0;t
which country had the most bronze medals;0;t
which nation was ranked last;0;t
who was the last nation;0;t
""".split('\n')

# Name matches lo4max_1, where the selected field is also the argmax field.
collect4max_1 = """
who was the last player;4;t
who was the last state;2;t
what is the name of the last swara on this chart;7
what is the swara that holds the last position;7
what is the largest matches amount;4
""".split('\n')

collect4min = """which country had the least bronze medals;0;t
which country had the least bronze;0
which country has the least pga championships;5;t
which country had the least number of wins;3
which nation received the smallest amount of gold medals;0;t
the team with the least gold medals;0;t
the team with the least gold;0
which nation was ranked first;0
the country that won the least silver medals was;0;t
who is the top ranked nation;0;t
who was the first nation;0;t
""".split('\n')

# Name matches lo4min_1, where the selected field is also the argmin field.
# NOTE(review): 'what is the top listed player' uses schema 7 while the other
# player queries here use schema 4 -- confirm the index.
collect4min_1 = """
who was the first player;4;t
who was the first state;2;t
what is the name of the first swara on this chart;7
what is the swara that holds the first position;7
what is the top listed player;7
what is the smallest matches amount;4
""".split('\n')

# Seed queries in the form 'query;schema_idx' or 'query;schema_idx;t' (a third
# part makes main() augment on the original schema only).
# Selection with a condition; the names match the lo6selecteq / lo6selectg /
# lo6selectl logical templates (equal / greater / less, 6 tokens each).
collect6selecteq = """what are the number of league_apps ted_davis has;6
what are the number of pga that zimbabwe has;5
what are the number of pga winning golfers that zimbabwe has;5;t
who only won 13 silver medals;0;t
what is the number of wins for maynooth;3
what was the number of silver medals the ivory_coast won;0;t
how many u.s._open wins does fiji have;5;t
how many u.s._open does fiji have;5
which country won only 1 medal, a bronze medal;0;t
which ranking is mexico;0
how many silver medals did brazil received;0;t
what country has won no silver medals;0;t
what is the number of silver medals did chile win;0;t
""".split('\n')

collect6selectg = """
name a player that plays in no less than 13 innings;4;t
which country was awarded more than 5 silver medals;0;t
only team to have more than 30 silver medals;0;t
only team to have more than 30 silver;0
who won more gold medals than united_states;0;t
name a player whose average was above 25;4
""".split('\n')

collect6selectl = """
name a player that plays in no more than 13 innings;4;t
which country was awarded less than 5 silver medals;0;t
only team to have less than 30 silver medals;0;t
only team to have less than 30 silver;0
who won less gold medals than united_states;0;t
name a player whose average was below 25;4
""".split('\n')

# Counting queries (equal / less / greater).
collect6counteq = """
how many times has minneapolis been the 2nd_venue;8;t
total number of times minneapolis has been the 2nd_venue;8;t
total number of times minneapolis was the 2nd_venue;8;t
""".split('\n')

# The first line ends in '0 ' (trailing blank); int() in main() accepts it.
collect6countl = """
how many nations got at most 8 silver;0 
the total number of countries with less than 7 gold;0
how many countries have less than 20 bronze medals;0;t
total amount of nations with less than 5 bronze;0
""".split('\n')

collect6countg = """
how many countries have more than 20 bronze;0
how many countries won more than 3 bronze metals;0;t
what is the total amount of nations with more than 5 bronze medals;0;t
the total amount of nations with more than 5 bronze;0
how many nations won at least three silver medals;0;t
""".split('\n')

# Queries about the entry before / after a given one.
collect6before = """
which year is previous to 2011;1;t
the nation before england;0
what swara is above shadja;7
what is the name of the swara that come before shatshruti_dhaivata;7;t
what is the name of the player that come before ted_tyler;4;t
which team was the previous winner before fingal_ravens in 2008;3;t
what team comes before confey;3
what is the team that comes before confey;3
""".split('\n')

collect6after = """
the nation after england;0
what swara is below shadja;7
what is the name of the swara that come after shatshruti_dhaivata;7;t
what is the name of the player that come after ted_tyler;4;t
which team was the next winner after fingal_ravens in 2008;3;t
what team comes after confey;3
what is the team that comes after confey;3
""".split('\n')

# Seed queries in the form 'query;schema_idx' or 'query;schema_idx;t' (a third
# part makes main() augment on the original schema only).
# Queries combining the values of two named entries (sum).
collect11sum = """
how many gold medals did japan and france combined win;0;t
how many combined gold medals did japan and france win;0;t
how many combined gold did japan and france have;0
how many gold did japan and france combined have;0
how many innings did bill and ted have in total;4
""".split('\n')

# Queries comparing the values of two named entries (difference).
collect11diff = """
what is the difference in gold between cuba and mexico;0
what was the difference between the gold count of brazil and argentina;0
how many more gold medals has nepal won than pakistan;0;t
how many gold nepal has over pakistan;0
""".split('\n')

# [check] Delete 7
# How to deal with no/not, which indicates zero?
# [check] The current augmentation directly copies the field name; could extend to query words
# [check] Even if the field is not shown in the query, the current algorithm still works

# print collect6select
# print collect4max

# Logical-form templates paired with the collect* query lists above.
# '<field>:k' / '<value>:k' are placeholders; k is the slot index shared with
# the tagged query template.
lo4max = 'select <field>:0 argmax <field>:1'
lo4min = 'select <field>:0 argmin <field>:1'
lo4max_1 = 'select <field>:0 argmax <field>:0'
lo4min_1 = 'select <field>:0 argmin <field>:0'
lo6selecteq = 'select <field>:0 where <field>:1 equal <value>:1'
lo6selectl = 'select <field>:0 where <field>:1 less <value>:1'
lo6selectg = 'select <field>:0 where <field>:1 greater <value>:1'

# TODO: templates still to be written for the remaining collections:
#   lo2sum, lo2avg, lo6counteq, lo6countl, lo6countg,
#   lo6before, lo6after, lo11sum, lo11diff
# They used to be listed here as bare names, which raises NameError unless an
# earlier run of the notebook happened to define them.

main_config = tu.Config()
# rebinds schema_collect (built from ./rand.fi earlier in the notebook) to the
# schema list held by the config
schema_collect = main_config.schema_collect
# hard-coded, machine-specific model path
parser = stanford.StanfordParser(model_path='/Users/richard_xiong/Documents/DeepLearningMaster/deep_parser/englishPCFG.ser.gz')

# print field_corr
# print value_corr 
# print quTemp
# print lo6select

In [96]:
def main(collect, logic):
    ''' Augment every seed query of one collection with a single logical template.
        arguments --- collect: lines of the form 'query;schema_idx' or
                      'query;schema_idx;<anything>'; lines with fewer than two
                      ';'-separated parts are skipped
                      logic: the logical-form template shared by all the queries
        return --- queryCollect, logicCollect, fieldCollect
        Relies on the module-level names parser and schema_collect.
    '''
    queryCollect, logicCollect, fieldCollect = [], [], []
    for line in collect:
        # each usable line carries the query, the schema index and, optionally,
        # a third part acting as the special code
        reference = line.split(';')
        if len(reference) < 2:
            continue
        query, schema_idx = reference[0], int(reference[1])

        print('*** New query ***')
        print(query)
        # tagging: only the field correspondence and the query template are used
        schema_text = ' '.join(schema_collect[schema_idx])
        _, field_corr, _, quTemp, _ = tg.sentTagging_tree(parser, query, schema_text)
        # converting: a third part restricts augmentation to the original schema
        only_original_schema = len(reference) > 2
        field_corr_new, schema_aug = schemaRecommend(schema_idx, field_corr, only_original_schema)
        # augmenting
        queryOne, logicOne, fieldOne = augment(quTemp, logic, field_corr_new, schema_aug)
        # extending collections
        queryCollect += queryOne
        logicCollect += logicOne
        fieldCollect += fieldOne
    return queryCollect, logicCollect, fieldCollect

# Augment the "greater than" selection queries and keep the three aligned
# result lists at module level; the next cell writes them to disk.
queryCollect, logicCollect, fieldCollect = main(collect6selectg, lo6selectg)
#main(collect6selectg, lo6selectg)

*** New query ***
name a player that plays in no less than 13 innings
['<field>', 'Player']
['<field>', 'Player']
['<field>', 'Innings']
['<value>', '<num>']
[(10, 1)]
[9]
[(2, 0), (4, 0)]
[]
['<field>', '0']
['<field>', '0']
['<value>', '1']
['<field>', '1']
[(2, 0), (4, 0), (10, 1)] [(9, 1)]
['<field>', '0']
['<field>', '1']
['<value>', '1']
[(1, 0), (3, 1)] [(5, 1)]
['Player', 'Innings']
['Player', 'Runs']
['Player', 'Matches']
['Player', 'Field_Goals']
['Player', 'Free_Throws']
['Player', 'Points']
['Player', 'Games_Played']
['Player', '100s']
['Player', '50s']
*** New query ***
which country was awarded more than 5 silver medals
['<field>', 'Nation']
['<field>', 'Silver']
['<value>', '<num>']
[(7, 1)]
[6]
[(1, 0)]
[]
['<field>', '0']
['<value>', '1']
['<field>', '1']
[(1, 0), (7, 1)] [(6, 1)]
['<field>', '0']
['<field>', '1']
['<value>', '1']
[(1, 0), (3, 1)] [(5, 1)]
['Nation', 'Bronze']
['Nation', 'Silver']
['Nation', 'Gold']
*** New query ***
only team to have more than 30 silv

In [97]:
# Append the generated examples to the three parallel corpus files; line i of
# each file belongs to the same example.
# Refuse to write anything if the collections are not aligned, so the files
# cannot end up with a different number of lines.
if not (len(queryCollect) == len(logicCollect) == len(fieldCollect)):
    raise ValueError('queryCollect, logicCollect and fieldCollect must have the same length')

# the with-statement closes all three files even if a write fails
with open('new.qu', 'a+') as f_qu, open('new.lo', 'a+') as f_lo, open('new.fi', 'a+') as f_fi:
    for qu_text, lo_text, fi_text in zip(queryCollect, logicCollect, fieldCollect):
        f_qu.write(qu_text + '\n')
        f_lo.write(lo_text + '\n')
        f_fi.write(fi_text + '\n')