# ATIS in LUIS

In [94]:
import json, gzip, pickle

In [95]:
import os
# Every relative path used later ('ojoatis.json', 'new_ojoatis.json') resolves against this directory.
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere without editing it.
os.chdir('/Users/jacobsw/Desktop')

In [96]:
# Load the app definition from ojoatis.json (presumably a LUIS export, given the
# notebook title). The relative path depends on the working directory set earlier.
with open('ojoatis.json') as json_file:
    ojoatis = json.load(json_file)
# Patch the entity groups in place, addressed by position. Per the FORMAT sample
# below, group 0 is 'arrive_time'; it gains the extra child 'arrive_time.time'.
ojoatis['entities'][0]['children'].append(u'arrive_time.time')
# Children 5 and 6 of group 2 are overwritten with the return_time.* names.
# NOTE(review): positional indices silently hit the wrong entries if the JSON is
# ever reordered; the values being replaced are not visible here.
ojoatis['entities'][2]['children'][5] = u'return_time.period_mod'
ojoatis['entities'][2]['children'][6] = u'return_time.period_of_day'
# joshentities is an alias of the same list object as ojoatis['entities'] (no copy).
joshentities = ojoatis['entities']
# The bare string below is not assigned to anything; as the last expression of the
# cell it is echoed as the cell output. It sketches the entity-group layout.
''' FORMAT:
[{u'children': [u'arrive_time.end_time',
   u'arrive_time.period_mod',
   u'arrive_time.period_of_day',
   u'arrive_time.start_time',
   u'arrive_time.time_relative'],
  u'name': u'arrive_time'},
 {u'children': [u'depart_time.end_time',
   u'depart_time.period_mod',
   u'depart_time.period_of_day',
   u'depart_time.start_time',
   u'depart_time.time',
   u'depart_time.time_relative',
   u'meal',
   u'meal_description'],
  u'name': u'depart_time_meal'},
  ...
'''

" FORMAT:\n[{u'children': [u'arrive_time.end_time',\n   u'arrive_time.period_mod',\n   u'arrive_time.period_of_day',\n   u'arrive_time.start_time',\n   u'arrive_time.time_relative'],\n  u'name': u'arrive_time'},\n {u'children': [u'depart_time.end_time',\n   u'depart_time.period_mod',\n   u'depart_time.period_of_day',\n   u'depart_time.start_time',\n   u'depart_time.time',\n   u'depart_time.time_relative',\n   u'meal',\n   u'meal_description'],\n  u'name': u'depart_time_meal'},\n  ...\n"

In [97]:
def entities_dict_transform(entities):
    """Invert a list of entity groups into a child -> group-name lookup.

    Args:
        entities: iterable of mappings, each with a 'children' list and a
            'name' entry.

    Returns:
        dict mapping every child to the 'name' of the group that lists it.
        When a child appears under several groups, the last group wins.
    """
    return {
        child_name: group['name']
        for group in entities
        for child_name in group['children']
    }
''' TRANSFORMED:
{u'aircraft_code': u'codes_types',
 u'airline_code': u'codes_types',
 u'airline_name': u'from_loc',
 u'airport_code': u'codes_types',
 u'airport_name': u'from_loc',
 u'arrive_date.date_relative': u'arrive_date',
 u'arrive_date.day_name': u'arrive_date',
 u'arrive_date.day_number': u'arrive_date',
 ...
'''

" TRANSFORMED:\n{u'aircraft_code': u'codes_types',\n u'airline_code': u'codes_types',\n u'airline_name': u'from_loc',\n u'airport_code': u'codes_types',\n u'airport_name': u'from_loc',\n u'arrive_date.date_relative': u'arrive_date',\n u'arrive_date.day_name': u'arrive_date',\n u'arrive_date.day_number': u'arrive_date',\n ...\n"

In [98]:
# Build the child -> group-name lookup from the entity list held by `ojoatis`
# rather than from the `joshentities` name that is being rebound on this line.
# `x = f(x)` only works on the first run: a second run would iterate over the
# dict's keys and fail. Reading from ojoatis['entities'] keeps the cell idempotent.
joshentities = entities_dict_transform(ojoatis['entities'])

In [99]:
class ATIS:
    """Pickled ATIS fold plus helpers that render training sentences as LUIS utterances.

    Attributes set by the constructor / loader:
        filename:   path of the gzipped pickle that was loaded.
        entities:   mapping from a slot label name (e.g. 'toloc.city_name') to the
                    name of the entity group it is filed under.
        train_set, valid_set, test_set, dicts: the four objects unpickled from the file.
        idx2labels, idx2words: inverses of dicts['labels2idx'] and dicts['words2idx'].
    """

    def __init__(self, filename='/Users/jacobsw/Downloads/atis.fold1.pkl.gz',
                       entities=joshentities):
        """Store the configuration and load the pickle immediately.

        Args:
            filename: path to the gzipped pickle.
            entities: child-label -> group-name mapping; the default is whatever
                `joshentities` was bound to when this class statement was executed.
        """
        self.filename = filename
        self.entities = entities
        self.__load()

    def __load(self):
        """Unpickle the four data sets and build the index -> token lookups."""
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # `filename` at files from a trusted source.
        # The `with` block closes the gzip handle even when unpickling fails.
        with gzip.open(self.filename, 'rb') as f:
            self.train_set, self.valid_set, self.test_set, self.dicts = pickle.load(f)
        self.idx2labels = {idx: label for (label, idx) in self.dicts['labels2idx'].items()}
        self.idx2words = {idx: word for (word, idx) in self.dicts['words2idx'].items()}

    def get_annotated_sent(self, sent_idx):
        """Return training sentence `sent_idx` as a list of (word, label) pairs.

        Word indices are read from train_set[0] and label indices from train_set[2];
        an index missing from the lookup tables decodes to None.
        """
        words = map(self.idx2words.get, self.train_set[0][sent_idx])
        labels = map(self.idx2labels.get, self.train_set[2][sent_idx])
        # list() keeps the result indexable and re-iterable where zip() is lazy.
        return list(zip(words, labels))

    def get_utterance_json(self, sent_idx):
        """Build the utterance dict for training sentence `sent_idx`.

        Returns:
            dict with
              u'text':     the sentence words joined by single spaces,
              u'intent':   always 'None',
              u'entities': one dict per labelled span, in sentence order, with
                           u'entity' ('<group>::<label name>') and inclusive token
                           indices u'startPos' / u'endPos'.

        Raises:
            KeyError: a label name is not a key of `self.entities`.

        A span opens at a 'B...' label and runs through the following 'I...'
        labels. It is closed by anything else: an 'O', the end of the sentence,
        or the next 'B...' label, so back-to-back entities are all kept. An
        'I...' label with no open span is treated as the start of a span.
        """
        annotated = self.get_annotated_sent(sent_idx)
        utterance = {u'text': u' '.join([word for word, label in annotated]),
                     u'intent': 'None',
                     u'entities': []}
        found = utterance[u'entities']
        current = None  # span being built, or None while between spans
        for i, (word, label) in enumerate(annotated):
            is_inside = label.startswith('I')
            if is_inside and current is not None:
                continue  # still inside the open span
            if current is not None:
                # Anything other than a continuation ends the open span on the
                # previous token.
                current[u'endPos'] = i - 1
                found.append(current)
                current = None
            if label.startswith('B') or is_inside:
                # maxsplit=1 keeps label names that themselves contain '-'.
                name = label.split('-', 1)[1]
                current = {u'entity': self.entities[name] + '::' + name,
                           u'startPos': i,
                           u'endPos': i}
        if current is not None:
            # The sentence ended while a span was still open.
            current[u'endPos'] = len(annotated) - 1
            found.append(current)
        return utterance

    def populate_utterances(self, luis_json, num_utterances=2000):
        """Append utterances for the first `num_utterances` training sentences.

        `luis_json['utterances']` is extended in place and the same `luis_json`
        object is returned, so calling this twice on one object appends the
        utterances twice. The count is capped at the number of training
        sentences, so an oversized request cannot fail part-way through.
        """
        count = min(num_utterances, len(self.train_set[0]))
        for i in range(count):
            luis_json['utterances'].append(self.get_utterance_json(i))
        return luis_json
        
        

In [100]:
# Uses the constructor defaults; the constructor loads the pickle file immediately.
atis = ATIS()

In [101]:
# Preview the utterance dict built from training sentence 0.
atis.get_utterance_json(0)

{u'entities': [{u'endPos': 5,
   u'entity': u'from_loc::airline_name',
   u'startPos': 5},
  {u'endPos': 7, u'entity': u'flight::flight_number', u'startPos': 7},
  {u'endPos': 10, u'entity': u'from_loc::fromloc.city_name', u'startPos': 9},
  {u'endPos': 14,
   u'entity': u'stop_to_loc::toloc.city_name',
   u'startPos': 12}],
 u'intent': 'None',
 u'text': u'what aircraft is used on delta flight DIGITDIGITDIGITDIGIT from kansas city to salt lake city'}

In [102]:
# populate_utterances appends to ojoatis['utterances'] in place and returns the
# object it was given, so new_ojoatis and ojoatis are the same dict.
# NOTE(review): re-running this cell appends the same utterances a second time.
new_ojoatis = atis.populate_utterances(ojoatis)

In [103]:
# Echoes the entire dict, including every appended utterance, as the cell output.
new_ojoatis

{u'actions': [],
 u'bing_entities': [],
 u'composites': [],
 u'culture': u'en-us',
 u'desc': u'ner',
 u'entities': [{u'children': [u'arrive_time.end_time',
    u'arrive_time.period_mod',
    u'arrive_time.period_of_day',
    u'arrive_time.start_time',
    u'arrive_time.time_relative',
    u'arrive_time.time'],
   u'name': u'arrive_time'},
  {u'children': [u'depart_time.end_time',
    u'depart_time.period_mod',
    u'depart_time.period_of_day',
    u'depart_time.start_time',
    u'depart_time.time',
    u'depart_time.time_relative',
    u'meal',
    u'meal_description'],
   u'name': u'depart_time_meal'},
  {u'children': [u'return_date.date_relative',
    u'return_date.day_name',
    u'return_date.day_number',
    u'return_date.month_name',
    u'return_date.today_relative',
    u'return_time.period_mod',
    u'return_time.period_of_day',
    u'cost_relative',
    u'fare_amount'],
   u'name': u'return_cost'},
  {u'children': [u'flight',
    u'flight_days',
    u'flight_mod',
    u'flight

In [106]:
# Serialise the populated app definition to new_ojoatis.json in the working directory.
with open('new_ojoatis.json', 'w') as f:
    f.write(json.dumps(new_ojoatis))

In [108]:
# Raw entry 0 of valid_set[0]; the cell further down decodes these values with idx2words.
atis.valid_set[0][0]

array([554, 194, 268,  64,  62,  16,   8, 234, 481,  20,  40,  58, 234,
       415, 205], dtype=int32)

In [110]:
# Decode validation sentence 0 into (word, label) pairs: the same pairing that
# ATIS.get_annotated_sent performs, but reading valid_set instead of train_set.
zip(map(atis.idx2words.get,atis.valid_set[0][0]),
    map(atis.idx2labels.get,atis.valid_set[2][0]))

[('what', 'O'),
 ('flights', 'O'),
 ('leave', 'O'),
 ('atlanta', 'B-fromloc.city_name'),
 ('at', 'O'),
 ('about', 'B-depart_time.time_relative'),
 ('DIGIT', 'B-depart_time.time'),
 ('in', 'O'),
 ('the', 'O'),
 ('afternoon', 'B-depart_time.period_of_day'),
 ('and', 'O'),
 ('arrive', 'O'),
 ('in', 'O'),
 ('san', 'B-toloc.city_name'),
 ('francisco', 'I-toloc.city_name')]