## Build an efficient data loader

In [50]:
import itertools

# Relative paths of the train / validation / test splits of the Yelp
# review corpus used by the cells below.
train_file, dev_file, test_file = (
    "../dataset/restaurant_revs/yelp_train.txt",
    "../dataset/restaurant_revs/yelp_val.txt",
    "../dataset/restaurant_revs/yelp_test.txt",
)

class Txtfile(object):
    """
    Lazily read a labelled text file, one example per line.

    Each non-blank line is lower-cased and split on whitespace; the last
    token is taken as the label and the remaining tokens, re-joined with
    single spaces, as the review text. Iterating yields ``(review, label)``
    pairs after passing them through the optional ``source2idx`` /
    ``label2idx`` callables. The file is re-opened on every iteration, so an
    instance can be iterated any number of times.

    Args:
        fname: path of the text file to read.
        source2idx: optional callable applied to each review string.
        label2idx: optional callable applied to each label string.
        firstline: if true, the first line of the file is skipped as a header.
        limit: maximum number of lines to read after the optional header
            (blank lines count towards it); ``None`` or a non-positive value
            means "no limit".
        encoding: text encoding used to open the file.
    """

    def __init__(self, fname, source2idx=None, label2idx=None, firstline=False, limit=-1,
                 encoding="utf-8"):
        self.fname = fname
        self.firstline = firstline
        # Accept None as "no limit" too; ``None > 0`` would raise TypeError.
        self.limit = limit if limit is not None and limit > 0 else None
        self.source2idx = source2idx
        self.label2idx = label2idx
        self.encoding = encoding
        # Cached number of examples, filled in lazily by __len__.
        self.length = None

    def _iter_lines(self):
        """Yield every non-blank line (stripped) within the configured limit."""
        with open(self.fname, 'r', encoding=self.encoding) as f:
            if self.firstline:
                # Skip the header. The default keeps an empty file from
                # raising StopIteration inside this generator, which Python
                # would turn into a RuntimeError.
                next(f, None)
            for line in itertools.islice(f, self.limit):
                source = line.strip()
                if source:
                    yield source

    def __iter__(self):
        """Yield one ``(review, label)`` pair per non-blank line."""
        for source in self._iter_lines():
            review, label = Txtfile.process_seq(source)
            if self.source2idx is not None:
                review = self.source2idx(review)
            if self.label2idx is not None:
                label = self.label2idx(label)
            yield review, label

    def __len__(self):
        """Return the number of examples, scanning the file once and caching it.

        Only lines are counted; the ``source2idx`` / ``label2idx`` callables
        are not invoked. The cache is assigned only after a complete pass, so
        a failure while reading never leaves a partial count behind.
        """
        if self.length is None:
            self.length = sum(1 for _ in self._iter_lines())
        return self.length

    @staticmethod
    def process_seq(seq):
        """Split a raw line into ``(review, label)``.

        The line is lower-cased and split on whitespace; the final token is
        the label and the preceding tokens, joined by single spaces, form the
        review. ``seq`` must contain at least one token.
        """
        seq = seq.lower().split()
        review = " ".join(seq[:-1])
        label = seq[-1]
        return review, label

In [57]:
# Stream the training split once and collect reviews and labels
# into two parallel lists.
train_gen = Txtfile(train_file)
train_data, train_label = [], []
for review, label in train_gen:
    train_data.append(review)
    train_label.append(label)

In [58]:
# Peek at the first five reviews.
train_data[:5]

["saw the bartender take a lime wedge out of someone's finished mix drink and put it in mine before he served it to me.",
 'it\'s the time of year for an annual pilgrimage from tempe to the cerreta candy co. for christmas gifts and la perla cafe for dinner. but most likely we\'ll never go back to la perla. tonight they were charging $10 just to park in their parking lot, and then if you ate dinner there, they would give you back $5. we told the lady in the parking lot that we had driven from tempe for dinner, but it made no difference," she said ""i\'m just doing what my boss told me to do."" unfortunately", i guess people who come for glendale glitters take advantage of their parking lot. we were really in the mood for mexican food, and the good news is we found another restaurant. now when we go to glendale, we will bypass la perla cafe, and go to pedro\'s at 4938 w. glendale ave. food was very good at pedro\'s and our wait staff was very helpful.',
 'ive been a big fan of ethiopian 

In [59]:
# Peek at the labels of the first five reviews.
train_label[:5]

['1_star', '1_star', '3_star', '5_star', '2_star']