### References
https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import csv
import gzip

In [6]:
"""
展示如何使用 DataLoader，Dataset 来进行数据的读取和训练批次数据的准备
"""


class NameDataset(Dataset):
    """Dataset of (name, country) string pairs read from a gzipped CSV file.

    The first CSV column is used as the name and the second as the country.
    The sorted list of distinct countries defines an integer id for each
    country: its position in that list.
    """

    def __init__(self, is_train_set=False):
        """Load every name/country pair into memory.

        Args:
            is_train_set: If true, read ``./data/names_train.csv.gz``;
                otherwise read ``./data/names_test.csv.gz``.
        """
        filename = './data/names_train.csv.gz' if is_train_set else './data/names_test.csv.gz'
        with gzip.open(filename, "rt") as f:
            # csv.reader yields an empty list for a blank line; indexing such
            # a row below would raise IndexError, so blank rows are dropped.
            rows = [row for row in csv.reader(f) if row]

        self.names = [row[0] for row in rows]
        self.countries = [row[1] for row in rows]
        self.len = len(self.countries)
        self.country_list = sorted(set(self.countries))
        # Reverse index built once so get_country_id does not rescan
        # country_list on every call.
        self._country_to_id = {
            country: idx for idx, country in enumerate(self.country_list)
        }

    def __getitem__(self, index):
        """Return the ``(name, country)`` pair stored at ``index``."""
        return self.names[index], self.countries[index]

    def __len__(self):
        """Return the number of name/country pairs loaded."""
        return self.len

    def get_countries(self):
        """Return the sorted list of distinct countries."""
        return self.country_list

    def get_country(self, id):
        """Return the country name at position ``id`` of the sorted list."""
        return self.country_list[id]

    def get_country_id(self, country):
        """Return the position of ``country`` in the sorted country list.

        Raises:
            ValueError: If ``country`` is not one of the loaded countries.
        """
        try:
            return self._country_to_id[country]
        except (KeyError, TypeError):
            # Keep the exception type that list.index raised for a miss, so
            # existing callers that catch ValueError keep working.
            raise ValueError("{!r} is not in list".format(country)) from None

In [7]:
# Load the split selected by is_train_set=False and inspect its country list.
dataset = NameDataset(is_train_set=False)
all_countries = dataset.get_countries()
print(all_countries)
print(dataset.get_country(3))
print(dataset.get_country_id('Korean'))

# Wrap the dataset so it is served in shuffled mini-batches of 10 samples.
train_loader = DataLoader(dataset, batch_size=10, shuffle=True)

print(len(train_loader.dataset))
n_epochs = 2
for epoch in range(n_epochs):
    for i, batch in enumerate(train_loader):
        # Each batch unpacks into the batched names and the batched countries.
        names, countries = batch
        # Run your training process
        print(epoch, i, "names", names, "countries", countries)

['Arabic', 'Chinese', 'Czech', 'Dutch', 'English', 'French', 'German', 'Greek', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portuguese', 'Russian', 'Scottish', 'Spanish', 'Vietnamese']
Dutch
11
6700
0 0 names ('Mindiashvili', 'Chaldymov', 'Gander', 'Pensky', 'Schlantz', 'Raikhelgauz', 'Guerin', 'Sung', 'Wilde', 'Bakhtiarov') countries ('Russian', 'Russian', 'English', 'Russian', 'Czech', 'Russian', 'French', 'Korean', 'English', 'Russian')
0 1 names ('Ohishi', 'Mikhaltsov', 'Elensky', 'Ukhobotin', 'Glenn', 'Gorchinsky', 'Goloborodko', 'Mutsu', 'Amelyakin', 'Kajiyama') countries ('Japanese', 'Russian', 'Russian', 'Russian', 'English', 'Russian', 'Russian', 'Japanese', 'Russian', 'Japanese')
0 2 names ('Ujva', 'Nana', 'Durham', 'Vinding', 'Pehterev', 'Isakovich', 'Turnham', 'Ustinkin', 'Zuraw', 'Zhurin') countries ('Russian', 'Italian', 'English', 'Russian', 'Russian', 'Russian', 'English', 'Russian', 'Polish', 'Russian')
0 3 names ('Hlebnikov', 'Paimuhin', 'Denney', 'Groos', 'N