# Build a WaveNet

## Preparation

### Prepare the datasets

In [1]:
# Import the libraries
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
import random

# load the dataset
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale.
with open(file="datasets/names.txt", mode="r", encoding="utf-8") as namestxt:
    words = namestxt.read().splitlines()

# build a dictionary for the characters
# "." is placed first so it always receives index 0.
chars = ["."] + sorted(set("".join(words)))
stoi = {s: i for i, s in enumerate(chars)}  # character -> integer index
itos = {i: s for s, i in stoi.items()}  # integer index -> character
vocab_size = len(itos)

# shuffle the words
random.seed(42)  # fixed seed so the shuffled order is reproducible
random.shuffle(words)

# build the dataset
block_size = 3  # context length: how many preceding characters predict the next one


def build_dataset(words, train=0.8, dev=0.1, test=0.1):
    """Encode words as next-character examples and split them into train/dev/test.

    The split is made on the word list, so every example derived from one
    word lands in the same split and the fractions apply to the number of
    words. Each word contributes ``len(word) + 1`` examples: one per
    character plus one for the terminating ".".

    Args:
        words: Sequence of strings. Every character (and ".") must be a key
            of the file-level ``stoi`` mapping.
        train: Fraction of the words used for the training split.
        dev: Fraction of the words used for the dev split.
        test: Fraction of the words used for the test split.

    Returns:
        ``(Xtr, Ytr, Xdev, Ydev, Xtest, Ytest)``. Each ``X`` is an int64
        tensor with ``block_size`` columns holding the context indices and
        each ``Y`` is the matching 1-D int64 tensor of target indices.

    Raises:
        ValueError: If the three fractions do not sum to 1.
    """
    # Compare with a tolerance: sums of decimal fractions are rarely exact
    # in floating point.
    if abs(train + dev + test - 1.0) > 1e-9:
        raise ValueError(
            f"train + dev + test must sum to 1, got {train + dev + test}"
        )

    def encode(split_words):
        """Turn a list of words into (contexts, targets) tensors."""
        contexts, targets = [], []
        for word in split_words:
            # Start from a context padded with index 0.
            context = [0] * block_size
            for char in word + ".":
                ix = stoi[char]
                contexts.append(context)
                targets.append(ix)
                # Slide the window: drop the oldest index, append the new one.
                context = context[1:] + [ix]
        # The explicit reshape keeps the (0, block_size) shape for an empty split.
        X = torch.tensor(contexts, dtype=torch.long).reshape(len(contexts), block_size)
        Y = torch.tensor(targets, dtype=torch.long)
        return X, Y

    # Boundaries are word indices and are applied to the word list itself.
    n1 = int(train * len(words))
    n2 = int((train + dev) * len(words))

    Xtr, Ytr = encode(words[:n1])
    Xdev, Ydev = encode(words[n1:n2])
    Xtest, Ytest = encode(words[n2:])

    return Xtr, Ytr, Xdev, Ydev, Xtest, Ytest

# Encode the shuffled words and unpack the six tensors, using the default
# 0.8 / 0.1 / 0.1 train/dev/test fractions.
Xtr, Ytr, Xdev, Ydev, Xtest, Ytest = build_dataset(words=words)

### Visualize the datasets

In [12]:
# Show the first seven training examples as "context ---> next character".
for context_ids, target_id in zip(Xtr[:7], Ytr[:7]):
    context_text = "".join(itos[token.item()] for token in context_ids)
    print(context_text, "--->", itos[target_id.item()])

... ---> y
..y ---> u
.yu ---> h
yuh ---> e
uhe ---> n
hen ---> g
eng ---> .


### Create some Layer Modules