In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard

In [2]:
# Set dimension of problem, length of string

char_list = [' ',
             '1',
             '2',
             '3',
             '4',
             '5',
             '6',
             '7',
             '8',
             '9',
             '0',
             '+',
             '-',
             '*',
             '/',
             '=',
            ]
string_length = 20
problem_dim = string_length * len(char_list)

def problem_to_tensor(problem_string):
    alphabet_size = len(char_list)
    outlist = []
    for char in problem_string:
        # print(char)
        inner_list = [0.] * alphabet_size
        inner_list[char_list.index(char)] = 1.
        # print(inner_list)
        outlist.append(inner_list)
    for _ in range(0, problem_dim-len(outlist)):
        inner_list = [0.] * alphabet_size
        outlist.append(inner_list)
    # print("-----")
    t = torch.as_tensor(outlist)
    return t

def tensor_to_problem(t):
    inlist = t.tolist()
    #print(inlist)
    outstring = ""
    for sublist in inlist:
        # print(len(sublist))
        index = sublist.index(max(sublist))
        print("INDEX: " + str(index))
        character = char_list[index]
        print(character)
        outstring += character
    outstring = outstring.strip()
    return outstring

In [3]:
class Discriminator(nn.Module):
    def __init__(self, problem_dim):
        super().__init__()
        self.disc = nn.Sequential(
            # Takes 20 digits as in_features
            # Can the 20 be replaced with problem_dim?
            nn.Linear(problem_dim, 5),
            #Activation function
            nn.LeakyReLU(0.01),
            # One output node, 0 for fake, 1 for real
            nn.Linear(5, 1),
            # Makes sure that the node has a value between 0 and 1
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    # z_dim is the dimenion of the noise
    # problem_dim is the dimension of the output (length of string)
    def __init__(self, z_dim, problem_dim):
        super().__init__()
        self.gen = nn.Sequential(
            # I think that 256 is just random to expand the noise
            # 256 = 64*4
            nn.Linear(z_dim, 256),
            nn.LeakyReLU(0.01),
            nn.Linear(256, problem_dim),
            # normalize inputs to [-1, 1] so make outputs [-1, 1]
            nn.Tanh()
        )

    def forward(self, x):
        return self.gen(x)

In [4]:
# Hyperparameters etc.
device = "cuda" if torch.cuda.is_available() else "cpu"
#learning rate ( play around with this if you want)
lr = 3e-4
# noise dimension (play with this as well)
z_dim = 64 #try 128, 256
batch_size = 1
num_epochs = 50

  return torch._C._cuda_getDeviceCount() > 0


In [5]:
class ProblemDataset(Dataset):
    def __init__(self, problem_list, transform=None):
        self.problem_list = problem_list
        self.transform = transform

    def __len__(self):
        return len(self.problem_list)

    def __getitem__(self, index):
        problem_string = self.problem_list[index]
        problem_tensor = problem_to_tensor(problem_string)

        return problem_tensor

input_problems = ['32+90', '24+13', '93+03', '17+18', '68+03', '22+11', '50+50', '47+93', '08+29', '73+12']
dataset = ProblemDataset(problem_list = input_problems)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [6]:
disc = Discriminator(problem_dim).to(device)
gen = Generator(z_dim, problem_dim).to(device)
fixed_noise = torch.randn((batch_size, z_dim)).to(device)
# Params of Normalize are mean and standard deviation of dataset
transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,)),]
)

opt_disc = optim.Adam(disc.parameters(), lr=lr)
opt_gen = optim.Adam(gen.parameters(), lr=lr)
criterion = nn.BCELoss()
writer_fake = SummaryWriter(f"logs/fake")
writer_real = SummaryWriter(f"logs/real")
step = 0

for epoch in range(num_epochs):
    for batch_idx, real in enumerate(loader):
        # Keep current batch number, flatten real to size of 20
        real = real.view(-1, problem_dim).to(device)
        batch_size = real.shape[0]

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        noise = torch.randn(batch_size, z_dim).to(device)
        fake = gen(noise)
        disc_real = disc(real).view(-1)
        lossD_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake).view(-1)
        lossD_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        lossD = (lossD_real + lossD_fake) / 2
        disc.zero_grad()
        lossD.backward(retain_graph=True)
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
        # where the second option of maximizing doesn't suffer from
        # saturating gradients
        output = disc(fake).view(-1)
        lossG = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        lossG.backward()
        opt_gen.step()

        if batch_idx == 0:
            print(
                f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(loader)} \
                      Loss D: {lossD:.4f}, loss G: {lossG:.4f}"
            )

            with torch.no_grad():
                fake = gen(fixed_noise).reshape(-1, problem_dim)
                # I think that data is the problem that the model was trained on
                data = real.reshape(-1, problem_dim)
                fake = nn.Unflatten('fake', )
                print(fake)
                print(data)
                print("GENERATED " + tensor_to_problem(fake))
                # print("--Output--------" + str(tensor_to_problem(data)))
                
                
                #print(fake)
                #print(data)
                step += 1


Epoch [0/50] Batch 0/10                       Loss D: 0.7059, loss G: 0.8672
tensor([[ 0.0298,  0.0288, -0.0741,  0.0470,  0.2929,  0.2282, -0.3234,  0.1663,
          0.2079,  0.4134,  0.1176, -0.1425, -0.1584,  0.2428, -0.1657, -0.3612,
          0.0925, -0.4082, -0.1295,  0.1531,  0.0453, -0.0188,  0.2590, -0.3797,
          0.0250, -0.3265, -0.4409,  0.0010,  0.2621,  0.6340, -0.0427, -0.0749,
          0.2051, -0.1463, -0.4045, -0.1516,  0.0339, -0.2773,  0.1634,  0.0007,
         -0.2182, -0.2131,  0.3425,  0.1924,  0.4764, -0.0043,  0.2807,  0.2621,
          0.0229,  0.3686,  0.1665,  0.3682, -0.3005,  0.0124, -0.0571,  0.0924,
          0.1118, -0.2264, -0.0312,  0.2957,  0.1166,  0.0632, -0.2124, -0.2977,
          0.0304, -0.1189, -0.0542,  0.2433,  0.3575, -0.2590,  0.2286, -0.1494,
         -0.3733,  0.0657, -0.2001, -0.0276, -0.0647, -0.2159, -0.3026, -0.1243,
          0.1497,  0.0326,  0.1448,  0.3787, -0.1209, -0.5516,  0.0863, -0.1402,
         -0.2343,  0.2403, -0.05

IndexError: list index out of range