diff --git a/README.md b/README.md
index 98c3a92107a2..3013441ab9f9 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,12 @@ PROTIP: Set "GPU=1" environment variable if you want this to go faster.
 
 PROPROTIP: Set "DEBUG=1" environment variable if you want to see why it's slow.
 
+### GAN is real... Generated with pure tinygrad!
+
+<p align="center">
+  <img src="docs/mnist_by_tinygrad.jpg">
+</p>
+
 ### The promise of small
 
 tinygrad will always be below 1000 lines. If it isn't, we will revert commits until tinygrad becomes smaller.
diff --git a/docs/mnist_by_tinygrad.jpg b/docs/mnist_by_tinygrad.jpg
new file mode 100644
index 000000000000..90cfde24cdad
Binary files /dev/null and b/docs/mnist_by_tinygrad.jpg differ
diff --git a/examples/mnist_gan.py b/examples/mnist_gan.py
index 438ed3cb42fe..67e4b7ba7f6f 100644
--- a/examples/mnist_gan.py
+++ b/examples/mnist_gan.py
@@ -9,7 +9,7 @@
 from tinygrad.tensor import Tensor, Function, register
 from extra.utils import get_parameters
 import tinygrad.optim as optim
-from test_mnist import X_train, Y_train
+from test_mnist import X_train
 from torchvision.utils import make_grid, save_image
 import torch
 GPU = os.getenv("GPU") is not None
@@ -52,9 +52,9 @@ def forward(self, x, train=True):
 if __name__ == "__main__":
   generator = LinearGen()
   discriminator = LinearDisc()
-  batch_size = 128
+  batch_size = 512
   k = 1
-  epochs = 100
+  epochs = 300
   generator_params = get_parameters(generator)
   discriminator_params = get_parameters(discriminator)
   gen_loss = []
@@ -62,13 +62,13 @@ def forward(self, x, train=True):
   output_folder = "outputs"
   os.makedirs(output_folder, exist_ok=True)
   train_data_size = len(X_train)
-  ds_noise = Tensor(np.random.uniform(size=(64,128)).astype(np.float32), gpu=GPU, requires_grad=False)
+  ds_noise = Tensor(np.random.randn(64, 128).astype(np.float32), gpu=GPU, requires_grad=False)
   n_steps = int(train_data_size/batch_size)
   if GPU:
     [x.cuda_() for x in generator_params+discriminator_params]
   # optimizers
-  optim_g = optim.Adam(generator_params, lr=0.001)
-  optim_d = optim.Adam(discriminator_params, lr=0.001)
+  optim_g = optim.Adam(generator_params, lr=0.0002, b1=0.5)  # 0.0002 for equilibrium!
+  optim_d = optim.Adam(discriminator_params, lr=0.0002, b1=0.5)
 
   def regularization_l2(model, a=1e-4):
     #TODO: l2 reg loss
@@ -88,7 +88,7 @@ def real_label(bs):
 
   def fake_label(bs):
     y = np.zeros((bs,2), np.float32)
-    y[range(bs), [0]*bs] = -2.0
+    y[range(bs), [0]*bs] = -2.0  # Can we do label smoothing? i.e. -2.0 changed to -1.98789.
     fake_labels = Tensor(y, gpu=GPU)
     return fake_labels
 
@@ -124,18 +124,18 @@ def train_generator(optimizer, data_fake):
     print(f"Epoch {epoch} of {epochs}")
     for i in tqdm(range(n_steps)):
       image = generator_batch()
-      for step in range(k):
-        noise = Tensor(np.random.uniform(size=(batch_size,128)), gpu=GPU)
+      for step in range(k):  # Try with k = 5 or 7.
+        noise = Tensor(np.random.randn(batch_size, 128), gpu=GPU)
         data_fake = generator.forward(noise).detach()
         data_real = image
         loss_d_step = train_discriminator(optim_d, data_real, data_fake)
         loss_d += loss_d_step
-      noise = Tensor(np.random.uniform(size=(batch_size,128)), gpu=GPU)
+      noise = Tensor(np.random.randn(batch_size, 128), gpu=GPU)
       data_fake = generator.forward(noise)
       loss_g_step = train_generator(optim_g, data_fake)
       loss_g += loss_g_step
     fake_images = generator.forward(ds_noise).detach().cpu().data
-    fake_images = (fake_images.reshape(-1,1,28,28)+ 1)/2
+    fake_images = (fake_images.reshape(-1, 1, 28, 28) + 1) / 2  # scale to [0, 1] range.
     fake_images = make_grid(torch.tensor(fake_images))
     save_image(fake_images, os.path.join(output_folder,f"image_{epoch}.jpg"))
     epoch_loss_g = loss_g / n_steps
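
Note on the `fake_label` comment above: one-sided label smoothing would replace the hard `-2.0` target with values sampled just inside it, so the discriminator never trains against a perfectly confident label. A minimal sketch under this patch's conventions (two-column labels, `Tensor(..., gpu=GPU)`); the smoothing band `[-2.0, -1.9]` is illustrative, not part of the patch:

```python
import numpy as np
from tinygrad.tensor import Tensor

def fake_label_smoothed(bs, gpu=False):
  # Same label layout as fake_label above, but the fake target is sampled
  # near -2.0 instead of being exactly -2.0 (one-sided label smoothing).
  y = np.zeros((bs, 2), np.float32)
  y[range(bs), [0]*bs] = np.random.uniform(-2.0, -1.9, size=bs).astype(np.float32)
  return Tensor(y, gpu=gpu)
```

Since the shape and layout match `fake_label`, `train_discriminator` could consume these labels with no other changes.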
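`regularization_l2` is still a TODO in this patch. A hedged sketch of one way to complete it, assuming `Tensor.mul`, `Tensor.add`, and `Tensor.sum` compose in-graph the way the rest of the example uses tinygrad; the one-element `scale` tensor stands in for scalar multiplication, which this sketch does not assume exists:

```python
import numpy as np
from tinygrad.tensor import Tensor
from extra.utils import get_parameters

def regularization_l2(model, a=1e-4, gpu=False):
  # Sum of squared parameters, built from graph ops so the penalty can be
  # added onto the discriminator/generator loss before backward().
  params = get_parameters(model)
  reg = params[0].mul(params[0]).sum()
  for p in params[1:]:
    reg = reg.add(p.mul(p).sum())
  # Scale by a (1,)-shaped tensor rather than a raw Python float.
  scale = Tensor(np.array([a], dtype=np.float32), gpu=gpu, requires_grad=False)
  return reg.mul(scale)
```

The returned penalty would then be added to the loss inside `train_discriminator`/`train_generator` rather than applied as a separate step.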