diff --git a/README.md b/README.md
index 98c3a92107a2..3013441ab9f9 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,12 @@ PROTIP: Set "GPU=1" environment variable if you want this to go faster.
PROPROTIP: Set "DEBUG=1" environment variable if you want to see why it's slow.
+### GANs are real... Generated with pure tinygrad!
+
+
+
+
+
### The promise of small
tinygrad will always be below 1000 lines. If it isn't, we will revert commits until tinygrad becomes smaller.
diff --git a/docs/mnist_by_tinygrad.jpg b/docs/mnist_by_tinygrad.jpg
new file mode 100644
index 000000000000..90cfde24cdad
Binary files /dev/null and b/docs/mnist_by_tinygrad.jpg differ
diff --git a/examples/mnist_gan.py b/examples/mnist_gan.py
index 438ed3cb42fe..67e4b7ba7f6f 100644
--- a/examples/mnist_gan.py
+++ b/examples/mnist_gan.py
@@ -9,7 +9,7 @@
from tinygrad.tensor import Tensor, Function, register
from extra.utils import get_parameters
import tinygrad.optim as optim
-from test_mnist import X_train, Y_train
+from test_mnist import X_train
from torchvision.utils import make_grid, save_image
import torch
GPU = os.getenv("GPU") is not None
@@ -52,9 +52,9 @@ def forward(self, x, train=True):
if __name__ == "__main__":
generator = LinearGen()
discriminator = LinearDisc()
- batch_size = 128
+ batch_size = 512
k = 1
- epochs = 100
+ epochs = 300
generator_params = get_parameters(generator)
discriminator_params = get_parameters(discriminator)
gen_loss = []
@@ -62,13 +62,13 @@ def forward(self, x, train=True):
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)
train_data_size = len(X_train)
- ds_noise = Tensor(np.random.uniform(size=(64,128)).astype(np.float32), gpu=GPU, requires_grad=False)
+ ds_noise = Tensor(np.random.randn(64,128).astype(np.float32), gpu=GPU, requires_grad=False)
n_steps = int(train_data_size/batch_size)
if GPU:
[x.cuda_() for x in generator_params+discriminator_params]
# optimizers
- optim_g = optim.Adam(generator_params, lr=0.001)
- optim_d = optim.Adam(discriminator_params, lr=0.001)
+ optim_g = optim.Adam(generator_params,lr=0.0002, b1=0.5) # 0.0002 for equilibrium!
+ optim_d = optim.Adam(discriminator_params,lr=0.0002, b1=0.5)
def regularization_l2(model, a=1e-4):
#TODO: l2 reg loss
@@ -88,7 +88,7 @@ def real_label(bs):
def fake_label(bs):
y = np.zeros((bs,2), np.float32)
- y[range(bs), [0]*bs] = -2.0
+    y[range(bs), [0]*bs] = -2.0 # Can we do label smoothing? i.e. -2.0 changed to -1.98789.
fake_labels = Tensor(y, gpu=GPU)
return fake_labels
@@ -124,18 +124,18 @@ def train_generator(optimizer, data_fake):
print(f"Epoch {epoch} of {epochs}")
for i in tqdm(range(n_steps)):
image = generator_batch()
- for step in range(k):
- noise = Tensor(np.random.uniform(size=(batch_size,128)), gpu=GPU)
+ for step in range(k): # Try with k = 5 or 7.
+ noise = Tensor(np.random.randn(batch_size,128), gpu=GPU)
data_fake = generator.forward(noise).detach()
data_real = image
loss_d_step = train_discriminator(optim_d, data_real, data_fake)
loss_d += loss_d_step
- noise = Tensor(np.random.uniform(size=(batch_size,128)), gpu=GPU)
+ noise = Tensor(np.random.randn(batch_size,128), gpu=GPU)
data_fake = generator.forward(noise)
loss_g_step = train_generator(optim_g, data_fake)
loss_g += loss_g_step
fake_images = generator.forward(ds_noise).detach().cpu().data
- fake_images = (fake_images.reshape(-1,1,28,28)+ 1)/2
+    fake_images = (fake_images.reshape(-1, 1, 28, 28)+ 1) / 2 # rescale tanh output from [-1, 1] to [0, 1].
fake_images = make_grid(torch.tensor(fake_images))
save_image(fake_images, os.path.join(output_folder,f"image_{epoch}.jpg"))
epoch_loss_g = loss_g / n_steps