In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import os
import torchvision.utils as vutils
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import torch.nn.functional as F
!pip install wandb



# Dataset locations (Kaggle input mounts) and the label tables read from CSV.
train_data_path    = '/kaggle/input/assignment-4-dl/Assignment_4/Train_data'
train_sketch_path  = '/kaggle/input/assignment-4-dl/Assignment_4/Train/Train_sketch/Contours'
test_data_path     = '/kaggle/input/assignment-4-dl/Assignment_4/Test/Test_data/Test'
test_sketch_path   = '/kaggle/input/assignment-4-dl/Assignment_4/Test/Test_sketch/Test_contours'
train_labels       = pd.read_csv('/kaggle/input/csvvvvvv/Train_labels.csv')
test_labels        = pd.read_csv('/kaggle/input/csvvvvvv/Test_Labels.csv')


def _make_transform(mean, std):
    """Build the shared pipeline: resize to 128x128, centre-crop, to tensor, normalise."""
    steps = [
        transforms.Resize((128, 128)),
        transforms.CenterCrop((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
    return transforms.Compose(steps)


# Photos are normalised per RGB channel, sketches with a single-channel statistic.
transform_image = _make_transform([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
transform_sketch = _make_transform([0.5], [0.5])
def load_images(image_folder_path, transform):
    """Load every .jpg/.png file of a folder into one stacked tensor.

    Files are visited in sorted name order. The extension is taken from the
    last dot of the file name and compared case-insensitively, so names
    without a dot or with several dots are handled without raising.

    Args:
        image_folder_path: Directory to scan (non-recursive).
        transform: Callable applied to each opened PIL image; it must return
            tensors of identical shape so they can be stacked.

    Returns:
        A tuple ``(names, images)`` where ``names`` lists the file names that
        were actually loaded and ``images`` is the stacked tensor, so that
        ``names[i]`` always describes ``images[i]``.

    Raises:
        ValueError: If the folder contains no .jpg/.png file.
    """
    allowed_extensions = {'.jpg', '.png'}
    loaded_names = []
    images_list = []
    for image_name in sorted(os.listdir(image_folder_path)):
        extension = os.path.splitext(image_name)[1].lower()
        if extension not in allowed_extensions:
            continue
        image_path = os.path.join(image_folder_path, image_name)
        # The context manager closes the file handle once the transform has
        # read the pixel data.
        with Image.open(image_path) as image:
            images_list.append(transform(image))
        loaded_names.append(image_name)
    if not images_list:
        raise ValueError(f"No .jpg/.png images found in {image_folder_path!r}")
    images = torch.stack(images_list, dim=0)
    return loaded_names, images

# Eagerly load the four image sets; each call yields (file names, stacked tensor).
train_data_name, train_data = load_images(train_data_path, transform_image)
train_sketch_name, train_sketch = load_images(train_sketch_path, transform_sketch)
test_data_name, test_data = load_images(test_data_path, transform_image)
test_sketch_name, test_sketch = load_images(test_sketch_path, transform_sketch)







In [None]:

class LabelEmbedding(nn.Module):
    """Expand a 7-dimensional label vector into a single-channel 128x128 map.

    Three linear layers (7 -> 256 -> 1024 -> 128*128) with ReLU between them
    and a final tanh, so every value of the map lies in [-1, 1]. The result
    is detached, so no gradient ever reaches these layers.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(7, 256)
        self.fc2 = nn.Linear(256, 1024)
        self.fc3 = nn.Linear(1024, 128*128)

    def forward(self, x):
        """Return a detached (batch, 1, 128, 128) label map for ``x``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        label_map = self.fc3(hidden).tanh()
        # Same as view(-1, 128, 128) followed by unsqueeze(1).
        return label_map.view(-1, 1, 128, 128).detach()
# Label-to-map encoder. It is created with random weights and its forward()
# detaches the output, so it is never trained in this notebook.
Embedding = LabelEmbedding()


# Everything after the first CSV column is used as the label vector.
# NOTE(review): LabelEmbedding expects 7 values per row -- confirm the CSV layout.
train_label = train_labels.iloc[:,1:]
train_label = torch.tensor(train_label.values, dtype=torch.float32)
test_label = test_labels.iloc[:,1:]
test_label = torch.tensor(test_label.values, dtype=torch.float32)

train_label = Embedding(train_label)

# Verify that EVERY image lines up with its CSV row before attaching label
# maps. (Checking only the final pair would let a misaligned dataset through,
# and silently skipping the concat only postpones the failure to a channel
# mismatch inside the discriminator.)
mismatched = [
    (file_name, label_name)
    for file_name, label_name in zip(train_data_name, train_labels.iloc[:, 0])
    if file_name.split('.')[0] != label_name
]
if mismatched:
    raise ValueError(
        f"{len(mismatched)} training image(s) do not match the label CSV order; "
        f"first mismatch: file {mismatched[0][0]!r} vs label {mismatched[0][1]!r}"
    )

# Append the label map as an extra channel of each training image.
train_data = torch.cat((train_data, train_label), dim = 1)

print(train_data.shape)

train_loader = DataLoader(train_data, batch_size = 32, shuffle=True, num_workers=3,pin_memory=True)

class Discriminator(nn.Module):
    """Convolutional real/fake scorer for 4-channel inputs.

    Three stride-2 convolutions (4 -> 32 -> 64 -> 1 channels) each halve the
    spatial size; the remaining single-channel map is flattened into the
    256 values expected by the final linear layer (i.e. a 128x128 input) and
    squashed to a probability with a sigmoid.
    """

    def __init__(self):
        super().__init__()

        def downsample(in_channels, out_channels):
            # 4x4 kernel, stride 2, padding 1: halves height and width.
            return nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)

        def activation():
            return nn.LeakyReLU(0.2, inplace=True)

        # Module order is kept fixed so state-dict keys stay the same.
        stages = [
            downsample(4, 32), nn.BatchNorm2d(32), activation(),
            downsample(32, 64), nn.BatchNorm2d(64), activation(), nn.Dropout(0.4),
            downsample(64, 1), nn.BatchNorm2d(1), activation(), nn.Dropout(0.4),
        ]
        self.conv_layers = nn.Sequential(*stages)
        self.fc_layers = nn.Sequential(nn.Flatten(), nn.Linear(256, 1), nn.Sigmoid())

    def forward(self, x):
        """Return one probability per sample, shape (batch, 1)."""
        return self.fc_layers(self.conv_layers(x))

# class Generator(nn.Module):
#     def __init__(self):
#         super(Generator, self).__init__()

#         self.conv1 = nn.Conv2d(2, 64, kernel_size=4, stride=2, padding=1)
#         self.conv2 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1)
#         self.conv3 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1)
#         self.conv4 = nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1)
#         self.conv5 = nn.Conv2d(512, 1024, kernel_size=4, stride=2, padding=1)

#         self.deconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2, padding=1)
#         self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1)
#         self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1)
#         self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1)
#         self.deconv5 = nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1)

#         self.batch_norm = nn.BatchNorm2d(512)
#         self.leaky_relu = nn.LeakyReLU(0.2)
#         self.tanh = nn.Tanh()

#     def forward(self, x):
#         x1 = self.leaky_relu(self.conv1(x))
#         x2 = self.leaky_relu(self.conv2(x1))
#         x3 = self.leaky_relu(self.conv3(x2))
#         x4 = self.leaky_relu(self.conv4(x3))
#         x5 = self.leaky_relu(self.conv5(x4))

#         y = self.leaky_relu(self.batch_norm(self.deconv1(x5)))
#         y = (y + x4)
#         y = self.leaky_relu(self.deconv2(y))
#         y = (y + x3)
#         y = self.leaky_relu(self.deconv3(y))
#         y = (y + x2)
#         y = self.leaky_relu(self.deconv4(y))
#         y = (y + x1)
#         y = self.tanh(self.deconv5(y))

#         return y


class Generator(nn.Module):
    """Encoder-decoder mapping a 2-channel input to a 3-channel image.

    The encoder applies five stride-2 convolutions (2 -> 64 -> 128 -> 256 ->
    512 -> 1024 channels); the decoder mirrors them with five stride-2
    transposed convolutions down to 3 channels and ends in tanh, so outputs
    lie in [-1, 1] and have the same height/width as the input.
    """

    def __init__(self):
        super().__init__()
        conv_kwargs = dict(kernel_size=4, stride=2, padding=1)

        # First encoder stage has no batch norm; the remaining four do.
        encoder_layers = [nn.Conv2d(2, 64, **conv_kwargs), nn.ReLU()]
        for in_ch, out_ch in [(64, 128), (128, 256), (256, 512), (512, 1024)]:
            encoder_layers += [
                nn.Conv2d(in_ch, out_ch, **conv_kwargs),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
            ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Four normalised up-sampling stages, then the tanh output stage.
        decoder_layers = []
        for in_ch, out_ch in [(1024, 512), (512, 256), (256, 128), (128, 64)]:
            decoder_layers += [
                nn.ConvTranspose2d(in_ch, out_ch, **conv_kwargs),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
            ]
        decoder_layers += [nn.ConvTranspose2d(64, 3, **conv_kwargs), nn.Tanh()]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode then decode ``x``."""
        return self.decoder(self.encoder(x))


torch.Size([10015, 4, 128, 128])


In [None]:

class Classifier(nn.Module):
    """CNN producing 7 class logits from a 3-channel image.

    Five blocks of Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2) -> Dropout are
    followed by a four-layer MLP. The first linear layer takes 32*4*4
    features, i.e. a 128x128 input halved five times.
    """

    def __init__(self):
        super().__init__()

        def conv_block(in_channels, out_channels, drop_p):
            # One feature stage; the 2x2 max-pool halves height and width.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(drop_p),
            ]

        # (in_channels, out_channels, dropout) per stage; the order fixes the
        # state-dict keys that a saved checkpoint relies on.
        block_specs = [
            (3, 32, 0.5),
            (32, 64, 0.4),
            (64, 128, 0.4),
            (128, 64, 0.5),
            (64, 32, 0.5),
        ]
        feature_layers = []
        for in_channels, out_channels, drop_p in block_specs:
            feature_layers.extend(conv_block(in_channels, out_channels, drop_p))
        self.conv_layers = nn.Sequential(*feature_layers)

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 4 * 4, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 77),
            nn.ReLU(),
            nn.Linear(77, 7),
        )

    def forward(self, x):
        """Return raw (un-normalised) class logits of shape (batch, 7)."""
        features = self.conv_layers(x)
        return self.fc_layers(features)




In [None]:
# !pip install --upgrade wandb
import os
import wandb
# SECURITY: never commit an API key to a notebook. The key is read from the
# WANDB_API_KEY environment variable; when it is unset, wandb.login() falls
# back to its own credential lookup / interactive prompt.
# The key that used to be hard-coded here is exposed in the notebook history
# and should be revoked in the W&B account settings.
api_key = os.environ.get("WANDB_API_KEY")
wandb.login(key = api_key)
wandb.init(project="DL Assignment 4_2", entity="m23mac004")



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
D_Loss,▇▄▆▆█▇▇▇▇▄▁▆▇▇▇▄▇▇▇▇▇▇▇▇▆▁▁▁▁▁▁▁▁▁▁▁
D_Loss_Fake,█▄▅▅▆▆▆▅▅▃▁▅▆▅▅▃▆▅▅▆▆▆▆▆▅▁▁▁▁▁▁▁▁▁▁▁
D_Loss_Real,▃▃▆▆█▇▇▇▇▃▁▆▇▇▇▄▇▇▇▇▇▇▇▇▆▁▁▁▁▁▁▁▁▁▁▁
G_Loss,▁▂▁▂▁▁▁▁▁▂▃▂▁▁▁▂▁▁▁▁▁▁▁▁▂▅▆▆▆▇▇▇▇███
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███

0,1
D_Loss,0.00028
D_Loss_Fake,0.00015
D_Loss_Real,0.00013
G_Loss,11.35503
epoch,36.0


In [None]:
# ---------------------------------------------------------------------------
# Models, losses and optimizers
# ---------------------------------------------------------------------------
discriminator = Discriminator().to(device)
generator = Generator().to(device)
classifier = Classifier().to(device)
# map_location lets a checkpoint saved on a GPU load in a CPU-only session.
classifier.load_state_dict(
    torch.load('/kaggle/input/classifier/pytorch/gener/1/classifier.pth', map_location=device)
)
# The classifier is loaded from a checkpoint and never stepped by an optimizer
# here; it only judges generated images. Eval mode disables its Dropout and
# stops BatchNorm from overwriting the loaded running statistics with
# statistics of generated images. Freezing the weights means the
# classification loss only produces gradients for the generator.
classifier.eval()
for frozen_param in classifier.parameters():
    frozen_param.requires_grad_(False)
loss_function = nn.BCELoss()
loss_function2 = nn.CrossEntropyLoss()

d_optimizer = torch.optim.Adam(discriminator.parameters(), lr = 0.0002, betas=(0.5, 0.999))
g_optimizer = torch.optim.Adam(generator.parameters(), lr = 0.0002, betas=(0.5, 0.999))

# Inverse of Normalize(mean=0.5, std=0.5): maps [-1, 1] back to [0, 1].
stats = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
def denorm(img_tensors):
    """Undo the 0.5/0.5 normalisation so images can be written to disk."""
    return img_tensors * stats[1][0] + stats[0][0]

num_class = 7
directory = 'generated_images_DL1 lr = 0.00002__4'
os.makedirs(directory, exist_ok=True)


def _sample_generator_input(batch_size):
    """Draw random training sketches and random one-hot labels for one batch.

    Returns:
        generator_input: sketches with the label map appended as an extra
            channel, on ``device``.
        label_map: the label map alone, on ``device``.
        one_hot: the sampled one-hot labels (CPU), shape (batch_size, num_class).
    """
    sketches = train_sketch[torch.randperm(train_sketch.shape[0])[:batch_size]]
    one_hot = torch.eye(num_class)[torch.randint(0, num_class, size=(batch_size,))]
    label_map = Embedding(one_hot)
    generator_input = torch.cat([sketches, label_map], dim=1).to(device)
    return generator_input, label_map.to(device), one_hot


# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
epochs = 50
for epoch in range(epochs):
    d_loss_real_list  = []
    d_loss_fake_list  = []
    d_loss_total = 0
    g_loss_total = 0
    d_real_score = 0
    d_fake_score = 0
    classi_loss = 0
    classi_accuarcy = 0
    for X in train_loader:
        batch_size = X.shape[0]
        real_target = torch.ones(batch_size, 1, device=device)
        fake_target = torch.zeros(batch_size, 1, device=device)

        # ---- 1) Discriminator step: real images -> 1, generated images -> 0.
        d_optimizer.zero_grad()
        d_real_output = discriminator(X.to(device))
        d_loss_real = loss_function(d_real_output, real_target)
        d_loss_real_list.append(d_loss_real.cpu().item())
        d_real_score += torch.mean(d_real_output).item()

        generator_input, label_map, _ = _sample_generator_input(batch_size)
        fake_data = torch.cat([generator(generator_input), label_map], dim=1)
        # detach(): the discriminator update must not back-propagate into the
        # generator (its gradients would be discarded anyway).
        d_fake_output = discriminator(fake_data.detach())
        d_loss_fake = loss_function(d_fake_output, fake_target)
        d_loss_fake_list.append(d_loss_fake.cpu().item())
        d_fake_score += torch.mean(d_fake_output).item()

        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        d_optimizer.step()
        d_loss_total += d_loss.item()

        # ---- 2) Generator adversarial step: make the discriminator say "real".
        g_optimizer.zero_grad()
        generator_input, label_map, _ = _sample_generator_input(batch_size)
        fake_data = torch.cat([generator(generator_input), label_map], dim=1)
        g_output = discriminator(fake_data)
        g_loss = loss_function(g_output, real_target)
        g_loss.backward()
        g_optimizer.step()
        g_loss_total += g_loss.item()

        # ---- 3) Generator classification step: the frozen classifier should
        # recognise the class the generator was conditioned on.
        g_optimizer.zero_grad()
        generator_input, _, classification_true_labels = _sample_generator_input(batch_size)
        class_targets = classification_true_labels.argmax(dim=1).to(device)
        fake_data = generator(generator_input)
        y_pred_classification = classifier(fake_data)

        classification_loss = loss_function2(y_pred_classification, class_targets)
        classification_acc = (y_pred_classification.argmax(dim=1) == class_targets).sum().item() / batch_size
        classification_loss.backward()
        g_optimizer.step()
        classi_loss += classification_loss.item()
        classi_accuarcy += classification_acc

    d_real_loss = np.mean(d_loss_real_list)
    d_fake_loss = np.mean(d_loss_fake_list)

    wandb.log({
                "epoch": epoch + 1,
                "D_Loss_Real": d_real_loss,
                "D_Loss_Fake": d_fake_loss,
                "D_Loss": d_loss_total / len(train_loader),
                "G_Loss": g_loss_total / len(train_loader),
#                 "D_Real_Score": d_real_score / len(train_loader),
#                 "D_Fake_Score": d_fake_score / len(train_loader),
#                 "C_Loss" : classi_loss / len(train_loader),
#                 "C_Accuracy": (classi_accuarcy * 100) / len(train_loader)

            })

    print(f"Epoch [{epoch+1}/{epochs}] | "
      f"D_Loss_Real: {d_real_loss:.4f} | "
      f"D_Loss_Fake: {d_fake_loss:.4f} | "
      f"D_Real_Score: {d_real_score / len(train_loader):.4f} | "
      f"D_Fake_Score: {d_fake_score / len(train_loader):.4f} | "
      f"D_Loss: {d_loss_total / len(train_loader):.4f} | "
      f"G_Loss: {g_loss_total / len(train_loader):.4f} | "
      f"C_Loss: {classi_loss / len(train_loader):.4f}  | "
      f"C_Accuracy: {(classi_accuarcy * 100) / len(train_loader):.4f}")

    # ---- Per-epoch preview: one grid row of generated test samples per class.
    grid = []
    classi_loss = 0
    classi_accuarcy = 0
    # No gradients are needed for the preview. The generator is left in the
    # mode it trains in, so its BatchNorm layers still use batch statistics.
    with torch.no_grad():
        for c in range(num_class):
            samples = test_sketch[torch.randperm(test_sketch.shape[0])[:10]]
            label = torch.eye(num_class)[c]
            labels = label.unsqueeze(0).repeat(samples.shape[0], 1)
            labels_classification = torch.max(labels, 1)[1].to(device)
            labels = Embedding(labels)
            samples = torch.cat([samples, labels], dim=1)
            images = generator(samples.to(device))

            y_pred_cla = classifier(images)
            classification_loss = F.cross_entropy(y_pred_cla, labels_classification)
            classi_loss += classification_loss.item()

            # Divide by the real sample count rather than a hard-coded 10.
            classification_accuarcy = (y_pred_cla.argmax(dim=1) == labels_classification).sum().item() / samples.shape[0]
            classi_accuarcy += classification_accuarcy

            grid_image = vutils.make_grid(images, nrow=10, padding=2)
            grid.append(grid_image)
    grid = torch.cat(grid, dim = 1)
    vutils.save_image(denorm(grid), f"{directory}/epoch_{epoch}.png")

    print(f'classification_loss: {classi_loss / num_class} | '
        f'classification_accuracy: {((classi_accuarcy / num_class) * 100):.4f}')

# Reference picture of real test images: up to ten per class, with the
# per-class grids concatenated along dim 1.
stats = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
def denorm(img_tensors):
    """Undo Normalize(mean=0.5, std=0.5): scale by the std, then add the mean."""
    scale, shift = stats[1][0], stats[0][0]
    return img_tensors * scale + shift

num_class = 7
# Class index of every test image (position of the largest label entry).
test_class_index = test_label.data.argmax(dim=1)
grid = []
for c in range(num_class):
    images = test_data[test_class_index == c][:10]
    grid_image = vutils.make_grid(images, nrow=10, padding=2)
    grid.append(grid_image)
grid = torch.cat(grid, dim = 1)
directory = 'Original Samples'
os.makedirs(directory, exist_ok=True)
vutils.save_image(denorm(grid), f"{directory}/Original_sample.png")

Epoch [1/50] | D_Loss_Real: 0.3793 | D_Loss_Fake: 2.3142 | D_Real_Score: 0.6914 | D_Fake_Score: 0.8315 | D_Loss: 2.6935 | G_Loss: 0.2035 | C_Loss: 1.9478  | C_Accuracy: 14.5184
classification_loss: 1.9468941688537598 | classification_accuracy: 14.2857
Epoch [2/50] | D_Loss_Real: 0.6663 | D_Loss_Fake: 0.7306 | D_Real_Score: 0.5139 | D_Fake_Score: 0.5180 | D_Loss: 1.3969 | G_Loss: 0.6630 | C_Loss: 1.9256  | C_Accuracy: 15.5970
classification_loss: 2.3539233718599593 | classification_accuracy: 11.4286
Epoch [3/50] | D_Loss_Real: 0.6375 | D_Loss_Fake: 0.7304 | D_Real_Score: 0.5293 | D_Fake_Score: 0.5175 | D_Loss: 1.3680 | G_Loss: 0.6679 | C_Loss: 0.3318  | C_Accuracy: 84.9238
classification_loss: 3.2494504842314718 | classification_accuracy: 65.7143
Epoch [4/50] | D_Loss_Real: 0.5306 | D_Loss_Fake: 0.6438 | D_Real_Score: 0.5961 | D_Fake_Score: 0.4676 | D_Loss: 1.1744 | G_Loss: 0.9114 | C_Loss: 0.0284  | C_Accuracy: 99.1514
classification_loss: 2.107721555063368 | classification_accuracy: 6

In [None]:
# Persist the trained generator weights.
torch.save(generator.state_dict(), 'generator.pth')
# The generator was conditioned on label maps produced by `Embedding`, whose
# weights are random and never trained. Without those exact weights the saved
# generator cannot be conditioned the same way after a kernel restart, so
# store them next to it.
torch.save(Embedding.state_dict(), 'label_embedding.pth')

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image

# Load the PNG image of generated samples.
image_path = "/kaggle/input/generated-image/image gen.png"
# PNG files may carry an alpha channel or a palette; the later preprocessing
# normalises with 3-channel statistics, so force RGB here. The context manager
# closes the file once the pixels are copied by convert().
with Image.open(image_path) as png_file:
    gen_image = png_file.convert("RGB")



In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image

# Load the PNG image of original (real) samples.
image_path = "/kaggle/input/original/Original_sample.png"
# PNG files may carry an alpha channel or a palette; the later preprocessing
# normalises with 3-channel statistics, so force RGB here. The context manager
# closes the file once the pixels are copied by convert().
with Image.open(image_path) as png_file:
    orig_image = png_file.convert("RGB")



In [None]:
# print(tensor[1].shape)

(130, 3)


In [None]:
import torch
import torchvision.transforms as transforms
from torchvision.models import inception_v3
from scipy.stats import entropy
import numpy as np
from scipy.linalg import sqrtm

# Function to calculate the inception score
def inception_score(images, inception_model, splits=10):
    """Compute the Inception Score of a batch of images.

    For each split the score is ``exp(mean_x KL(p(y|x) || p(y)))`` where
    ``p(y|x)`` is the softmax of the model output and ``p(y)`` its mean over
    the split.

    Args:
        images: Batch tensor; ``images.shape[0]`` is the number of images.
        inception_model: Callable returning one row of class logits per image.
        splits: Requested number of splits. It is reduced to the number of
            images when fewer images than splits are given, so no split is
            ever empty (an empty split would yield NaN).

    Returns:
        ``(mean, std)`` of the per-split scores as 0-dim tensors. ``std`` is 0
        when only a single split could be formed.

    Raises:
        ValueError: If ``images`` is empty.
    """
    num_images = images.shape[0]
    if num_images == 0:
        raise ValueError("inception_score needs at least one image")
    splits = max(1, min(splits, num_images))
    split_size = num_images // splits

    with torch.no_grad():
        # log_softmax avoids log(0) = -inf for classes whose probability underflows.
        log_preds = torch.nn.functional.log_softmax(inception_model(images), dim=1)
        preds = log_preds.exp()
        tiny = torch.finfo(preds.dtype).tiny

        scores = []
        for i in range(splits):
            rows = slice(i * split_size, (i + 1) * split_size)
            part, log_part = preds[rows], log_preds[rows]
            log_marginal = torch.log(part.mean(dim=0, keepdim=True).clamp_min(tiny))
            kl = (part * (log_part - log_marginal)).sum(dim=1).mean()
            scores.append(torch.exp(kl))
        scores = torch.stack(scores)

    # The sample standard deviation of a single value is undefined (NaN).
    std = scores.std() if splits > 1 else torch.zeros_like(scores[0])
    return scores.mean(), std

# Function to calculate the FID
def calculate_fid(real_features, generated_features):
    """Frechet distance between two sets of feature vectors.

    ``FID = ||mu_r - mu_g||^2 + Tr(S_r + S_g - 2 * (S_r S_g)^(1/2))`` where
    ``(.)^(1/2)`` is the MATRIX square root (an element-wise square root gives
    a different, incorrect value whenever the covariances are not diagonal).

    Args:
        real_features: Tensor of shape (num_samples, feature_dim).
        generated_features: Tensor of shape (num_samples, feature_dim).

    Returns:
        0-dim tensor holding the distance.
    """
    # float64 keeps the matrix square root numerically stable.
    real = real_features.detach().cpu().double()
    generated = generated_features.detach().cpu().double()

    mu_real, mu_gen = real.mean(dim=0), generated.mean(dim=0)
    sigma_real = torch_cov(real).numpy()
    sigma_gen = torch_cov(generated).numpy()

    covmean = sqrtm(sigma_real @ sigma_gen)
    if not np.isfinite(covmean).all():
        # Near-singular product: regularise both covariances slightly and retry.
        offset = np.eye(sigma_real.shape[0]) * 1e-6
        covmean = sqrtm((sigma_real + offset) @ (sigma_gen + offset))
    if np.iscomplexobj(covmean):
        # Tiny imaginary parts are numerical noise of the square root.
        covmean = covmean.real

    mean_term = (mu_real - mu_gen).pow(2).sum().item()
    trace_term = np.trace(sigma_real) + np.trace(sigma_gen) - 2.0 * np.trace(covmean)
    out_dtype = real_features.dtype if real_features.is_floating_point() else torch.get_default_dtype()
    return torch.tensor(float(mean_term + trace_term), dtype=out_dtype)

def torch_cov(x):
    """Unbiased covariance matrix of the rows of ``x`` (samples along dim 0).

    Returns an all-zero (dim, dim) matrix when there are fewer than two
    samples, because the unbiased estimate is undefined in that case.
    """
    mean_x = torch.mean(x, dim=0)
    x = x - mean_x.unsqueeze(0)
    if x.size(0) <= 1:
        return torch.zeros_like(torch.mm(x.t(), x))
    return 1 / (x.size(0) - 1) * x.t().mm(x)

# Function to preprocess images
def preprocess_images(images):
    """Turn one PIL image into a normalised batch of size one.

    The image is converted to a tensor, resized to 128x128 and normalised
    with mean 0.5 / std 0.5 per channel, then a leading batch axis is added.
    """
    pipeline_steps = [
        transforms.ToTensor(),
        transforms.Resize((128, 128)),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # Normalize to [-1, 1]
    ]
    pipeline = transforms.Compose(pipeline_steps)
    tensor = pipeline(images)
    return tensor.unsqueeze(0)

# Load pre-trained Inception model
import copy

inception_model = inception_v3(pretrained=True, transform_input=False)
inception_model.eval()

# FID is defined on the pooled activations that feed the final classification
# layer, not on the class logits. A twin network whose `fc` layer is replaced
# by an identity returns exactly those activations.
feature_extractor = copy.deepcopy(inception_model)
feature_extractor.fc = torch.nn.Identity()
feature_extractor.eval()


generated_images = gen_image
real_images = orig_image

# Preprocess images
# NOTE(review): each of these is ONE picture, so both metrics below are
# computed from a single sample per set (covariances are zero, one IS split).
# If the PNGs are grids of many samples they should be cut into tiles first
# -- confirm the layout of the files.
generated_images_preprocessed = preprocess_images(generated_images)
real_images_preprocessed = preprocess_images(real_images)
# print(real_images_preprocessed)

# Compute Inception Score (inference only, so no autograd graph is needed)
with torch.no_grad():
    is_mean, is_std = inception_score(generated_images_preprocessed, inception_model)
print("Inception Score - Mean:", is_mean.item(), "Standard Deviation:", is_std.item())

# Compute FID
with torch.no_grad():
    real_features = feature_extractor(real_images_preprocessed).detach().cpu()
    generated_features = feature_extractor(generated_images_preprocessed).detach().cpu()
fid = calculate_fid(real_features, generated_features)
print("FID:", fid.item())
