## BAX-423 Big Data Analytics
## Data Dinosaurs: Kangjian (James) Gao, Raghav Rama Bhadran, Sahiti Sukhavasi, Trishal Jadhav
## Final Project - Criminal Sketch Generation

In [1]:
# Mounting Google Drive
# Colab-only step: exposes the user's Google Drive under /content/drive so the
# image folder and feature CSV referenced later in the notebook can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip3 install torch torchvision
!pip install torch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
# Loading libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import glob

In [4]:
# Define custom dataset class
class CustomDataset(Dataset):
    """Dataset that pairs each ``*.jpg`` file in a folder with one feature row.

    Args:
        image_folder: Directory that is searched (non-recursively) for
            ``*.jpg`` files.
        feature_data: Indexable, sized collection of feature rows; row ``i``
            is returned together with the ``i``-th image.
        transform: Optional callable applied to the opened PIL image.
    """

    def __init__(self, image_folder, feature_data, transform=None):
        self.image_folder = image_folder
        # glob returns files in arbitrary OS order; sort so the index -> file
        # mapping is reproducible between runs and machines.
        # NOTE(review): confirm the feature rows follow sorted filename order.
        self.img_list = sorted(glob.glob(self.image_folder + "/*.jpg"))
        self.feature_data = feature_data
        self.transform = transform

    def __len__(self):
        """Return the number of usable (image, feature) pairs."""
        # Count samples, not characters of the folder path. Capping at the
        # shorter of the two collections keeps __getitem__ from indexing past
        # the end when the image count and the feature row count differ.
        return min(len(self.img_list), len(self.feature_data))

    def __getitem__(self, idx):
        """Return ``(image, feature)`` for sample ``idx``."""
        image_path = self.img_list[idx]
        image = Image.open(image_path)
        feature = self.feature_data[idx]
        if self.transform:
            image = self.transform(image)
        return image, feature

In [5]:
# Define generator network
class Generator(nn.Module):
    """Conditional generator: maps (noise, features) to a 64x64 image.

    A linear layer expands the concatenated input to a 128x16x16 map, which
    two stride-2 transposed convolutions upsample to 32x32 and then 64x64.
    The final tanh keeps outputs in [-1, 1].

    Args:
        input_size: Width of the noise vector.
        feature_size: Width of the conditioning feature vector.
        output_channels: Number of channels in the generated image.
    """

    def __init__(self, input_size, feature_size, output_channels):
        super().__init__()
        latent_width = input_size + feature_size
        self.fc = nn.Linear(latent_width, 128 * 16 * 16)
        self.relu = nn.ReLU()
        self.deconv = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1)
        self.bn = nn.BatchNorm2d(64)
        self.deconv2 = nn.ConvTranspose2d(64, output_channels, kernel_size=4, stride=2, padding=1)
        self.tanh = nn.Tanh()

    def forward(self, noise, features):
        """Generate a batch of images from noise and conditioning features."""
        # Join noise and condition along the feature axis, then force float32
        # so the linear layer's weights and its input agree in dtype.
        latent = torch.cat((noise, features), dim=1).to(torch.float32)
        hidden = self.relu(self.fc(latent))
        feature_map = hidden.view(-1, 128, 16, 16)
        feature_map = self.relu(self.bn(self.deconv(feature_map)))
        return self.tanh(self.deconv2(feature_map))

In [6]:
# Define discriminator network
class Discriminator(nn.Module):
    """Binary real/fake classifier over images.

    Two stride-2 convolutions each halve the spatial size; the result is
    flattened to 128*16*16 values per sample (which matches a 64x64 input)
    and reduced to a single sigmoid probability.

    Args:
        input_channels: Number of channels in the input image.
    """

    def __init__(self, input_channels):
        super().__init__()
        self.conv = nn.Conv2d(input_channels, 64, kernel_size=4, stride=2, padding=1)
        self.relu = nn.LeakyReLU(0.2)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1)
        self.bn = nn.BatchNorm2d(128)
        self.fc = nn.Linear(128 * 16 * 16, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one probability per row of the flattened feature map."""
        stage1 = self.relu(self.conv(x))
        stage2 = self.relu(self.bn(self.conv2(stage1)))
        flat = stage2.view(-1, 128 * 16 * 16)
        return self.sigmoid(self.fc(flat))

In [7]:
# Define GAN model
class GAN(nn.Module):
    """Container holding a Generator and a Discriminator as submodules.

    Args:
        input_size: Width of the generator's noise vector.
        feature_size: Width of the generator's conditioning feature vector.
        output_channels: Channel count of generated images; also used as the
            discriminator's input channel count.
    """

    def __init__(self, input_size, feature_size, output_channels):
        super().__init__()
        self.generator = Generator(
            input_size=input_size,
            feature_size=feature_size,
            output_channels=output_channels,
        )
        self.discriminator = Discriminator(input_channels=output_channels)

    def forward(self, noise, features):
        """Run only the generator and return its images."""
        return self.generator(noise, features)

In [8]:
# Define training loop
def train_gan(generator, discriminator, dataloader, num_epochs, device, noise_dim=None):
    """Train a conditional GAN with alternating discriminator/generator steps.

    Both networks are optimised with Adam (lr=0.0002, betas=(0.5, 0.999))
    against binary cross-entropy. For every batch the discriminator is
    updated on real and generated images first, then the generator is
    updated to make the discriminator label its images as real.

    Args:
        generator: Module called as ``generator(noise, features)``.
        discriminator: Module called as ``discriminator(images)`` that returns
            one probability per sample with shape ``(batch, 1)``.
        dataloader: Sized iterable yielding ``(images, features)`` batches.
        num_epochs: Number of passes over ``dataloader``.
        device: Device that batches, noise and labels are placed on.
        noise_dim: Width of the random noise vector. When ``None`` it is
            inferred as ``generator.fc.in_features - features.size(1)``, so
            the function no longer depends on a module-level ``input_size``.

    Raises:
        ValueError: If ``noise_dim`` is ``None`` and the generator exposes no
            ``fc.in_features`` to infer it from.
    """
    criterion = nn.BCELoss()
    generator_optimizer = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
    discriminator_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

    for epoch in range(num_epochs):
        for batch_idx, (images, features) in enumerate(dataloader):
            batch_size = images.size(0)
            images = images.to(device)
            features = features.to(device)

            # Resolve the noise width from the arguments instead of a global.
            if noise_dim is None:
                first_layer = getattr(generator, "fc", None)
                total_inputs = getattr(first_layer, "in_features", None)
                if total_inputs is None:
                    raise ValueError(
                        "noise_dim was not given and cannot be inferred: "
                        "generator has no 'fc.in_features'"
                    )
                current_noise_dim = total_inputs - features.size(1)
            else:
                current_noise_dim = noise_dim

            # Generate random noise vector
            noise = torch.randn(batch_size, current_noise_dim, device=device)

            # Update discriminator
            discriminator_optimizer.zero_grad()
            real_labels = torch.ones(batch_size, 1, device=device)
            fake_labels = torch.zeros(batch_size, 1, device=device)

            real_outputs = discriminator(images)
            real_loss = criterion(real_outputs, real_labels)
            real_loss.backward()

            generated_images = generator(noise, features)
            # detach(): the discriminator step must not push gradients into
            # the generator.
            fake_outputs = discriminator(generated_images.detach())
            fake_loss = criterion(fake_outputs, fake_labels)
            fake_loss.backward()

            discriminator_optimizer.step()

            # Update generator: it is rewarded when the (just updated)
            # discriminator labels its images as real.
            generator_optimizer.zero_grad()
            fake_outputs = discriminator(generated_images)
            generator_loss = criterion(fake_outputs, real_labels)
            generator_loss.backward()
            generator_optimizer.step()

            if batch_idx % 100 == 0:
                discriminator_loss_value = (real_loss + fake_loss).item()
                generator_loss_value = generator_loss.item()
                print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(dataloader)}], "
                      f"Discriminator Loss: {discriminator_loss_value}, Generator Loss: {generator_loss_value}")

In [9]:
# Specify paths and parameters
# -- data locations (relative to the notebook's working directory)
image_folder = 'drive/MyDrive/Big Data/Images'
feature_file = 'drive/MyDrive/Big Data/Features.csv'

# -- model dimensions
input_size = 100      # Size of the input noise vector
feature_size = 45     # Size of the input feature vector
output_channels = 3   # Number of image channels (e.g., 1 for grayscale, 3 for RGB)

# -- training settings
num_epochs = 1000
batch_size = 64
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Define transforms
transform = transforms.Compose([
    # Resize images to a uniform size
    transforms.Resize(size=(64, 64)),
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 maps ToTensor's [0, 1] range onto [-1, 1]
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Defining function to load features
def load_features(file_path, has_header=True):
    """Load a purely numeric CSV file into a NumPy array.

    Args:
        file_path: Path of the CSV file to read.
        has_header: When True, the first row is skipped.

    Returns:
        ``np.ndarray`` of floats with one row per non-blank CSV line. A file
        with no data rows yields an empty array.

    Raises:
        ValueError: If a cell cannot be converted to ``float``.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        if has_header:
            # The default keeps an empty file from raising StopIteration.
            next(reader, None)
        # csv.reader yields [] for blank lines; skipping them avoids ragged
        # rows that would break the array conversion.
        features = [[float(value) for value in row] for row in reader if row]
    return np.array(features)

# Load dataset and create a dataloader
# Feature rows are read from the CSV first ...
feature_data = load_features(feature_file)
# ... then paired with the images by a CustomDataset instance.
dataset = CustomDataset(
    image_folder=image_folder,
    feature_data=feature_data,
    transform=transform,
)
# Shuffled mini-batches, loaded by two worker processes.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

In [None]:
# Create the GAN model
gan = GAN(
    input_size=input_size,
    feature_size=feature_size,
    output_channels=output_channels,
)
gan = gan.to(device)  # Module.to returns the same module, now on `device`

# Train the GAN
train_gan(
    generator=gan.generator,
    discriminator=gan.discriminator,
    dataloader=dataloader,
    num_epochs=num_epochs,
    device=device,
)

Epoch [1/1000], Batch [1/1], Discriminator Loss: 1.4057718515396118, Generator Loss: 2.6958160400390625
Epoch [2/1000], Batch [1/1], Discriminator Loss: 0.7171617150306702, Generator Loss: 3.504302501678467
Epoch [3/1000], Batch [1/1], Discriminator Loss: 0.36753398180007935, Generator Loss: 4.035143852233887
Epoch [4/1000], Batch [1/1], Discriminator Loss: 0.2454102784395218, Generator Loss: 4.126587390899658
Epoch [5/1000], Batch [1/1], Discriminator Loss: 0.2208651751279831, Generator Loss: 4.032221794128418
Epoch [6/1000], Batch [1/1], Discriminator Loss: 0.24453957378864288, Generator Loss: 3.98093318939209
Epoch [7/1000], Batch [1/1], Discriminator Loss: 0.22359494864940643, Generator Loss: 4.125439167022705
Epoch [8/1000], Batch [1/1], Discriminator Loss: 0.20094603300094604, Generator Loss: 4.21767520904541
Epoch [9/1000], Batch [1/1], Discriminator Loss: 0.18327823281288147, Generator Loss: 4.265844821929932
Epoch [10/1000], Batch [1/1], Discriminator Loss: 0.17152139544487, G

In [None]:
# Generate images
num_samples = 10  # Number of images to generate
noise = torch.randn(num_samples, input_size).to(device)  # Generate random noise
# Feature inputs: the first `num_samples` rows, moved to the same device as
# the noise so the generator can concatenate them (a CPU tensor would fail
# when the model runs on CUDA).
features = torch.from_numpy(feature_data[0:num_samples]).to(device)

# Sampling only: no gradients are needed.
with torch.no_grad():
    generated_images = gan.generator(noise, features).cpu()

# The generator ends in tanh, so pixels lie in [-1, 1]; imshow expects floats
# in [0, 1] and would clip everything below zero.
display_images = ((generated_images + 1) / 2).clamp(0, 1)

# Display generated images
# squeeze=False keeps `axes` two-dimensional even when num_samples == 1.
fig, axes = plt.subplots(1, num_samples, figsize=(15, 3), squeeze=False)

for ax, image in zip(axes[0], display_images):
    ax.imshow(image.permute(1, 2, 0))  # (C, H, W) -> (H, W, C)
    ax.axis('off')

plt.tight_layout()
plt.show()