# Is Synthetic Data Real?



> Harvard CS 242: Computing at Scale (Fall 2023)

>
> Instructor: Professor HT Kung

> Authors: Michael Xiang, Marcos Johnson-Noya, Minkai Li, Corwin Cheung


---

### **1. Using CLIP as an indicator of text specificity.**

---
We will use the CIFAR-10 dataset. We run the dataset through CLIP and test whether the classes that perform better with CLIP also perform better with StableRep.


In [None]:
# Imports

import time
import copy
import sys
from collections import OrderedDict
import os

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

**Downloading CLIP**

In [None]:
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

In [None]:
# CLIP is imported here rather than with the other imports because it is only
# available once the install cell above has run.
import clip

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# `clip.load` hands back the model together with the image transform
# (`preprocess`) that the dataset cells below pass as `transform=`.
model, preprocess = clip.load("ViT-B/32", device=device)

100%|████████████████████████████████████████| 338M/338M [00:03<00:00, 110MiB/s]


**Loading in different datasets**

In [None]:
# CIFAR-10 test split, with CLIP's `preprocess` applied to every image.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=preprocess,
)

# Batches of 128 in dataset order (no shuffling).
# Note: every dataset cell rebinds `testloader`; the inference cell evaluates
# whichever loader was assigned last.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Oxford-IIIT Pet test split, with CLIP's `preprocess` applied to every image.
testset = torchvision.datasets.OxfordIIITPet(
    root='./data',
    split="test",
    download=True,
    transform=preprocess,
)

# Batches of 128 in dataset order (no shuffling).
# Note: every dataset cell rebinds `testloader`; the inference cell evaluates
# whichever loader was assigned last.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to data/oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:30<00:00, 25561340.06it/s]


Extracting data/oxford-iiit-pet/images.tar.gz to data/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to data/oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:01<00:00, 11541017.52it/s]


Extracting data/oxford-iiit-pet/annotations.tar.gz to data/oxford-iiit-pet


In [None]:
# STL10 test split, with CLIP's `preprocess` applied to every image.
testset = torchvision.datasets.STL10(
    root='./data',
    split="test",
    download=True,
    transform=preprocess,
)

# Batches of 128 in dataset order (no shuffling).
# Note: every dataset cell rebinds `testloader`; the inference cell evaluates
# whichever loader was assigned last.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./data/stl10_binary.tar.gz


100%|██████████| 2640397119/2640397119 [03:00<00:00, 14654695.77it/s]


Extracting ./data/stl10_binary.tar.gz to ./data


In [None]:
# FashionMNIST test split (train=False), with CLIP's `preprocess` applied to
# every image.
testset = torchvision.datasets.FashionMNIST(
    "./data",
    train=False,
    transform=preprocess,
    download=True,
)

# Batches of 128 in dataset order (no shuffling).
# Note: every dataset cell rebinds `testloader`; the inference cell evaluates
# whichever loader was assigned last.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Mount Google Drive at /content/drive so later cells can read image folders from it.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime -- confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd '/content/drive/MyDrive/'

/content/drive/MyDrive


In [None]:
ls

[0m[01;36m'242 stuff'[0m@                          [01;34m'Generated Images For StableRep Testing'[0m/   [01;34mtest[0m/
'Alien Music Theory.gslides'           IMG_1226.JPG                               [01;34mvalid[0m/
[01;34m'Colab Notebooks'[0m/                    'MHD PINN Diagonals.ipynb'
'Copy of 2023-CS124-lec22-notes.pdf'  'Michael Xiang - Resume.pdf'


In [None]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Images stored under "valid/" (relative to the current working directory),
# with CLIP's `preprocess` applied to every image.
test_dataset = ImageFolder("valid/", transform=preprocess)

# Batches of 128 in dataset order (no shuffling).
# Note: this rebinds `testloader`; the inference cell evaluates whichever
# loader was assigned last.
testloader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

**Running Inference**

In [None]:
def get_text_prompts(classes):
    """Build one text prompt per class name.

    Args:
        classes: Iterable of class names.

    Returns:
        A list with the string "A photo of a <name>" for each name, in the
        same order as `classes`.
    """
    return [f"A photo of a {label}" for label in classes]

# Candidate class-name lists; exactly one `classes` assignment is active at a time.
# The inference cell compares the index of the best-matching prompt with the
# dataset label, so the active list must be in the same order as the label ids
# of the dataset behind `testloader`.
# NOTE(review): the dataset named next to each list is inferred from the class
# names only -- confirm before relying on it.

# Food dishes (presumably for an image-folder dataset -- TODO confirm):
# classes = ["Apple Pie", "Bibimbap", "Cannoli", "Edamame", "Falafel", "French Toast", "Ice Cream", "Ramen", "Sushi", "Tiramisu"]
# Cat and dog breeds (presumably OxfordIIITPet):
# classes = ['Abyssinian', 'Bengal', 'Birman', 'Bombay', 'British Shorthair', 'Egyptian Mau', 'Maine Coon', 'Persian', 'Ragdoll', 'Russian Blue', 'Siamese', 'Sphynx', 'american bulldog', 'american pit bull terrier', 'basset hound', 'beagle', 'boxer', 'chihuahua', 'english cocker spaniel', 'english setter', 'german shorthaired', 'great pyrenees', 'havanese', 'japanese chin', 'keeshond', 'leonberger', 'miniature pinscher', 'newfoundland', 'pomeranian', 'pug', 'saint bernard', 'samoyed', 'scottish terrier', 'shiba inu', 'staffordshire bull terrier', 'wheaten terrier', 'yorkshire terrier']
# Active list (presumably STL10 -- TODO confirm):
classes = ["airplane", "bird", "car", "cat", "deer", "dog", "horse", "monkey", "ship", "truck"]
# Aerial / land-use scenes (no matching dataset is loaded in the cells above -- TODO confirm source):
# classes = ["agricultural","airplane","baseball diamond","beach",
#  "buildings","chaparral","dense residential","forest","freeway","golf course","harbor",
#  "intersection","medium residential","mobile homepark","overpass",
#  "parking lot","river","runway","sparse residential","storage tanks","tennis court"]
# Clothing items (presumably FashionMNIST):
# classes = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle Boot"]


# One "A photo of a <class>" prompt per active class name
prompts = get_text_prompts(classes)

In [None]:
# Tokenize the text prompts (one per class) and move them to the model's device
text = clip.tokenize(prompts).to(device)

# Per-class tallies indexed by label id: [num_correct, num_seen]
# NOTE(review): assumes `prompts` is ordered like the label ids of the dataset
# behind `testloader` -- confirm whenever the dataset or class list changes.
correct_label_accuracy = [[0, 0] for _ in range(len(prompts))]

# Getting pure accuracy
total_images = 0
total_correct = 0

# Iterate over the DataLoader; gradients are not needed for evaluation
with torch.no_grad():
    for images, labels in testloader:
        # The batch is consumed as a single tensor, so it is moved to the device directly
        images = images.to(device)

        # Calculate logits. One forward call yields the image->text logits, so
        # no separate encode_image / encode_text passes are run.
        logits_per_image, logits_per_text = model(images, text)
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()

        # Index of the highest-probability prompt for each image
        predictions = probs.argmax(axis=-1)

        # Compare as plain Python ints so they can index the tally list directly
        for predicted, actual in zip(predictions.tolist(), labels.tolist()):
            total_images += 1
            correct_label_accuracy[actual][1] += 1
            if predicted == actual:
                correct_label_accuracy[actual][0] += 1
                total_correct += 1

print(total_images)
print(total_correct)
# An empty loader or a class with no images reports nan instead of raising
# ZeroDivisionError
print(total_correct / total_images if total_images else float("nan"))
for i, (num_correct, num_total) in enumerate(correct_label_accuracy):
    accuracy = num_correct / num_total if num_total else float("nan")
    print(f"Class {i} Photos: {num_total}; Accuracy: {accuracy}")


**Plotting**

In [None]:
# Plotting
import matplotlib.pyplot as plt

# Data for plotting
# One x position per prompt/class. A separate name is used so this cell does
# not rebind `classes` (the list of class names) to a range.
class_indices = range(len(prompts))
# A class with no evaluated images is drawn as an empty (0.0) bar instead of
# raising ZeroDivisionError
class_accuracies = [
    correct / total if total else 0.0
    for correct, total in correct_label_accuracy
]
total_accuracy = total_correct / total_images if total_images else 0.0

# Plotting the accuracies: one bar per class plus a horizontal line for the overall accuracy
plt.figure(figsize=(10, 6))
plt.bar(class_indices, class_accuracies, color="blue", label="Per Class Accuracy")
plt.axhline(y=total_accuracy, color="red", linestyle="-", label=f"Total Accuracy - {total_accuracy}")

# Adding labels and title
plt.xlabel("Class")
plt.ylabel("Accuracy")
plt.title("Accuracy per Class and Total Accuracy")
plt.xticks(class_indices)
# Fixed ticks at 0.0, 0.1, ..., 1.0 so plots from different runs share a scale
plt.yticks([i * 0.1 for i in range(11)])
plt.legend()

# Show plot
plt.show()
