In [1]:
import os

# Expose only physical GPU 6 to CUDA. This cell runs before torch is imported
# further down, so the restriction is in place when CUDA is first queried.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

In [6]:
import numpy as np

# Toy 0/1 predictions from the two models being ensembled.
pretrained_ans = np.array([0, 1, 0, 1, 1], dtype=np.int64)
finetuned_ans = np.array([0, 0, 1, 1, 0], dtype=np.int64)

# Integer ceiling of the mean vote; for 0/1 votes this is a logical OR
# (the result is 1 as soon as either model says 1).
vote_sum = pretrained_ans + finetuned_ans
final_ans = ((vote_sum + 1) // 2).astype(np.int64)
final_ans

array([0, 1, 1, 1, 1])

In [2]:
from PIL import Image
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset

import lavis
from lavis.models import load_model_and_preprocess
 
import random 
# Seed Python's `random` module so the image subsets drawn later with
# random.sample (InstructBLIP.LoadImages) are repeatable.
# NOTE(review): only the stdlib RNG is seeded here; numpy and torch RNGs are not.
random.seed(43)
# Displayed as the cell output; note that this draw also advances the RNG state by one value.
random.random()

  from .autonotebook import tqdm as notebook_tqdm
2023-07-31 21:11:28.002746: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


0.038551839337380045

In [3]:
class TextInvDataset(Dataset):
    """Image/label dataset backed by a CSV file that is indexed by image path.

    The CSV must provide an ``img_path`` column (used as the DataFrame index)
    and a ``label`` column. Each item is a tuple ``(image, label, is_uncommon)``
    where ``is_uncommon`` is True when the substring ``"uncommon"`` occurs in
    the image path.
    """

    def __init__(self, csv, vis_processors=None, txt_processors=None):
        """Read the label table.

        Args:
            csv: Path or buffer handed to ``pandas.read_csv``.
            vis_processors: Optional callable applied to every loaded RGB image.
            txt_processors: Stored on the instance; not used by this class.
        """
        self.path_and_labels = pd.read_csv(csv, index_col="img_path")
        self.vis_processors = vis_processors
        self.txt_processors = txt_processors

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.path_and_labels)

    def __getitem__(self, index):
        """Return ``(image, label, is_uncommon)`` for the row at position ``index``."""
        # Positional access on the index avoids rebuilding a Python list of all
        # paths for every single item, which made each lookup O(len(dataset)).
        image_path = self.path_and_labels.index[index]

        # convert() fully loads the pixels and returns a new image, so the file
        # handle can be released as soon as the with-block ends.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.vis_processors:
            image = self.vis_processors(image)

        # Positional lookup stays a scalar even if the same path is listed twice.
        label = self.path_and_labels["label"].iloc[index]

        is_uncommon = "uncommon" in image_path

        return image, label, is_uncommon

#### Query batch of images

In [4]:

# This is for querying lots of images

from os import listdir
from os.path import isfile, join
from tqdm import tqdm
from sklearn.metrics import accuracy_score, confusion_matrix

class InstructBLIP():
    """Evaluation harness that asks a VQA model yes/no questions about images.

    Label convention (see ``LoadData`` / ``LoadData3Class``): 0 = real image,
    1 = fake image. The three-class labels split the fakes further into
    1 = common fake and 2 = uncommon fake. A model answer that equals
    ``true_string`` exactly is scored as prediction 0, anything else as 1.

    The model and its processors are not loaded here; attach them with
    ``LoadModels`` before calling ``LoadData_batch`` or any ``Query*`` method.
    The model must expose ``predict_answers(samples=..., inference_method=...)``.
    """

    def __init__(self, name="blip2_vicuna_instruct_textinv", model_type="vicuna7b", is_eval=True, device="cpu") -> None:
        """Create an empty harness.

        ``name``, ``model_type``, ``is_eval`` and ``device`` are accepted but
        not used: the model is supplied afterwards through ``LoadModels``.
        """
        print(f'Loading model...')
        self.imgs = []
        self.labels = []
        # Initialised up front so PrintResult / MultipleAns never hit a missing
        # attribute when called before a three-class load or a query.
        self.label_3class = []
        self.ans_list = []

        # QA
        self.question = ""

        # results
        self.acc = None
        self.confusion_mat = None

        self.acc_3class = None
        self.confusion_mat_3class = None

        self.real_acc = None
        self.real_confusion_mat = None
        self.com_acc = None
        self.com_confusion_mat = None
        self.uncom_acc = None
        self.uncom_confusion_mat = None

    def LoadModels(self, model, vis_processors, txt_processors, device):
        """Attach an already loaded model, its processors and the target device."""
        self.model = model
        self.vis_processors = vis_processors
        self.txt_processors = txt_processors
        self.device = device

    def LoadImages(self, dir, num):
        """Return ``num`` randomly sampled images from ``dir`` as RGB PIL images.

        Only regular files directly inside ``dir`` are considered. They are
        sorted before sampling so that, for a fixed ``random`` seed, the same
        subset is drawn. ``random.sample`` raises ``ValueError`` when ``num``
        exceeds the number of files.
        """
        candidates = [join(dir, f) for f in sorted(listdir(dir)) if isfile(join(dir, f))]
        chosen = random.sample(candidates, num)

        raw_img_list = []
        for path in tqdm(chosen, desc=f'Loading imgs from {dir}'):
            # convert() loads the pixel data, so the file can be closed right away.
            with Image.open(path) as img:
                raw_img_list.append(img.convert("RGB"))

        return raw_img_list

    def LoadData(self, real_dir, fake_dir, num=1000):
        """Load ``num`` real and ``num`` fake images and build the 0/1 labels."""
        real_imgs = self.LoadImages(real_dir, num)
        fake_imgs = self.LoadImages(fake_dir, num)

        self.imgs = real_imgs + fake_imgs
        self.labels = [0]*len(real_imgs) + [1]*len(fake_imgs)

    def LoadData_batch(self, csv_path, batch_size=8, num_workers=8):
        """Build a ``TextInvDataset`` and an unshuffled ``DataLoader`` over ``csv_path``.

        Args:
            csv_path: CSV with ``img_path`` and ``label`` columns.
            batch_size: Images per batch (default 8, the previous fixed value).
            num_workers: DataLoader worker processes (default 8, as before).
        """
        self.dataset = TextInvDataset(csv=csv_path, vis_processors=self.vis_processors["eval"])
        self.dataloader = DataLoader(dataset=self.dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    def LoadData3Class(self, real_dir, fake_common_dir, fake_uncommon_dir, num=[1000, 500, 500]):
        """Load real, common-fake and uncommon-fake images.

        Args:
            real_dir, fake_common_dir, fake_uncommon_dir: Source directories.
            num: Three sample counts, in the order real, common fake, uncommon fake.

        Sets ``self.labels`` (0 = real, 1 = any fake) and ``self.label_3class``
        (0 = real, 1 = common fake, 2 = uncommon fake).
        """
        # Copy so that the shared default list is never aliased through self.num.
        self.num = list(num)
        real_imgs = self.LoadImages(real_dir, num[0])
        fake_common_imgs = self.LoadImages(fake_common_dir, num[1])
        fake_uncommon_imgs = self.LoadImages(fake_uncommon_dir, num[2])

        self.imgs = real_imgs + fake_common_imgs + fake_uncommon_imgs
        self.labels = [0]*len(real_imgs) + [1]*(len(fake_common_imgs)+len(fake_uncommon_imgs))
        self.label_3class = [0]*len(real_imgs) + [1]*len(fake_common_imgs) + [2]*len(fake_uncommon_imgs)

    def QueryImgs(self, question, true_string="yes"):
        """Ask ``question`` about every image in ``self.imgs``, one image at a time.

        Args:
            question: Prompt passed to the model as ``text_input``.
            true_string: Answer scored as prediction 0; any other answer is 1.

        Returns:
            ``(acc, confusion_mat, ans_list)``; ``ans_list`` is a list of 0/1
            predictions in the order of ``self.imgs``.
        """
        self.ans_list = []
        self.question = question

        for img in tqdm(self.imgs, desc=f'Answering'):
            ans = self.Query(img, question)
            self.ans_list.append(0 if ans == true_string else 1)

        self.acc = accuracy_score(self.labels, self.ans_list)
        # labels=[0, 1] keeps the matrix 2x2 even if only one class shows up,
        # which PrintConfusion relies on.
        self.confusion_mat = confusion_matrix(self.labels, self.ans_list, labels=[0, 1])

        self.PrintResult()

        return self.acc, self.confusion_mat, self.ans_list

    def QueryImgs_batch(self, question, true_string="yes", label_true_string=None):
        """Ask ``question`` about every image served by ``self.dataloader``.

        Args:
            question: Prompt repeated for every image of a batch.
            true_string: Model answer scored as prediction 0.
            label_true_string: Dataset label value mapped to ground truth 0.
                Defaults to ``true_string`` (the previous behaviour).
                NOTE(review): with that default, changing ``true_string`` for a
                differently phrased question also changes how the CSV labels
                are read - confirm this is intended for your label file.

        Returns:
            ``(acc, confusion_mat, ans_list, labels, label_3class)`` with the
            last three as numpy arrays.
        """
        if label_true_string is None:
            label_true_string = true_string

        self.labels = []
        self.label_3class = []
        self.ans_list = []
        self.question = question

        for image, label, is_uncommon in tqdm(self.dataloader):

            image = image.to(self.device)

            questions = [self.question] * image.shape[0]
            samples = {"image": image, "text_input": questions}

            ans = self.model.predict_answers(samples=samples, inference_method="generate")
            self.ans_list += [0 if a == true_string else 1 for a in ans]

            label = [0 if l == label_true_string else 1 for l in label]
            self.labels += label

            # Items whose path contains "uncommon" become class 2.
            self.label_3class += [2 if uncommon else l for l, uncommon in zip(label, is_uncommon)]

        self.acc = accuracy_score(self.labels, self.ans_list)
        self.confusion_mat = confusion_matrix(self.labels, self.ans_list, labels=[0, 1])

        self.PrintResult()

        self.ans_list = np.array(self.ans_list)
        self.labels = np.array(self.labels)
        self.label_3class = np.array(self.label_3class)

        return self.acc, self.confusion_mat, self.ans_list, self.labels, self.label_3class

    def Query(self, image, question):
        """Ask ``question`` about a single PIL image and return the first answer."""
        image = self.vis_processors["eval"](image).unsqueeze(0).to(self.device)

        samples = {"image": image, "text_input": question}
        ans = self.model.predict_answers(samples=samples, inference_method="generate")[0]
        return ans

    def PrintResult(self, three_class=False, acc=None, confusion_mat=None, ans_list=None, labels=None, label_3class=None):
        """Print the stored results, optionally with a per-class breakdown.

        Any argument other than ``three_class`` that is not None overwrites the
        corresponding stored attribute first.

        Args:
            three_class: When True, also report real / common fake / uncommon
                fake images separately (requires ``label_3class`` to match
                ``ans_list`` element for element).

        Raises:
            ValueError: if ``three_class`` is requested but the predictions and
                three-class labels have different lengths.
        """
        # `is not None` instead of truthiness: numpy arrays have no single
        # truth value, and acc == 0.0 is a legitimate value to store.
        if acc is not None:
            self.acc = acc
        if confusion_mat is not None:
            self.confusion_mat = confusion_mat
        if ans_list is not None:
            self.ans_list = ans_list
        if labels is not None:
            self.labels = labels
        if label_3class is not None:
            self.label_3class = label_3class

        print(f'Question: {self.question}\n')

        if not three_class:
            print(f'Acc: {self.acc*100:.2f}%')
            self.PrintConfusion(self.confusion_mat)
            return

        print(f'=== Overall ===')
        print(f'Acc: {self.acc*100:.2f}%')
        self.PrintConfusion(self.confusion_mat)
        print('\n')

        # Boolean masking needs arrays; QueryImgs and MultipleAns store lists.
        predictions = np.asarray(self.ans_list)
        classes = np.asarray(self.label_3class)
        if predictions.shape != classes.shape:
            raise ValueError(
                f'ans_list has shape {predictions.shape} but label_3class has shape {classes.shape}; '
                'load three-class data and run a query before requesting the three-class report.'
            )

        self.real_acc, self.real_confusion_mat = self._ReportSubset('Real images', predictions[classes == 0], 0)
        print('\n')
        self.com_acc, self.com_confusion_mat = self._ReportSubset('Common fake images', predictions[classes == 1], 1)
        print('\n')
        self.uncom_acc, self.uncom_confusion_mat = self._ReportSubset('Uncommon fake images', predictions[classes == 2], 1)

    def _ReportSubset(self, title, subset_preds, gt_label):
        """Print and return accuracy and 2x2 confusion matrix for one class whose ground truth is ``gt_label``."""
        if len(subset_preds) == 0:
            # Nothing to score: report NaN instead of letting the metric fail.
            subset_acc = float('nan')
            subset_mat = np.zeros((2, 2), dtype=np.int64)
        else:
            subset_gt = [gt_label] * len(subset_preds)
            subset_acc = accuracy_score(subset_gt, subset_preds)
            subset_mat = confusion_matrix(subset_gt, subset_preds, labels=[0, 1])
        print(f'=== {title} ===')
        print(f'Acc: {subset_acc*100:.2f}%')
        self.PrintConfusion(subset_mat)
        return subset_acc, subset_mat

    def PrintConfusion(self, mat):
        """Print a 2x2 confusion matrix (rows = ground truth, columns = prediction).

        "true" corresponds to class 0 and "false" to class 1.
        """
        # Accept nested lists as well as arrays; mat[i, j] needs an ndarray.
        mat = np.asarray(mat)
        padding = ' '
        print(f'         | Pred true | Pred false |')
        print(f'GT true  | {mat[0, 0]:{padding}<{10}}| {mat[0, 1]:{padding}<{11}}|')
        print(f'GT false | {mat[1, 0]:{padding}<{10}}| {mat[1, 1]:{padding}<{11}}|')

    def MultipleAns(self, ans1, ans2):
        """Combine the predictions of two questions and score them against ``self.labels``.

        The combined prediction is 0 only when both inputs are 0, otherwise 1.
        Intended pairing (from the original notes):
            Q1: Is this photo common in real world?
            Q2: Is this photo generated by a model?

        The combined answers, accuracy and confusion matrix replace the stored
        ``ans_list``, ``acc`` and ``confusion_mat``; ``question`` is unchanged.

        Returns:
            ``(acc, confusion_mat, final_ans)`` with ``final_ans`` a list.
        """
        final_ans = [0 if first == 0 and second == 0 else 1 for first, second in zip(ans1, ans2)]

        acc = accuracy_score(self.labels, final_ans)
        confusion_mat = confusion_matrix(self.labels, final_ans, labels=[0, 1])
        print(f'Accuracy: {acc*100:.2f}%')
        self.PrintConfusion(confusion_mat)

        self.ans_list = final_ans
        self.acc = acc
        self.confusion_mat = confusion_mat

        return acc, confusion_mat, final_ans

In [5]:
# Use the GPU when CUDA reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = "cpu"
# Load the "blip2_vicuna_instruct_textinv" model (vicuna7b variant) in eval mode,
# together with its image and text preprocessors.
model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_vicuna_instruct_textinv", model_type="vicuna7b", is_eval=True, device=device)
#model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_t5_instruct", model_type="flant5xl", is_eval=True, device=device)
#instruct = InstructBLIP(name="blip2_vicuna_instruct", model_type="vicuna7b", is_eval=True, device=device)

tokenizer OK!


In [6]:
# The constructor only sets up empty state (and prints "Loading model...");
# the model loaded in the previous cell is attached explicitly via LoadModels.
instruct = InstructBLIP()
instruct.LoadModels(model, vis_processors, txt_processors, device)

Loading model...


In [7]:
# Smoke test: ask the `[*]` prompt about one image and print the raw model answer.
#image = Image.open("/eva_data0/denny/coco2014/val2014/COCO_val2014_000000000042.jpg")
image = Image.open("/eva_data0/denny/sd2/coco2014_train/samples/00000.png")
# The prompt is wrapped in a one-element list to go with the single-image batch built by Query.
questions = ["Is this photo [*]?"]
ans = instruct.Query(image, questions)
print(ans)

no


In [8]:
# Evaluate the `[*]` prompt on every image listed in the label CSV, using the batched DataLoader path.
csv_path = "/eva_data0/denny/textual_inversion/total_60k/60k_test_label.csv"
#csv_path = "/eva_data0/denny/debug_label.csv"
instruct.LoadData_batch(csv_path=csv_path)

question = "Is this photo [*]?"
acc, confusion_mat, ans_list, labels, label_3class = instruct.QueryImgs_batch(question=question, true_string="yes")
# NOTE(review): QueryImgs_batch already prints the accuracy and confusion matrix through
# PrintResult, so the two lines below repeat that output (visible twice in the recorded output).
print(f'Acc: {acc*100:.2f}%')
instruct.PrintConfusion(confusion_mat)

  0%|          | 0/777 [00:00<?, ?it/s]

100%|██████████| 777/777 [09:49<00:00,  1.32it/s]

Question: Is this photo [*]?

Acc: 92.28%
         | Pred true | Pred false |
GT true  | 2805      | 287        |
GT false | 193       | 2930       |
Acc: 92.28%
         | Pred true | Pred false |
GT true  | 2805      | 287        |
GT false | 193       | 2930       |





In [9]:
# Break the batch run above down by class (real / common fake / uncommon fake), using the state stored on `instruct`.
instruct.PrintResult(three_class=True)

Question: Is this photo [*]?

=== Overall ===
Acc: 92.28%
         | Pred true | Pred false |
GT true  | 2805      | 287        |
GT false | 193       | 2930       |


=== Real images ===
Acc: 90.72%
         | Pred true | Pred false |
GT true  | 2805      | 287        |
GT false | 0         | 0          |


=== Common fake images ===
Acc: 89.19%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 167       | 1378       |


=== Uncommon fake images ===
Acc: 98.35%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 26        | 1552       |


In [10]:
# Sample a three-class evaluation set: real COCO images plus SD2 images, where the
# SD2 "0_real" folder is used as the common fakes and "1_fake" as the uncommon fakes.
real_dir = "/eva_data0/denny/coco2014/train2014"
fake_common_dir = "/eva_data0/denny/SemanticError/SD2/0_real/"
fake_uncommon_dir = "/eva_data0/denny/SemanticError/SD2/1_fake/"
real_num = 200
# Half as many images per fake class, so real and fake totals are balanced.
num = [real_num, real_num//2, real_num//2]
#num = [50, 25, 25]
instruct.LoadData3Class(real_dir, fake_common_dir, fake_uncommon_dir, num)

Loading imgs from /eva_data0/denny/coco2014/train2014: 100%|██████████| 200/200 [00:03<00:00, 51.07it/s]
Loading imgs from /eva_data0/denny/SemanticError/SD2/0_real/: 100%|██████████| 100/100 [00:02<00:00, 33.49it/s]
Loading imgs from /eva_data0/denny/SemanticError/SD2/1_fake/: 100%|██████████| 100/100 [00:03<00:00, 32.72it/s]


In [11]:
# Ask the `[*]` prompt about each loaded image, one at a time; the answer "yes" is scored as class 0 (real).
acc1, mat1, ans1 = instruct.QueryImgs("Is this photo [*]?", true_string="yes")

Answering: 100%|██████████| 400/400 [02:17<00:00,  2.90it/s]

Question: Is this photo [*]?

Acc: 96.25%
         | Pred true | Pred false |
GT true  | 186       | 14         |
GT false | 1         | 199        |





In [12]:
# Per-class breakdown of the `[*]` run above.
instruct.PrintResult(three_class=True)

Question: Is this photo [*]?

=== Overall ===
Acc: 96.25%
         | Pred true | Pred false |
GT true  | 186       | 14         |
GT false | 1         | 199        |


=== Real images ===
Acc: 93.00%
         | Pred true | Pred false |
GT true  | 186       | 14         |
GT false | 0         | 0          |


=== Common fake images ===
Acc: 99.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 1         | 99         |


=== Uncommon fake images ===
Acc: 100.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 0         | 100        |


In [15]:
# First question of the two-question ensemble; "yes" (the photo is common) is scored as class 0 (real).
# NOTE(review): the recorded output covers 2000 images while the load cell above sampled 400,
# so this output presumably comes from an earlier session - re-run to confirm.
acc1, mat1, ans1 = instruct.QueryImgs("Is this photo common in real world?", true_string="yes")

Answering: 100%|██████████| 2000/2000 [09:48<00:00,  3.40it/s]

Question: Is this photo common in real world?

Acc: 79.40%
         | Pred true | Pred false |
GT true  | 819       | 181        |
GT false | 231       | 769        |





In [16]:
# Per-class breakdown of the "common in real world" run.
# NOTE(review): the recorded output has no "Real images" section although PrintResult prints one,
# so it was presumably produced by an older version of the class.
instruct.PrintResult(three_class=True)

Question: Is this photo common in real world?

=== Overall ===
Acc: 79.40%
         | Pred true | Pred false |
GT true  | 819       | 181        |
GT false | 231       | 769        |


=== Common fake images ===
Acc: 64.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 180       | 320        |


=== Uncommon fake images ===
Acc: 89.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 51        | 449        |


In [25]:
import numpy as np

# Confusion matrix copied by hand from the "common in real world" run above
# (row 0 = ground-truth real images, row 1 = ground-truth fake images).
mat = np.array([[819, 181], [231, 769]])
print(f'=== Real images ===')
# Accuracy on the real images = correctly predicted reals / all reals. The row
# total is taken from the matrix instead of a hard-coded 1000.
print(f'Acc: {(mat[0, 0] / mat[0].sum()) * 100:.2f}%')
instruct.PrintConfusion(mat)

=== Real images ===
Acc: 81.90%
         | Pred true | Pred false |
GT true  | 819       | 181        |
GT false | 231       | 769        |


In [17]:
# Second question of the ensemble. Here the answer "no" (not generated) means a real image,
# so true_string is "no" to keep class 0 = real.
question = "Is this photo generated by a model?"
acc2, mat2, ans2 = instruct.QueryImgs(question, true_string="no")

Answering: 100%|██████████| 2000/2000 [15:43<00:00,  2.12it/s]

Question: Is this photo generated by a model?

Acc: 66.10%
         | Pred true | Pred false |
GT true  | 658       | 342        |
GT false | 336       | 664        |





In [18]:
# Per-class breakdown of the "generated by a model" run above.
instruct.PrintResult(three_class=True)

Question: Is this photo generated by a model?

=== Overall ===
Acc: 66.10%
         | Pred true | Pred false |
GT true  | 658       | 342        |
GT false | 336       | 664        |


=== Common fake images ===
Acc: 71.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 141       | 359        |


=== Uncommon fake images ===
Acc: 61.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 195       | 305        |


In [19]:
# Combine both questions: an image is predicted real (0) only when both answers were 0.
acc, confusion_mat, ans = instruct.MultipleAns(ans1, ans2)

Accuracy: 72.85%
         | Pred true | Pred false |
GT true  | 560       | 440        |
GT false | 103       | 897        |


In [20]:
# Per-class breakdown of the combined answers. MultipleAns does not change `instruct.question`,
# so the header still shows the last question that was asked.
instruct.PrintResult(three_class=True)

Question: Is this photo generated by a model?

=== Overall ===
Acc: 72.85%
         | Pred true | Pred false |
GT true  | 560       | 440        |
GT false | 103       | 897        |


=== Common fake images ===
Acc: 85.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 71        | 429        |


=== Uncommon fake images ===
Acc: 93.60%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 32        | 468        |


In [75]:
# Prompt variant: "AI" instead of "a model". "no" is scored as class 0 (real).
question = "Is this photo generated by AI?"
instruct.QueryImgs(question, true_string="no")
instruct.PrintResult(three_class=True)

Answering: 100%|██████████| 2000/2000 [10:16<00:00,  3.25it/s]

Question: Is this photo generated by AI?

Acc: 50.75%
         | Pred true | Pred false |
GT true  | 998       | 2          |
GT false | 983       | 17         |
Question: Is this photo generated by AI?

=== Overall ===
Acc: 50.75%
         | Pred true | Pred false |
GT true  | 998       | 2          |
GT false | 983       | 17         |


=== Common fake images ===
Acc: 2.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 486       | 14         |


=== Uncommon fake images ===
Acc: 0.60%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 497       | 3          |





In [76]:
# Repeats the per-class breakdown that the previous cell already printed.
instruct.PrintResult(three_class=True)

Question: Is this photo generated by AI?

=== Overall ===
Acc: 50.75%
         | Pred true | Pred false |
GT true  | 998       | 2          |
GT false | 983       | 17         |


=== Common fake images ===
Acc: 2.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 486       | 14         |


=== Uncommon fake images ===
Acc: 0.60%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 497       | 3          |


In [16]:
# Re-run of the "common in real world" question.
# NOTE(review): the recorded output covers only 200 images, i.e. a different (smaller) sample than
# the neighbouring cells - presumably from another session.
question = "Is this photo common in real world?"
acc1, mat1, ans1 = instruct.QueryImgs(question, true_string="yes")
instruct.PrintResult(three_class=True)

Answering: 100%|██████████| 200/200 [01:25<00:00,  2.35it/s]

Question: Is this photo common in real world?

Acc: 81.00%
         | Pred true | Pred false |
GT true  | 82        | 18         |
GT false | 20        | 80         |
Question: Is this photo common in real world?

=== Overall ===
Acc: 81.00%
         | Pred true | Pred false |
GT true  | 82        | 18         |
GT false | 20        | 80         |


=== Common fake images ===
Acc: 74.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 13        | 37         |


=== Uncommon fake images ===
Acc: 86.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 7         | 43         |





In [79]:
# NOTE(review): the recorded output mixes numbers from different runs (the overall matrix does not
# match any single query output in this notebook), which suggests stale kernel state - re-run the
# query before trusting it.
instruct.PrintResult(three_class=True)

Question: Is this photo common in real world?

=== Overall ===
Acc: 80.25%
         | Pred true | Pred false |
GT true  | 836       | 164        |
GT false | 231       | 769        |


=== Common fake images ===
Acc: 64.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 180       | 320        |


=== Uncommon fake images ===
Acc: 89.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 51        | 449        |


In [81]:
# Prompt variant: "possible" instead of "common". "yes" is scored as class 0 (real).
question = "Is this photo possible in real world?"
instruct.QueryImgs(question, true_string="yes")
instruct.PrintResult(three_class=True)

Answering: 100%|██████████| 2000/2000 [16:25<00:00,  2.03it/s]

Question: Is this photo possible in real world?

Acc: 57.45%
         | Pred true | Pred false |
GT true  | 740       | 260        |
GT false | 591       | 409        |
Question: Is this photo possible in real world?

=== Overall ===
Acc: 57.45%
         | Pred true | Pred false |
GT true  | 740       | 260        |
GT false | 591       | 409        |


=== Common fake images ===
Acc: 39.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 301       | 199        |


=== Uncommon fake images ===
Acc: 42.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 290       | 210        |





In [82]:
# Repeats the per-class breakdown that the previous cell already printed.
instruct.PrintResult(three_class=True)

Question: Is this photo possible in real world?

=== Overall ===
Acc: 57.45%
         | Pred true | Pred false |
GT true  | 740       | 260        |
GT false | 591       | 409        |


=== Common fake images ===
Acc: 39.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 301       | 199        |


=== Uncommon fake images ===
Acc: 42.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 290       | 210        |


In [45]:
# Same three-class setup as for SD2, but the fake images come from the "IF" folders:
# "0_real" is used as the common fakes and "1_fake" as the uncommon fakes.
real_dir = "/eva_data0/denny/coco2014/train2014"
fake_common_dir = "/eva_data0/denny/SemanticError/IF/0_real/"
fake_uncommon_dir = "/eva_data0/denny/SemanticError/IF/1_fake/"
real_num = 1000
# Half as many images per fake class, so real and fake totals are balanced.
num = [real_num, real_num//2, real_num//2]
#num = [50, 25, 25]
instruct.LoadData3Class(real_dir, fake_common_dir, fake_uncommon_dir, num)

Loading imgs from /eva_data0/denny/coco2014/train2014: 100%|██████████| 1000/1000 [00:03<00:00, 252.17it/s]
Loading imgs from /eva_data0/denny/SemanticError/IF/0_real/: 100%|██████████| 500/500 [00:13<00:00, 35.73it/s]
Loading imgs from /eva_data0/denny/SemanticError/IF/1_fake/: 100%|██████████| 500/500 [00:14<00:00, 33.47it/s]


In [49]:
# First question of the ensemble on the IF set; "yes" is scored as class 0 (real).
question = "Is this photo common in real world?"
acc1, mat1, ans1 = instruct.QueryImgs(question, true_string="yes")
instruct.PrintResult(three_class=True)

Answering: 100%|██████████| 2000/2000 [09:40<00:00,  3.44it/s]

Question: Is this photo common in real world?

Acc: 74.95%
         | Pred true | Pred false |
GT true  | 836       | 164        |
GT false | 337       | 663        |
Question: Is this photo common in real world?

=== Overall ===
Acc: 74.95%
         | Pred true | Pred false |
GT true  | 836       | 164        |
GT false | 337       | 663        |


=== Common fake images ===
Acc: 39.60%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 302       | 198        |


=== Uncommon fake images ===
Acc: 93.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 35        | 465        |





In [50]:
# Repeats the per-class breakdown that the previous cell already printed.
instruct.PrintResult(three_class=True)

Question: Is this photo common in real world?

=== Overall ===
Acc: 74.95%
         | Pred true | Pred false |
GT true  | 836       | 164        |
GT false | 337       | 663        |


=== Common fake images ===
Acc: 39.60%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 302       | 198        |


=== Uncommon fake images ===
Acc: 93.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 35        | 465        |


In [51]:
# Second question of the ensemble on the IF set; "no" (not generated) is scored as class 0 (real).
question = "Is this photo generated by a model?"
acc2, mat2, ans2 = instruct.QueryImgs(question, true_string="no")
instruct.PrintResult(three_class=True)

Answering: 100%|██████████| 2000/2000 [17:05<00:00,  1.95it/s]

Question: Is this photo generated by a model?

Acc: 63.95%
         | Pred true | Pred false |
GT true  | 645       | 355        |
GT false | 366       | 634        |
Question: Is this photo generated by a model?

=== Overall ===
Acc: 63.95%
         | Pred true | Pred false |
GT true  | 645       | 355        |
GT false | 366       | 634        |


=== Common fake images ===
Acc: 63.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 185       | 315        |


=== Uncommon fake images ===
Acc: 63.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 181       | 319        |





In [52]:
# Repeats the per-class breakdown that the previous cell already printed.
instruct.PrintResult(three_class=True)

Question: Is this photo generated by a model?

=== Overall ===
Acc: 63.95%
         | Pred true | Pred false |
GT true  | 645       | 355        |
GT false | 366       | 634        |


=== Common fake images ===
Acc: 63.00%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 185       | 315        |


=== Uncommon fake images ===
Acc: 63.80%
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 181       | 319        |


In [53]:
# Combine both IF-set answer lists: predicted real (0) only when both questions answered 0.
acc, confusion_mat, ans = instruct.MultipleAns(ans1, ans2)

Accuracy: 69.60%
         | Pred true | Pred false |
GT true  | 554       | 446        |
GT false | 162       | 838        |


In [4]:
# Load the stock "blip2_vicuna_instruct" model (vicuna7b) in eval mode; the commented lines
# are the alternative checkpoints that can be swapped in.
# NOTE(review): this rebinds `model`, `vis_processors` and `txt_processors`, replacing the
# textinv model loaded near the top of the notebook.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_t5_instruct", model_type="flant5xl", is_eval=True, device=device)
#model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_t5_instruct", model_type="flant5xxl", is_eval=True, device=device)
model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_vicuna_instruct", model_type="vicuna7b", is_eval=True, device=device)
#model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_vicuna_instruct", model_type="vicuna13b", is_eval=True, device=device)

Loading checkpoint shards: 100%|██████████| 2/2 [00:13<00:00,  6.96s/it]


In [13]:
# For querying lots of images: real COCO images vs. the SD2 "0_real" folder.
# NOTE(review): `PrepareData` is not defined in any cell visible in this notebook (execution
# counts are out of order). The class-based counterpart appears to be `InstructBLIP.LoadData` -
# confirm before re-running, otherwise this cell raises NameError on a fresh kernel.
real_dir = "/eva_data0/denny/coco2014/train2014"
fake_dir = "/eva_data0/denny/SemanticError/SD2/0_real/"
#imgs, labels = PrepareData("/eva_data0/denny/SemanticError/SD2/test/")
imgs, labels = PrepareData(real_dir, fake_dir, num=1000)

Loading imgs from /eva_data0/denny/coco2014/train2014: 100%|██████████| 1000/1000 [00:03<00:00, 257.93it/s]
Loading imgs from /eva_data0/denny/SemanticError/SD2/0_real/: 100%|██████████| 1000/1000 [00:22<00:00, 45.26it/s]


In [14]:
# For querying lots of images.
# NOTE(review): this calls a free function `QueryImgs(imgs, labels, question, model, ...)` that is
# not defined in the visible notebook and unpacks six return values; `InstructBLIP.QueryImgs`
# has a different signature and returns three - confirm which helper was intended.
question = "Is this photo possible in real world?"
acc, confusion_mat, com_acc, com_conf_mat, uncom_acc, uncom_conf_mat = QueryImgs(imgs, labels, question, model, vis_processors, txt_processors, device)

Answering: 100%|██████████| 2000/2000 [17:37<00:00,  1.89it/s]

Question: Is this photo possible in real world?
Acc: 56.85%
         | Pred true | Pred false |
GT true  | 716       | 284        |
GT false | 579       | 421        |





In [None]:
# Print the overall accuracy and confusion matrix, then the common-fake and uncommon-fake ones.
# NOTE(review): `PrintConfusion` is called as a free function here; in the visible notebook it
# only exists as a method of InstructBLIP.
print(acc)
PrintConfusion(confusion_mat)

print(com_acc)
PrintConfusion(com_conf_mat)
print(uncom_acc)
PrintConfusion(uncom_conf_mat)

In [35]:
# For querying lots of images: real COCO images vs. the SD2 "1_fake" folder.
# NOTE(review): `PrepareData` is not defined in the visible notebook - see whether
# `InstructBLIP.LoadData` should be used instead.
real_dir = "/eva_data0/denny/coco2014/train2014"
fake_dir = "/eva_data0/denny/SemanticError/SD2/1_fake/"
#imgs, labels = PrepareData("/eva_data0/denny/SemanticError/SD2/test/")
imgs, labels = PrepareData(real_dir, fake_dir, num=200)

Loading imgs from /eva_data0/denny/coco2014/train2014: 100%|██████████| 200/200 [00:01<00:00, 199.41it/s]
Loading imgs from /eva_data0/denny/SemanticError/SD2/1_fake/: 100%|██████████| 200/200 [00:03<00:00, 56.16it/s]


In [33]:
# For querying lots of images.
# NOTE(review): relies on a free function `QueryImgs` returning six values, which is not defined
# in the visible notebook.
question = "Is this photo possible in real world?"
acc, confusion_mat, com_acc, com_conf_mat, uncom_acc, uncom_conf_mat = QueryImgs(imgs, labels, question, model, vis_processors, txt_processors, device)

Answering: 100%|██████████| 200/200 [01:31<00:00,  2.19it/s]

Question: Is this photo possible in real world?
Acc: 60.50%
         | Pred true | Pred false |
GT true  | 74        | 26         |
GT false | 53        | 47         |





In [34]:
# Print the overall accuracy and confusion matrix, then the common-fake and uncommon-fake ones.
# NOTE(review): the recorded common and uncommon outputs are identical (0.46 with the same
# matrix), which looks like both were computed from the same subset - verify in the helper.
print(acc)
PrintConfusion(confusion_mat)

print(com_acc)
PrintConfusion(com_conf_mat)
print(uncom_acc)
PrintConfusion(uncom_conf_mat)

0.605
         | Pred true | Pred false |
GT true  | 74        | 26         |
GT false | 53        | 47         |
0.46
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 27        | 23         |
0.46
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 27        | 23         |


In [18]:
# For querying lots of images.
# NOTE(review): here the undefined free function `QueryImgs` is unpacked into two values, while
# other cells unpack six from the same call - these cells come from different helper versions.
question = "Is this photo common in real world?"
acc, confusion_mat = QueryImgs(imgs, labels, question, model, vis_processors, txt_processors, device)

Answering: 100%|██████████| 2000/2000 [10:51<00:00,  3.07it/s]

Question: Is this photo common in real world?
Acc: 83.70%
         | Pred true | Pred false |
GT true  | 812       | 188        |
GT false | 138       | 862        |





In [24]:
# "no" (not generated) is passed as the answer that means a real image.
# NOTE(review): uses the two-value variant of the undefined free function `QueryImgs`.
question = "Is this photo generated by a model?"
acc, confusion_mat = QueryImgs(imgs, labels, question, model, vis_processors, txt_processors, device, true_string="no")

Answering: 100%|██████████| 2000/2000 [15:58<00:00,  2.09it/s]

Question: Is this photo generated by a model?
Acc: 62.55%
         | Pred true | Pred false |
GT true  | 630       | 370        |
GT false | 379       | 621        |





In [39]:
# For querying lots of images: real COCO images vs. the SD2 "real_and_fake" folder.
# NOTE(review): `PrepareData` is not defined in the visible notebook.
real_dir = "/eva_data0/denny/coco2014/train2014"
fake_dir = "/eva_data0/denny/SemanticError/SD2/real_and_fake/"
#imgs, labels = PrepareData("/eva_data0/denny/SemanticError/SD2/test/")
imgs, labels = PrepareData(real_dir, fake_dir, num=1000)

Loading imgs from /eva_data0/denny/coco2014/train2014: 100%|██████████| 1000/1000 [00:04<00:00, 244.46it/s]
Loading imgs from /eva_data0/denny/SemanticError/SD2/real_and_fake/: 100%|██████████| 1000/1000 [00:10<00:00, 91.52it/s]


In [40]:
# NOTE(review): the recorded progress bar stops at 84%, so this run was interrupted and the
# variables assigned here were never updated by it. `QueryImgs` / `PrintConfusion` are also
# not defined as free functions in the visible notebook.
question = "Is this photo generated by a model?"
acc, confusion_mat, com_acc, com_conf_mat, uncom_acc, uncom_conf_mat = QueryImgs(imgs, labels, question, model, vis_processors, txt_processors, device, true_string="no")
print(acc)
PrintConfusion(confusion_mat)

Answering:  84%|████████▎ | 1671/2000 [13:03<02:13,  2.46it/s]

In [None]:
# Print the overall accuracy and confusion matrix, then the common-fake and uncommon-fake ones.
# NOTE(review): the recorded output sums to 200 images, not the 2000 of the interrupted run in
# the previous cell, so it reflects variables left over from an earlier query.
print(acc)
PrintConfusion(confusion_mat)

print(com_acc)
PrintConfusion(com_conf_mat)
print(uncom_acc)
PrintConfusion(uncom_conf_mat)

0.715
         | Pred true | Pred false |
GT true  | 64        | 36         |
GT false | 21        | 79         |
0.78
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 11        | 39         |
0.78
         | Pred true | Pred false |
GT true  | 0         | 0          |
GT false | 11        | 39         |


#### Load an example image

In [None]:
# Load one example image as RGB and show it inline.
# NOTE(review): this path starts with "/eva_data/" whereas every other path in the notebook
# uses "/eva_data0/" - confirm this is not a typo.
#raw_image = Image.open("../docs/_static/merlion.png").convert("RGB")
raw_image = Image.open("/eva_data/denny/SemanticError/SD2/1_fake/00000.png").convert("RGB")
#display(raw_image.resize((596, 437)))
display(raw_image)

In [None]:
# Set up the device to use: the GPU when CUDA reports one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### Load instructBLIP model

In [None]:
# Load the "blip2_t5_instruct" model (flant5xl) in eval mode with its preprocessors; the
# commented lines list the other checkpoints that can be swapped in.
model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_t5_instruct", model_type="flant5xl", is_eval=True, device=device)
#model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_t5_instruct", model_type="flant5xxl", is_eval=True, device=device)
#model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_vicuna_instruct", model_type="vicuna7b", is_eval=True, device=device)
#model, vis_processors, txt_processors = load_model_and_preprocess(name="blip2_vicuna_instruct", model_type="vicuna13b", is_eval=True, device=device)

In [None]:
# List the available image processors (the rest of the notebook uses the "eval" one).
vis_processors.keys()

In [None]:
# List the available text processors (the cells below use the "eval" one).
txt_processors.keys()

In [None]:
# Question for the single-image example; the commented lines are alternative prompts.
#question = "Which city is this photo taken?"
#question = "What is the animal in the photo?"
question = "Is this photo possible in real world?"

In [None]:
# Use the "eval" processors for inference: the image becomes a one-image batch on `device`,
# and the question is passed through the text processor.
# Note: `question` is overwritten with its processed form, so re-running this cell processes it again.
image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
question = txt_processors["eval"](question)

samples = {"image": image, "text_input": question}

#### generative question answering

In [None]:
# Let the model generate a free-form answer for the sample built above.
model.predict_answers(samples=samples, inference_method="generate")

#### ranking-based question answering

In [None]:
# Rank answer candidates by their likelihood and return the best answer.
# NOTE(review): the candidates are city names, but `samples` currently holds the question
# "Is this photo possible in real world?" - switch the question (or the candidates) so they match.
answer_candidates = ["Singapore", "London", "Palo Alto", "Tokyo"]

model.predict_answers(samples, answer_list=answer_candidates, inference_method="rank")

#### Ask questions in batch

In [None]:
batch_size = 3

# Build a batch of samples by replicating the single preprocessed image along the batch
# dimension (it could equally be several different images).
image_batch = image.repeat(batch_size, 1, 1, 1)

# One preprocessed question per image; the number of questions must match the number of images.
question_1, question_2, question_3 = (
    txt_processors["eval"](raw_question)
    for raw_question in (
        "Which city is this photo taken?",
        "What time is this during the day?",
        "Is it Singapore or London?",
    )
)
question_batch = [question_1, question_2, question_3]

model.predict_answers(
    samples={"image": image_batch, "text_input": question_batch},
    inference_method="generate",
)
