Todo:

Write function that, given a reference set, finds the K (hyperparam) nearest neighbors in the 7d feature space and returns the average vote for those neighbors

Cross validate on val sets



Conditions
"Train" on domain, eval on same domain (try on each of the 4)
"Train" on n domains, eval on 1 (1 vs 1, 2 vs 1, 3 vs 1)

In [91]:
from collections import defaultdict
import csv
from itertools import islice
from functools import reduce
import os
import sys

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Image
import scipy
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm_notebook
import torch
from torchvision import models, transforms, datasets

In [58]:
#NOTE: Votes are counted as [fake, real], not [incorrect, correct] !

dataset_counts = defaultdict(int)
img_counts = defaultdict(lambda : defaultdict(int))
unique_imgs = defaultdict(set)
img_votes = defaultdict(lambda: defaultdict(lambda: [0,0])) #no,yes tuples by dataset and image

with open('../../all_gans_inf.csv') as f:
    reader = csv.DictReader(f)
    for row in islice(reader, None):
        img = row['img']        
#         dataset_name = img[:img.index('/')]
        split_name = img.split('/')
        assert(len(split_name) == 2)
        dataset_name, img_name = split_name
        
        vote_index = 1 if row['correctness'] == row['realness'] else 0
        if dataset_name == 'began5000' and row['realness'] == 'True': print(row)

        img_votes[dataset_name][img_name][vote_index] += 1
        
        dataset_counts[dataset_name] += 1
        img_counts[dataset_name][img_name] += 1
unique_imgs = {dataset: len(img_counts[dataset]) for dataset in img_counts}

In [59]:
progan_votes = img_votes['progan5000']
began_votes = img_votes['began5000']
stylegan_votes = img_votes['styleganceleba5000']
len(progan_votes), len(began_votes), len(stylegan_votes)

(2233, 2397, 3103)

In [60]:
progan_filenames = torch.load('progan_filenames.pt', map_location=torch.device('cpu'))
progan_filenames = [os.path.split(x)[-1] for x in progan_filenames]
progan_distance_features = torch.load('progan_distance_features.pt', map_location=torch.device('cpu'))

stylegan_filenames = torch.load('stylegan_filenames.pt', map_location=torch.device('cpu'))
stylegan_filenames = [os.path.split(x)[-1] for x in stylegan_filenames]
stylegan_distance_features = torch.load('stylegan_distance_features.pt', map_location=torch.device('cpu'))

began_filenames = torch.load('began_filenames.pt', map_location=torch.device('cpu'))
began_filenames = [os.path.split(x)[-1] for x in began_filenames]
began_distance_features = torch.load('began_distance_features.pt', map_location=torch.device('cpu'))

wgan_filenames = torch.load('wgan_filenames.pt', map_location=torch.device('cpu'))
wgan_filenames = [os.path.split(x)[-1] for x in wgan_filenames]
wgan_distance_features = torch.load('wgan_distance_features.pt', map_location=torch.device('cpu'))

In [61]:
with open('../progan_train_set.txt') as f:
    progan_train_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(progan_train_files[:5])

with open('../progan_val_set.txt') as f:
    progan_val_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(progan_val_files[:5])

with open('../stylegan_train_set.txt') as f:
    stylegan_train_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(stylegan_train_files[:5])

with open('../stylegan_val_set.txt') as f:
    stylegan_val_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(stylegan_val_files[:5])

with open('../began_train_set.txt') as f:
    began_train_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(began_train_files[:5])

with open('../began_val_set.txt') as f:
    began_val_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(began_val_files[:5])

with open('../wgan_train_set.txt') as f:
    wgan_train_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(began_train_files[:5])

with open('../wgan_val_set.txt') as f:
    wgan_val_files = [os.path.split(x.strip())[-1] for x in f.readlines()]
#print(began_val_files[:5])

In [62]:
progan_features_by_file = {progan_filenames[i] : progan_distance_features[i] for i in range(len(progan_filenames))}
print(len(progan_features_by_file))

stylegan_features_by_file = {stylegan_filenames[i] : stylegan_distance_features[i] for i in range(len(stylegan_filenames))}
print(len(stylegan_features_by_file))

began_features_by_file = {began_filenames[i] : began_distance_features[i] for i in range(len(began_filenames))}
print(len(began_features_by_file))

wgan_features_by_file = {wgan_filenames[i] : wgan_distance_features[i] for i in range(len(wgan_filenames))}
print(len(wgan_features_by_file))

2233
3103
1966
4251


In [68]:
progan_train_feats = np.array([progan_features_by_file[x] for x in progan_train_files])
print(progan_train_feats.shape)

stylegan_train_feats = np.array([stylegan_features_by_file[x] for x in stylegan_train_files])
print(stylegan_train_feats.shape)

began_train_feats = np.array([began_features_by_file[x] for x in began_train_files])
print(began_train_feats.shape)

wgan_train_feats = np.array([wgan_features_by_file[x] for x in wgan_train_files])
print(wgan_train_feats.shape)

(1787, 7)
(2483, 7)
(1574, 7)
(3401, 7)


In [102]:
correct_guesses = 0
incorrect_guesses = 0

K = 250

for x in islice(progan_val_files, None):
    #print(x)
    feats = progan_features_by_file[x]
    dists = np.linalg.norm(feats - progan_train_feats, axis=1)
    #nn_index = np.argmin(dists)
    #nn = progan_train_files[nn_index]
    
    kNN = sorted(enumerate(dists), key = lambda x: x[1])[:K]
    kNN_votes = reduce(lambda x, y: [x[0] + y[0], x[1] + y[1]], [progan_votes[progan_train_files[z[0]]] for z in kNN])
    
    fake_votes, real_votes = kNN_votes
    pred_real = real_votes >= fake_votes
    
    val_fake_votes, val_real_votes = progan_votes[x]
    
    if pred_real:
        correct_guesses += val_real_votes
        incorrect_guesses += val_fake_votes
    else:
        correct_guesses += val_fake_votes
        incorrect_guesses += val_real_votes

print(correct_guesses, incorrect_guesses, correct_guesses / (correct_guesses + incorrect_guesses))

190 116 0.6209150326797386


In [75]:
for x in islice(began_val_files,50):
    #print(x)
    feats = began_features_by_file[x]
    dists = np.linalg.norm(feats - began_train_feats, axis=1)
    nn_index = np.argmin(dists)
    nn = began_train_files[nn_index]
    #print(began_votes[nn])
    

In [94]:
kNN = sorted(enumerate(dists), key = lambda x: x[1])[:5]
print(kNN)

reduce(lambda x, y: [x[0] + y[0], x[1] + y[1]], [progan_votes[progan_train_files[x[0]]] for x in kNN])

[(1309, 0.36460143), (373, 0.40736032), (831, 0.47139472), (1171, 0.49929243), (828, 0.5257756)]


[4, 2]

In [85]:
np.argmin(dists)

1309