In [120]:
import torch
from dataset_utils import edge_stratified_split
from classification_dataset import ClassificationDataset
from  config.parser_config import config_parser
from torch.utils.data import DataLoader

# GET EDGE DATA

In [121]:
# Parse the local-test configuration; the command line is simulated with an explicit argv list.
parser = config_parser()
argv = '--config localtest/localtest_config'.split(' ')
args = parser.parse_args(argv)

# Full classification dataset with integer (non one-hot) labels and no augmentation.
full_dataset = ClassificationDataset(
    one_hot=False,
    augmentation=None,
    npz_path=args.npz_path,
    image_path=args.image_path,
    label_path=args.label_path,
    size=args.size,
    normalize=True,
)

# Category names treated as edge cases in this notebook.
edge_classes = [
    "Gryllteiste",
    "Schnatterente",
    "Buchfink",
    "unbestimmte Larusmöwe",
    "Schmarotzer/Spatel/Falkenraubmöwe",
    "Brandgans",
    "Wasserlinie mit Großalgen",
    "Feldlerche",
    "Schmarotzerraubmöwe",
    "Grosser Brachvogel",
    "unbestimmte Raubmöwe",
    "Turmfalke",
    "Trauerseeschwalbe",
    "unbestimmter Schwan",
    "Sperber",
    "Kiebitzregenpfeifer",
    "Skua",
    "Graugans",
    "unbestimmte Krähe",
]

# Resolve every category name to the label used by the dataset.
edge_labels = list(map(full_dataset._get_label_from_cat, edge_classes))

# Split the edge-class samples with fraction 0.8 and a fixed random state.
edge_train_data, edge_test_data = edge_stratified_split(
    full_dataset,
    full_labels=full_dataset._labels,
    edge_labels=edge_labels,
    fraction=0.8,
    random_state=0,
)

# Both loaders yield one shuffled sample at a time.
# A dict literal is used on purpose: the name `dict` is rebound further down in this notebook.
edge_loader_options = {
    'batch_size': 1,
    'shuffle': True,
    'num_workers': args.num_workers,
}
trainloader = DataLoader(edge_train_data, **edge_loader_options)
testloader = DataLoader(edge_test_data, **edge_loader_options)

In [None]:
# train_dict = {}
# for _, label, cat, name in trainloader:
#     cat = cat[0]
#     name = name[0]
#     if cat in train_dict.keys():
#         train_dict[cat].append(name)
#     else:
#         train_dict[cat] = [name]
        

# test_dict = {}
# for _, label, cat, name in testloader:
#     cat = cat[0]
#     name = name[0]
#     if cat in test_dict.keys():
#         test_dict[cat].append(name)
#     else:
#         test_dict[cat] = [name]

In [None]:
# Destination root used by the (commented-out) copy cells below, which create
# <to_dir>/<category>/train and <to_dir>/<category>/test folders.
# NOTE(review): absolute, machine-specific path; a later cell rebinds `to_dir` to 'gan_generated_data'.
to_dir = '/Users/thang/Documents/Thang/edge_cases'

In [None]:
# import os
# import shutil 
# for cat in train_dict.keys():
#     for image in train_dict[cat]:
#         from_loc = os.path.join(args.image_path, image)        
#         train_dir = os.path.join(to_dir, cat, 'train')
#         if not os.path.exists(train_dir):
#             os.makedirs(train_dir)
#         to_loc = os.path.join(train_dir, image)
#         shutil.copy(from_loc, train_dir)
        

In [None]:
# for cat in test_dict.keys():
#     for image in test_dict[cat]:
#         from_loc = os.path.join(args.image_path, image)        
#         train_dir = os.path.join(to_dir, cat, 'test')
#         if not os.path.exists(train_dir):
#             os.makedirs(train_dir)
#         to_loc = os.path.join(train_dir, image)
#         shutil.copy(from_loc, train_dir)

# GAN

In [None]:
import torch
from model.vanilla_cgan import Generator
import torch.nn as nn
# Everything in this section runs on the CPU.
device = torch.device('cpu')
map_location = torch.device('cpu')

# Hyper-parameters of the vanilla conditional GAN.
class_num, img_size = 118, 256
model_dim = 512
lr = 5e-05
batch_size, z_size = 9, 50

# Layer widths grow as model_dim, 2 * model_dim, 4 * model_dim.
generator_layer_size = [model_dim * factor for factor in (1, 2, 4)]
generator = Generator(generator_layer_size, z_size, img_size, class_num)
# discriminator = Discriminator(discriminator_layer_size, img_size, class_num).to(device)

criterion = nn.BCELoss()
g_optimizer = torch.optim.Adam(params=generator.parameters(), lr=lr)
# d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr)

In [None]:
# Restore the generator weights, the optimizer state and the training metadata on the CPU.
map_location = torch.device('cpu')
g_checkpoint = torch.load(
    './saved_models/vanilla_gan/generator_TRAIN_cGAN,_dim 512,_lr _5e-05,_epochs _10000,_size _256.pt',
    map_location=map_location,
)
generator.load_state_dict(g_checkpoint['model_state_dict'])
g_optimizer.load_state_dict(g_checkpoint['optimizer_state_dict'])
epoch, g_loss = g_checkpoint['epoch'], g_checkpoint['loss']

In [None]:
from torch.autograd import Variable
import numpy as np
# Per-element noise scale drawn uniformly from [0, 5); the latent batch is then sampled
# from a zero-mean normal distribution with that scale.
std = (torch.rand(batch_size, z_size) * 5).to(device)
z = torch.normal(mean=0.0, std=std)

# One label per latent vector. 64 is the value printed next to "Schnatterente" in the
# `edge_labels` output of this notebook -- NOTE(review): confirm against the dataset in use.
fake_labels = torch.LongTensor([64] * batch_size).to(device)

# Fix: the generator was fed `std` while the sampled `z` was never used.
# Inference only, so no autograd graph is recorded; this also lets the result be
# converted with `.numpy()` later on.
with torch.no_grad():
    raw_fake_images = generator(z, fake_labels)

# Rescale to 0..255 (assumes the generator output lies in [-1, 1] -- TODO confirm).
fake_images = torch.round(raw_fake_images * 127.5 + 127.5).float()

In [None]:
import matplotlib.pyplot as plt
import torchvision

def show(img):
    """Display a channels-first image tensor on the current matplotlib figure.

    Args:
        img: tensor in (channels, height, width) layout. The values are multiplied
            by 255 before the uint8 conversion, so they are expected in [0, 1].

    The tensor is detached and moved to the CPU first, because ``Tensor.numpy()``
    raises for tensors that require grad or live on another device. Scaled values
    are clipped to 0..255 so that out-of-range inputs saturate instead of wrapping
    around in the uint8 cast.
    """
    scaled = img.detach().cpu().numpy() * 255
    npimg = np.clip(scaled, 0, 255).astype(np.uint8)
    # matplotlib expects (height, width, channels)
    plt.imshow(np.transpose(npimg, (1, 2, 0)), interpolation='nearest')
    plt.gcf().set_dpi(500)

# `show` multiplies its input by 255, so the 0..255 images are brought back to [0, 1]
# first (assumes `fake_images` holds values in 0..255 -- TODO confirm).
# `detach()` drops any autograd history, which `.numpy()` inside `show` rejects.
# `make_grid` has no `ncol` argument (the column count is `nrow`), so it was removed.
grid = torchvision.utils.make_grid(fake_images.detach() / 255, nrow=5, padding=20)
show(grid)

In [125]:
# Display the labels that were resolved for `edge_classes` (same order as that list).
edge_labels

[29, 64, 9, 103, 62, 7, 90, 17, 63, 28, 106, 87, 83, 112, 71, 35, 69, 27, 102]

# WASSERSTEIN GAN

In [123]:
from model.wasserstein_cgan import Discriminator, Generator, initialize_weights
# Parse the local GAN configuration; the command line is simulated with an explicit argv list.
parser = config_parser()
argv = '--config localtest/local_gan_config'.split(' ')
args = parser.parse_args(argv)

# Hyper-parameters: most come from the parsed config, the rest are fixed here.
LEARNING_RATE, BATCH_SIZE, IMAGE_SIZE = args.lr, args.batch_size, args.size
CHANNELS_IMG = 3
Z_DIM, NUM_EPOCHS = args.latent_size, args.epochs
FEATURES_CRITIC = FEATURES_GEN = args.model_dim
CRITIC_ITERATIONS, WEIGHT_CLIP = 7, 0.01

# Full classification dataset with integer (non one-hot) labels and no augmentation.
full_dataset = ClassificationDataset(
    one_hot=False,
    augmentation=None,
    npz_path=args.npz_path,
    image_path=args.image_path,
    label_path=args.label_path,
    size=args.size,
    normalize=True,
)

# Category names treated as edge cases in this notebook.
edge_classes = [
    "Gryllteiste",
    "Schnatterente",
    "Buchfink",
    "unbestimmte Larusmöwe",
    "Schmarotzer/Spatel/Falkenraubmöwe",
    "Brandgans",
    "Wasserlinie mit Großalgen",
    "Feldlerche",
    "Schmarotzerraubmöwe",
    "Grosser Brachvogel",
    "unbestimmte Raubmöwe",
    "Turmfalke",
    "Trauerseeschwalbe",
    "unbestimmter Schwan",
    "Sperber",
    "Kiebitzregenpfeifer",
    "Skua",
    "Graugans",
    "unbestimmte Krähe",
]

# Resolve every category name to the label used by the dataset.
edge_labels = list(map(full_dataset._get_label_from_cat, edge_classes))

# Only the training part of the edge split is needed here.
edge_train_data, _ = edge_stratified_split(
    full_dataset,
    full_labels=full_dataset._labels,
    edge_labels=edge_labels,
    fraction=0.8,
    random_state=0,
)
loader = DataLoader(
    edge_train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=args.num_workers,
)

# The generator is conditioned on the number of classes reported by the dataset.
CLASS_NUM = full_dataset._get_num_classes()
GEN = Generator(Z_DIM, CHANNELS_IMG, FEATURES_GEN, CLASS_NUM)

In [124]:
# Load the stored Wasserstein generator weights onto the CPU.
map_location = torch.device('cpu')
wgan_checkpoint_path = './saved_models/waasserstein_gan/generator_TRAIN_cGAN,_dim:128,_lr:_0.001,_epochs:_5000,_size:_256.pt'
g_checkpoint = torch.load(wgan_checkpoint_path, map_location=map_location)
# Kept as the last expression so the key-matching report is displayed by the cell.
GEN.load_state_dict(g_checkpoint['model_state_dict'])

<All keys matched successfully>

In [None]:
# Run the Wasserstein generator on a handful of random latent vectors, all with label 0.
n_samples = 5
noise = torch.randn(n_samples, Z_DIM, 1, 1).to(device)
labels = torch.zeros(n_samples, 1, 1, 1).to(device)
fake = GEN(noise, labels)

In [None]:
labels = torch.arange(

# GAN ADDED IMAGES

In [None]:
import cv2
import os
import math
import pandas as pd
import numpy as np
import csv
def _safe_image_name(cat, index):
    """Return '<cat>_<index>.png' with spaces, German umlauts, 'ß' and '/' replaced."""
    name = '{}_{}.png'.format(cat, index)
    replacements = ((' ', '_'), ('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('ß', 'ss'), ('/', '_'))
    for old, new in replacements:
        name = name.replace(old, new)
    return name


# Label rows restricted to the images that are part of the dataset, then counted per category.
data = pd.read_csv(args.label_path, index_col=False)[['tp', 'name', 'file']]
data = data[data['file'].isin(full_dataset._image_names)]
max_count = data[['file', 'name']].groupby('name').count().sort_values(by='name', ascending=False)

to_dir = 'gan_generated_data'
all_dir = os.path.join(to_dir, 'all_fake')
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(all_dir, exist_ok=True)

csv_info = []
for label, cat in zip(edge_labels, edge_classes):
    # 80 % of the category's image count, rounded down. Stored under a dedicated name:
    # the previous `max` shadowed the builtin for every later cell.
    train_count = math.floor(max_count.loc[cat, 'file'] * 0.8)
    image_limit = train_count // 2
    count = 0

    # i scales the uniform latent noise (i = 0 gives an all-zero latent batch).
    for i in range(0, 10, 3):
        z = (torch.rand(batch_size, z_size) * i).to(device)
        # One label per latent vector (was hard-coded to 9 instead of batch_size).
        fake_labels = torch.LongTensor([label] * batch_size).to(device)
        with torch.no_grad():  # inference only
            raw_fake_images = generator(z, fake_labels)
        fake_images = torch.round(raw_fake_images * 127.5 + 127.5).float()

        # Only the first image of each generated batch is written out.
        # NOTE(review): cv2.imwrite interprets 3-channel arrays as BGR -- confirm the
        # channel order produced by the generator.
        s = fake_images[0].permute(1, 2, 0).detach().numpy()
        image_name = _safe_image_name(cat, i)
        cv2.imwrite(os.path.join(all_dir, image_name), s)
        csv_info.append([image_name, cat])
        # fake_dir = os.path.join(to_dir, cat, 'fake')
        # if not os.path.exists(fake_dir):
            # os.makedirs(fake_dir)
        # cv2.imwrite(os.path.join(fake_dir, '{}_{}.png'.format(cat, i)), s)
        # train_dir = os.path.join(to_dir, cat, 'train')
        # cv2.imwrite(os.path.join(train_dir, '{}_{}.png'.format(cat, i)), s)

        # Checked after writing, so every category gets at least one image.
        count += 1
        if count >= image_limit:
            break

# Manifest of the generated images. newline='' is what the csv module asks for (it
# prevents blank rows on Windows); UTF-8 keeps the umlauts in the category names intact.
csv_path = os.path.join(to_dir, 'gan_info.csv')
with open(csv_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['image_name', 'category'])
    writer.writerows(csv_info)

In [None]:
import math
# Per-category image counts, then 80 % of the 'Gryllteiste' count rounded down.
count = (
    data[['file', 'name']]
    .groupby('name')
    .count()
    .sort_values(by='name', ascending=False)
)
gryllteiste_rows = count[count.index == 'Gryllteiste']
math.floor(gryllteiste_rows['file'].iloc[0] * 0.8)

In [None]:
# Read the manifest written above back in and collect the image file names.
info = pd.read_csv(
    csv_path,
    index_col=False,
)
a = info.loc[:, 'image_name'].to_list()



In [6]:
# Parse the local-test configuration; the command line is simulated with an explicit argv list.
parser = config_parser()
argv = '--config localtest/localtest_config'.split(' ')
args = parser.parse_args(argv)

import classification_dataset
from dataset_utils import stratified_split

# Full classification dataset with integer (non one-hot) labels and no augmentation.
full_dataset = classification_dataset.ClassificationDataset(
    one_hot=False,
    augmentation=None,
    npz_path=args.npz_path,
    image_path=args.image_path,
    label_path=args.label_path,
    size=args.size,
    normalize=True,
)

# Stratified split with fraction 0.8 and a fixed random state.
train_data, train_set_labels, validation_data, test_set_labels = stratified_split(
    dataset=full_dataset,
    labels=full_dataset._labels,
    fraction=0.8,
    random_state=0,
)

# Both loaders use shuffled batches of 8 and 8 worker processes.
# A dict literal is used on purpose: the name `dict` is rebound further down in this notebook.
split_loader_options = {
    'batch_size': 8,
    'shuffle': True,
    'num_workers': 8,
}
train_dataloader = DataLoader(train_data, **split_loader_options)
validation_dataloader = DataLoader(validation_data, **split_loader_options)

In [44]:
# Inspect only the first validation batch and keep the samples whose label is an edge label.
for img, label, cat in validation_dataloader:
    # One boolean tensor per edge label; their sum is non-zero wherever any of them matched.
    label_matches = [label == edge_label for edge_label in edge_labels]
    mask = sum(label_matches).bool()
    indices = mask.nonzero().flatten()
    cat = np.array(cat)[mask]
    img, label = img[indices], label[indices]
    break



In [48]:
# Number of samples left in `label` after the edge-label filtering of the inspected batch.
len(label)

0

In [42]:
import numpy as np
# Scratch check: index an array of `c` with the boolean `mask`.
# NOTE(review): `c` is not assigned in any visible cell -- this presumably relied on
# leftover kernel state and will fail on a fresh kernel.
np.array(c)[mask]

array([], dtype='<U35')

In [41]:
# Scratch check: an all-False mask with eight entries
# (presumably one per sample of a batch of 8 -- verify).
mask = [False]*8
mask

[False, False, False, False, False, False, False, False]

In [None]:
# Toy float tensors for trying out the membership mask: values of `b` are looked up in `a`.
a = torch.Tensor(
    [1, 2, 3, 4, 5]
)
b = torch.Tensor(
    [1, 2, 5]
)

In [None]:
# Positions of `a` whose value occurs in `b`: one comparison per value of `b`,
# summed and converted to a boolean mask.
value_matches = [a == value for value in b]
mask = sum(value_matches).bool()
indices = mask.nonzero().flatten()
indices

# ILLUSTRATION OF GAN PICS

In [76]:
# Root folder that holds one sub-folder per edge class.
edge_case_dir = '/Users/thang/Documents/Thang/edge_cases'

# Category names treated as edge cases in this notebook.
edge_classes = [
    "Gryllteiste",
    "Schnatterente",
    "Buchfink",
    "unbestimmte Larusmöwe",
    "Schmarotzer/Spatel/Falkenraubmöwe",
    "Brandgans",
    "Wasserlinie mit Großalgen",
    "Feldlerche",
    "Schmarotzerraubmöwe",
    "Grosser Brachvogel",
    "unbestimmte Raubmöwe",
    "Turmfalke",
    "Trauerseeschwalbe",
    "unbestimmter Schwan",
    "Sperber",
    "Kiebitzregenpfeifer",
    "Skua",
    "Graugans",
    "unbestimmte Krähe",
]

In [105]:
from pathlib import Path
from torchvision.io import read_image
# Maps '<edge class>_<split>' to the list of resized image tensors found for it.
# NOTE(review): the name `dict` shadows the builtin; it is kept because `img_grid`
# binds this object as the default value of its `dict` parameter.
dict = {}

# Build the transform once instead of once per image.
resize_to_500 = torchvision.transforms.Resize(size=(500, 500))

for edge_class in edge_classes:
    # The loop variable used to be called `type`, which shadowed the builtin for every later cell.
    for split in ('train', 'test', 'fake'):
        dir_path = Path(edge_case_dir, edge_class, split)
        # The 'fake' folder is searched for PNG files, the other two for JPEG files.
        pattern = '*.png' if split == 'fake' else '*.jpg'
        # Sorted so that the image order (and therefore the grids) is reproducible;
        # glob itself yields files in arbitrary order.
        img_paths = sorted(dir_path.glob(pattern))
        dict['{}_{}'.format(edge_class, split)] = [
            resize_to_500(read_image(str(img_path))) for img_path in img_paths
        ]





In [113]:
from torchvision.utils import make_grid
def img_grid(edge_class, dict = dict):
    """Build an image grid from the first training images of one edge class.

    Args:
        edge_class: category name; used as the key prefix into ``dict``.
        dict: mapping from '<edge class>_<split>' (split is 'train', 'test' or
            'fake') to a list of image tensors. Defaults to the module-level
            ``dict`` that exists when this function is defined.

    Returns:
        The tensor returned by ``make_grid`` for the first two training images
        when the class has three or fewer, otherwise for the first three.

    Raises:
        KeyError: if one of the three split keys is missing from ``dict``.
        ValueError: if the class has no training images.
    """
    train_pics = dict['{}_train'.format(edge_class)]
    # Fix: the test split used to be looked up under the '_train' key (copy-paste slip).
    test_pics = dict['{}_test'.format(edge_class)]
    fake_pics = dict['{}_fake'.format(edge_class)]
    # NOTE(review): test_pics and fake_pics are fetched but not placed in the grid yet.

    if not train_pics:
        raise ValueError('no training images for edge class {!r}'.format(edge_class))

    # Slicing instead of indexing [0], [1]: a class with a single image no longer
    # raises IndexError.
    shown = 2 if len(train_pics) <= 3 else 3
    grid_list = train_pics[:shown]
    Grid = make_grid(grid_list, nrow=4, padding=25)
    return Grid

In [119]:
# One training-image grid per edge class, in the order of `edge_classes`.
grids = [img_grid(edge_class) for edge_class in edge_classes]


In [None]:
import matplotlib.pyplot as plt
import torchvision

def show_transform(pics, names=None, title=None, cols=None):
  """Show channels-first images on a matplotlib figure with three rows of axes.

  The previous body could not run: it referenced names that were never defined
  (``cols``, ``i``, ``aug_img``, ``names``, ``title``, ``img``) and called
  ``plt.imshow()`` without data. Those inputs are now optional parameters, so a
  call with only ``pics`` works.

  Args:
    pics: non-empty sequence of images in (channels, height, width) layout,
      given as torch tensors or array-likes.
    names: optional sequence with one title per image.
    title: optional figure title.
    cols: number of columns; defaults to the smallest value that fits all
      images into three rows.

  Returns:
    ``(fig, axes)`` where ``axes`` is always a 2-D array of shape (3, cols).

  Raises:
    ValueError: if ``pics`` is empty or does not fit into ``3 * cols`` axes.
  """
  rows = 3
  fig_size = (15, 10)
  pic_title_size = fig_size[0]

  pics = list(pics)
  if not pics:
    raise ValueError('pics must contain at least one image')
  if cols is None:
    cols = -(-len(pics) // rows)  # ceiling division
  if rows * cols < len(pics):
    raise ValueError('{} images do not fit into a {}x{} grid'.format(len(pics), rows, cols))

  # squeeze=False keeps `axes` two-dimensional even when cols == 1.
  fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=fig_size, squeeze=False)

  for j, pic in enumerate(pics):
    ax = axes[j // cols, j % cols]
    # Torch tensors are detached and moved to the CPU before the numpy conversion.
    array = pic.detach().cpu().numpy() if hasattr(pic, 'detach') else np.asarray(pic)
    # matplotlib expects (height, width, channels)
    ax.imshow(np.transpose(array, (1, 2, 0)), interpolation='nearest')
    ax.tick_params(left = False, right = False , labelleft = False ,
                labelbottom = False, bottom = False)
    if names is not None:
      ax.set_title(names[j], fontsize = pic_title_size, pad= pic_title_size)

  # Hide the axes that did not receive an image.
  for ax in axes.flat[len(pics):]:
    ax.axis('off')

  if title:
    fig.suptitle(title, fontsize = pic_title_size*1.5)
  return fig, axes

