In [None]:
# Step up to the parent directory — presumably the repository root, so that the
# `src` package imported below resolves (verify against the project layout).
%cd ..

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import random
import networkx as nx
from sklearn import preprocessing
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import dgl.function as fn
from dgl.nn.functional import edge_softmax

In [None]:
from src import config
from src.utils import *

In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell is executed, so edits to imported project code are picked up without a restart.
%load_ext autoreload
%autoreload 2

---

In [None]:
# Location of the GCD/train folder inside the configured data directory.
path = "{}/GCD/train".format(config.DATA_DIR)

In [None]:
# Recursively collect every .jpg file found below `path`.
image_files = glob.glob(pathname=os.path.join(path, "**/*.jpg"), recursive=True)

In [None]:
# Make the sampled batch reproducible across kernel restarts:
# glob returns files in a filesystem-dependent order, so fix the order first,
# then shuffle with a dedicated seeded generator instead of the global RNG.
SHUFFLE_SEED = 42
image_files.sort()
random.Random(SHUFFLE_SEED).shuffle(image_files)

In [None]:
# Working batch: the first `batch_size` paths of the shuffled list.
batch_size = 16
batch_img_paths = image_files[0:batch_size]

In [None]:
# The target of an image is the second "_"-separated token of its file name.
targets = [
    os.path.basename(img_path).split("_")[1]
    for img_path in batch_img_paths
]

In [None]:
# One row per image of the batch: its path and its target string.
df = pd.DataFrame(data={"img_path": batch_img_paths, "target": targets})

In [None]:
# Encode the target strings as integer ids, stored in a new `label` column.
le = preprocessing.LabelEncoder()
df["label"] = le.fit_transform(df["target"])

### Read images

In [None]:
# Load every image of the batch and stack the results into one array.
images = np.array(list(map(readImage, batch_img_paths)))

# Standardise with a single mean / std computed over the whole batch.
images = (images - images.mean()) / images.std()

In [None]:
# Number of images per target in the batch.
df['target'].value_counts()

In [None]:
# Bar chart of how many images of each target ended up in the batch.
plt.figure(figsize=(8, 4))
df['target'].value_counts().plot.bar(rot=0)  # rot=0 keeps the tick labels horizontal
plt.grid(axis='x')
plt.title('Numero de imagenes por tipo en el batch')
plt.show()

### DEEP FEATURES

In [None]:
# Feature extractor: pretrained ResNet-50 with its last child module dropped
# (`children()[:-1]`), so the forward pass stops before the final layer.
# The model is switched to eval mode: a freshly built model is in training mode,
# in which BatchNorm normalises with the statistics of the current batch (and updates
# its running averages), making the extracted features depend on the batch composition.
cnn = nn.Sequential(*(list(models.resnet50(pretrained=True).children())[:-1])).eval()

In [None]:
# Float32 tensor copy of the standardised image batch.
batch_images = torch.tensor(images, dtype=torch.float32)

In [None]:
# One 2048-d feature vector per image. Gradients are disabled because nothing is
# trained here. The float tensor `batch_images` built in the previous cell is reused
# instead of converting `images` a second time.
with torch.no_grad():
    features = cnn(batch_images).reshape(-1, 2048)

In [None]:
# Sanity check: the reshape above makes this (number of images, 2048).
features.shape

### SIMILARITY AND ADJACENCY MATRIX

In [None]:
# Cosine-similarity cut-off: image pairs scoring above it are connected in the adjacency matrix.
THRESHOLD=0.7

In [None]:
# L2-normalise every feature vector so that a dot product equals cosine similarity.
# The norm is clamped away from zero: an all-zero feature row would otherwise turn
# into NaN similarities for that image.
norm = features.norm(dim=1, keepdim=True).clamp_min(1e-12)
batch = features / norm

# Cosine similarity matrix, NxN
sim_matrix = batch @ batch.T

# Binary (int64) adjacency matrix, NxN: 1 where the similarity exceeds THRESHOLD, else 0.
# The diagonal is kept, so every image with a non-zero feature vector is linked to itself.
adj_matrix = (sim_matrix > THRESHOLD).long()

In [None]:
# Inspect the thresholded 0/1 adjacency matrix.
adj_matrix

### Plot

In [None]:
# Undirected NetworkX graph built from the adjacency matrix.
# `from_numpy_matrix` was removed in NetworkX 3.0; `from_numpy_array` is the
# supported equivalent and accepts the same 2-D array.
G = nx.from_numpy_array(adj_matrix.numpy(), create_using=nx.Graph)

In [None]:
# Draw the graph with a spring layout; nodes are coloured by their encoded label.
plt.figure(1, figsize=(7, 5))
nx.draw(
    G,
    pos=nx.spring_layout(G, k=10 / np.sqrt(G.number_of_nodes())),
    node_color=df['label'],
    with_labels=True,
    font_color="white",
    font_weight="bold",
)
plt.title('Grafo de representacion de imagenes de nubes')
plt.show()

#### DGL

In [None]:
# Edge list: (row, col) index tensors of the adjacency entries equal to 1.
row, col = (adj_matrix == 1).nonzero(as_tuple=True)

In [None]:
# DGL graph with one edge per non-zero adjacency entry.
# The node count is passed explicitly: without it DGL infers it from the largest
# index present in the edge list, so trailing nodes without any edge would be
# dropped and the node features would no longer line up with the graph.
g = dgl.graph((row, col), num_nodes=adj_matrix.shape[0])

In [None]:
# Show the graph summary produced by DGL.
g

### GCN Layer test

In [None]:
# Smoke test: one graph convolution mapping the 2048-d node features to 512-d.
gcd_layer = dgl.nn.GraphConv(in_feats=2048, out_feats=512)

mp_features = gcd_layer(g, features)
print(mp_features.shape)

---

## CAC COEFFICIENTS

#### 1. Sample z neighbours for each node, excluding itself

In [None]:
# Mask self-similarity on a copy, so `sim_matrix` itself stays intact for any cell
# that is (re-)run afterwards. -inf (rather than 0) ensures a node never selects
# itself while num_neighbours < N, even when all its other similarities are <= 0.
sim_no_self = sim_matrix.clone().fill_diagonal_(float("-inf"))

# Indices of the `num_neighbours` most similar other nodes, per row: (N, num_neighbours).
num_neighbours = 2
indices = torch.topk(sim_no_self, num_neighbours, dim=1).indices

In [None]:
# SAMPLE NEIGHBOURHOOD
# Gather the feature vectors of each node's selected neighbours:
# (N, num_neighbours) indices into (N, 2048) features -> (N, num_neighbours, 2048).
neighbours = features[indices]

### CAC computation

In [None]:
# Four independent bias-free linear maps 2048 -> 256, created in the order
# u1, v1, u2, v2: (u1, v1) act on the node features, (u2, v2) on the neighbour features.
u1, v1, u2, v2 = (nn.Linear(2048, 256, bias=False) for _ in range(4))

In [None]:
# Project each node's own features with the first pair of linear maps.
feat_src, feat_dst = u1(features), v1(features)

# Project every sampled neighbour with the second pair, then average over the
# neighbour axis (dim 1) to get one context vector per node.
neigh_src, neigh_dst = u2(neighbours).mean(dim=1), v2(neighbours).mean(dim=1)

In [None]:
def edge_udf(edges):
    """Raw (pre-softmax) attention coefficient of every edge in the batch.

    For an edge i -> j the score is the sum of two dot products, each pairing the
    source node with the destination node:

    * ``feat_src[i] . feat_dst[j]``   -- the two end points themselves
    * ``neigh_src[i] . neigh_dst[j]`` -- their averaged neighbourhoods

    Reading both operands of a product from the same end of the edge (src/src and
    dst/dst) is avoided on purpose: a src-only term ignores the destination, and a
    dst-only term is identical for all edges entering a node, so it cancels in a
    per-destination ``edge_softmax``.

    No additional ``1 / num_neighbours**2`` factor is applied: ``neigh_src`` and
    ``neigh_dst`` are expected to be neighbour *means* already, and the dot product
    of two means equals the pairwise sum divided by ``num_neighbours**2``.

    Args:
        edges: DGL edge batch whose ``src`` mapping provides ``feat_src`` and
            ``neigh_src`` and whose ``dst`` mapping provides ``feat_dst`` and
            ``neigh_dst``, each with one row per edge.

    Returns:
        dict: ``{'raw_coef': tensor}`` with one scalar score per edge.
    """
    node_term = (edges.src['feat_src'] * edges.dst['feat_dst']).sum(1)
    context_term = (edges.src['neigh_src'] * edges.dst['neigh_dst']).sum(1)
    return {'raw_coef': node_term + context_term}

In [None]:
with g.local_scope():
    # Features assigned inside local_scope are dropped when the block exits,
    # so `g` itself is left unchanged.
    g.ndata['feat_src'] = feat_src
    g.ndata['feat_dst'] = feat_dst
    g.ndata['neigh_src'] = neigh_src
    g.ndata['neigh_dst'] = neigh_dst

    # Score every edge, then normalise the scores over the incoming edges of each node.
    g.apply_edges(edge_udf)
    g.edata['cac'] = edge_softmax(g, g.edata['raw_coef'])

    # Attention-weighted aggregation. The softmax weights of a node's incoming edges
    # already sum to 1, so the weighted messages are summed; averaging them would
    # divide by the in-degree a second time.
    g.update_all(fn.u_mul_e('feat_src', 'cac', 'm'), fn.sum('m', 'h'))

    print(g.dstdata['h'].shape)