In [None]:
# Try:
# 1. training on all cells
# 2. training only on cells with transitions, then setting the embeddings of all other cells to 0

In [None]:
import os
import sys

# Register the directory two levels above the working directory on the import
# path (once) — presumably the project root that provides the packages imported below.
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import torch
import pandas as pd
import numpy as np
import pickle

from model_classes import SkipGramModel, Node2VecModel, SFCModel, GAEModel, GATEncoder, GCNEncoder
from pipelines.utils import ROOT_DIR, load_config
from models.utils import meters2lonlat, lonlat2meters
from torch_geometric.utils.convert import from_scipy_sparse_matrix
from scipy.sparse import csr_matrix


# Enumerate GPUs in PCI bus order and expose only device 0 to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})
# The preprocessing tensors are built on the CPU; the training device is chosen
# later, in the cell that applies the PyG transforms.
device = torch.device('cpu')

In [None]:
#### Transition Matrix ####
# np.load on an .npz archive returns an NpzFile that keeps the file open;
# the context manager closes it as soon as the array has been read.
with np.load('cell_trans-mx.npz') as npz_archive:
    trans_mx = npz_archive['arr_0']
# Symmetrise: an entry is non-zero if there is a transition in either direction.
trans_sym_mx = trans_mx + trans_mx.T
# Select only those cells with connections
# (the matrix is symmetric, so checking columns also covers the rows).
cells_idx = np.any(trans_sym_mx, axis=0)


In [None]:
# Restrict the symmetric matrix to the connected cells on both axes.
trans_clipped = trans_sym_mx[np.ix_(cells_idx, cells_idx)]

In [None]:
# Boolean mask of the cells that have no connection at all.
neg_idx = np.logical_not(cells_idx)

In [None]:
# Give every cell without a connection a self-loop, so that all cells are part
# of the graph.
neg_idx = ~cells_idx
# Work on a copy: `trans_sym_mx` stays untouched, so re-running the cell that
# derives `cells_idx` from it still yields the same mask.
trans_add_selfloops = trans_sym_mx.copy()
isolated_cells = np.flatnonzero(neg_idx)
# Rows and columns of isolated cells are all zero, so setting their diagonal
# entry to 1 is all that is needed.
trans_add_selfloops[isolated_cells, isolated_cells] = 1

In [None]:
def preprocess_trans_mx(trans_clipped, self_weight=0.5):
    """Row-normalise a transition matrix and give every node a fixed self-loop weight.

    The off-diagonal part of each row is scaled to sum to 1. The diagonal is
    then set so that, after a second row normalisation, every diagonal entry
    equals ``self_weight`` and the remaining ``1 - self_weight`` is split over
    the neighbours in proportion to their transition weights. Any diagonal
    values already present in the input are ignored, otherwise the resulting
    diagonal would not equal ``self_weight``.

    Rows without off-diagonal mass (isolated nodes) cannot be normalised.
    Instead of turning into NaNs they become a pure self-loop (diagonal 1) when
    ``self_weight > 0`` and stay all-zero when ``self_weight == 0``.

    Args:
        trans_clipped: 2-D array-like of transition weights (expected to be
            non-negative). The input is not modified.
        self_weight: Target value of the diagonal after normalisation,
            with ``0 <= self_weight < 1``.

    Returns:
        A new float ``numpy.ndarray`` with the same shape as the input.

    Raises:
        ValueError: If ``self_weight`` is outside ``[0, 1)``.
    """
    if not 0 <= self_weight < 1:
        raise ValueError(f"self_weight must be in [0, 1), got {self_weight!r}")

    def normalize_rows(matrix):
        """Divide each row by its sum; rows summing to zero are left unchanged."""
        row_sums = matrix.sum(axis=1, keepdims=True)
        safe_sums = np.where(row_sums == 0, 1.0, row_sums)
        return matrix / safe_sums

    # Diagonal value c that becomes `self_weight` once a row whose off-diagonal
    # part sums to 1 is normalised again: c / (c + 1) == self_weight.
    calc_weight = self_weight / (1 - self_weight)

    # Float copy of the input with the diagonal removed, so the first
    # normalisation only distributes mass over the neighbours.
    off_diag = np.array(trans_clipped, dtype=float)
    np.fill_diagonal(off_diag, 0.0)
    norm_mx = normalize_rows(off_diag)

    # Add the self-loop weight and renormalise so every row sums to 1 again.
    np.fill_diagonal(norm_mx, calc_weight)
    return normalize_rows(norm_mx)


In [None]:
# Graph to embed: either `trans_add_selfloops` or `trans_clipped`.
selected_trans_mx = trans_add_selfloops
trans_mx_pp = preprocess_trans_mx(selected_trans_mx, self_weight=0.5)

In [None]:
data_config = load_config(name='porto', ctype="dataset")

# Both artefacts live in the same directory and share the "<city>_cell<size>" prefix.
# NOTE: pickle.load can execute arbitrary code — only open files you trust.
road_embs_dir = os.path.join(ROOT_DIR, "models/road_embs/")
cell_prefix = f"{data_config['city']}_cell{int(data_config['cell_size'])}"

# Load CellSpace
dataset_cell_file = f"{cell_prefix}_cellspace.pkl"
file_path = os.path.join(road_embs_dir, dataset_cell_file)
with open(file_path, 'rb') as fh:
    cs = pickle.load(fh)

# Load Feats Matrix
feats_mx_file = f"{cell_prefix}_feats_mx.pkl"
file_path = os.path.join(road_embs_dir, feats_mx_file)
with open(file_path, 'rb') as fh:
    feats_mx = pickle.load(fh)



In [None]:
# Sanity check: shape of the feature matrix restricted to the connected cells.
feats_mx[cells_idx].shape

In [None]:
from sklearn import preprocessing

# Scale every feature row to unit L2 norm (normalize's defaults, spelled out).
feats_mx_norm = preprocessing.normalize(feats_mx, norm="l2", axis=1)

In [None]:
# Create edge_index from transition matrix: every positive weight becomes an edge.
adj_mx = (trans_mx_pp > 0).astype(int)
adj_t = csr_matrix(adj_mx)
edge_index_torch = from_scipy_sparse_matrix(adj_t)[0]

# Node features as a float tensor. `feats_mx_norm` holds L2-normalised rows, so
# every magnitude is at most 1; an integer dtype would truncate the values to
# -1/0/1 and discard the feature information.
feats_mx_torch = torch.tensor(feats_mx_norm, dtype=torch.float, device=device)

# Create PYG Data Set
from torch_geometric.data import Data
data = Data(x=feats_mx_torch, edge_index=edge_index_torch)

data

In [None]:
import torch_geometric.transforms as T

# Use the first GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# One-hot encode the node degree (max degree 20), then move the data to `device`.
# NOTE(review): `data.x` already holds the cell features here; per the PyG docs
# OneHotDegree concatenates onto existing features by default (cat=True) —
# confirm whether a feature-free run was intended.
# Re-running this cell applies the transform to the already transformed `data`.
transform = T.Compose([T.OneHotDegree(20), T.ToDevice(device)])
data = transform(data)
print(data.x.shape)



In [None]:
# Train the GAE model with a GAT encoder; `emb_dim` is reused further down
# when the full embedding matrix is allocated.
emb_dim = 128
model = GAEModel(
    data,
    device=device,
    encoder=GATEncoder,
    emb_dim=emb_dim,
)
model.train(epochs=10000)

In [None]:
# Train the SFC model on the preprocessed transition matrix.
# This rebinds `model`: if the GAE cell was run before, its model is replaced
# and the following cells read the embeddings of this one instead.
model = SFCModel(
    data,
    adj=trans_mx_pp,
    device=device,
    layers=2,
    add_edge_degree=False,
)
model.train(epochs=10000)

In [None]:
# Fetch the embeddings from whichever model was trained last.
# NOTE(review): `load_emb` is a project method; presumably it returns one row
# per node of `data` — confirm.
emb_mx = model.load_emb()

In [None]:
# Total number of cells (connected and unconnected).
len(cells_idx)

In [None]:
# Build the embedding matrix for ALL cells from the embeddings that were learned.
emb_cpu = emb_mx.cpu().detach()
n_cells = cells_idx.shape[0]
if emb_cpu.shape[0] == n_cells:
    # The model was trained on every cell (e.g. on `trans_add_selfloops`):
    # there is nothing to fill in.
    mx = emb_cpu
else:
    # The model was trained on the connected cells only: scatter its rows into
    # a zero matrix, leaving the unconnected cells at 0. The width is taken
    # from the embeddings themselves, so it cannot disagree with the model.
    mx = torch.zeros((n_cells, emb_cpu.shape[1]))
    mx[cells_idx] = emb_cpu
# WARNING: the zero-filled variant does not work well, because the zero
# embeddings of unconnected cells still get selected during training.

In [None]:
# Idea 2: start from the node2vec embeddings of all cells and overwrite the
# rows of the connected cells with the embeddings learned above.
# NOTE: `node2vec_emb` is only loaded in the "Concat GAE + Node2Vec" part
# further down, so that loading cell has to be executed before this one.
# Work on a detached CPU copy: a plain assignment would alias `node2vec_emb`,
# and the row assignment would then silently overwrite the loaded embeddings
# that are reused for the concatenation later on.
mx = node2vec_emb.cpu().detach().clone()
# Align device and dtype before assigning; mismatches make the assignment fail.
mx[cells_idx] = emb_mx.cpu().detach().to(mx.dtype)

In [None]:
model_emb = mx
print(model_emb.shape)

# Target directory for the cell embeddings used by the trajectory embeddings.
# NOTE(review): this is an absolute path at the filesystem root, while the
# CellSpace/feature files are read from ROOT_DIR/models/road_embs/ — confirm
# that this is the intended location.
save_path ="/models/road_embs"

# File name pattern: "<city>_cell_embs_<model_name><add>.pkl"
model_name = "sfc"
city = "porto"
add = ""
embs_file = os.path.join(save_path, f"{city}_cell_embs_{model_name}{add}.pkl")

# Save embeddings
with open(embs_file, 'wb') as fh:
    pickle.dump(model_emb, fh, protocol=pickle.HIGHEST_PROTOCOL)
    print("Saved to: ", embs_file)

### Concat GAE + Node2Vec

In [None]:
import pickle

# Load the node2vec cell embeddings.
# NOTE: pickle.load can execute arbitrary code — only open files you trust.
embs_file = "trajemb/models/road_embs/porto_cell_embs_node2vec.pkl"
with open(embs_file, 'rb') as fh:
    node2vec_emb = pickle.load(fh)
print("Loaded from: ", embs_file)
print(node2vec_emb.shape)

In [None]:
# Load the GAE cell embeddings.
# NOTE: pickle.load can execute arbitrary code — only open files you trust.
embs_file = "trajemb/models/road_embs/porto_cell_embs_gae.pkl"
with open(embs_file, 'rb') as fh:
    gae_emb = pickle.load(fh)
print("Loaded from: ", embs_file)
print(gae_emb.shape)

In [None]:
# Drop the autograd history and make sure the tensor lives on the CPU.
node2vec_emb = node2vec_emb.detach().cpu()

In [None]:
import torch

# Join both embeddings column-wise (dim 1): each cell's GAE vector followed by
# its node2vec vector.
node2vec_part = node2vec_emb.cpu().detach()
emb_concat = torch.cat([gae_emb, node2vec_part], dim=1)

In [None]:
import os

# Target directory for the cell embeddings used by the trajectory embeddings.
# NOTE(review): absolute path, whereas the embeddings above are loaded from the
# relative "trajemb/models/road_embs/" — confirm both point to the same place.
save_path ="/trajemb/models/road_embs"

# File name pattern: "<city>_cell_embs_<model_name><add>.pkl"
emb = emb_concat
model_name = "node2vec_gae"
city = "porto"
add = ""
embs_file = os.path.join(save_path, f"{city}_cell_embs_{model_name}{add}.pkl")

# Save embeddings
with open(embs_file, 'wb') as fh:
    pickle.dump(emb, fh, protocol=pickle.HIGHEST_PROTOCOL)
    print("Saved to: ", embs_file)