In [10]:
import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
from shapely.geometry import Polygon, LineString, Point

In [24]:
import numpy as np

# Random array of shape (32, 64, 2), sampled uniformly from [0, 1).
tensor = np.random.rand(32, 64, 2)

# First-order difference between consecutive entries of axis 1, i.e.
# diff_tensor[:, i] == tensor[:, i + 1] - tensor[:, i]; axis 1 shrinks by one.
diff_tensor = np.diff(tensor, n=1, axis=1)

print(diff_tensor.shape)  # -> (32, 63, 2)


(32, 63, 2)


In [28]:
import torch

# Random float tensor of shape (32, 64, 2), sampled uniformly from [0, 1).
tensor = torch.rand(32, 64, 2)

# Forward difference along dim 1 (each entry minus its predecessor);
# same values as tensor[:, 1:, :] - tensor[:, :-1, :].
diff_tensor = torch.diff(tensor, dim=1)

print(diff_tensor.shape)  # -> torch.Size([32, 63, 2])

  from .autonotebook import tqdm as notebook_tqdm


torch.Size([32, 63, 2])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define the encoder
class Encoder(nn.Module):
    """LSTM encoder that summarises a sequence by its final recurrent state.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden and cell states.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: the LSTM expects (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)

    def forward(self, input_seq):
        """Run the LSTM over ``input_seq`` and return its final ``(h, c)``.

        The per-step outputs are discarded; only the last hidden state and
        cell state of the (single-layer) LSTM are handed back.
        """
        _step_outputs, final_state = self.lstm(input_seq)
        hidden, cell = final_state
        return hidden, cell

# Define the decoder
class Decoder(nn.Module):
    """LSTM decoder that sees the encoder's hidden state at every time step.

    Args:
        input_size: Feature width fed to the LSTM, i.e. the width of the
            decoder input after the hidden state has been concatenated to it.
        hidden_size: Width of the LSTM hidden and cell states.
        output_size: Number of features emitted per time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq, h, c):
        """Decode ``input_seq`` starting from the recurrent state ``(h, c)``.

        ``h`` is flattened to one vector per batch element, tiled along the
        time axis and concatenated to every step of ``input_seq`` before the
        LSTM; a linear layer maps each LSTM output to ``output_size``.
        """
        n_batch, n_steps, _n_features = input_seq.shape
        # (layers, batch, hidden) -> (batch, 1, -1) -> (batch, n_steps, -1).
        # Assumes h comes from a single-layer LSTM — TODO confirm for others.
        context = h.reshape(n_batch, 1, -1).repeat(1, n_steps, 1)
        lstm_input = torch.cat((context, input_seq), dim=-1)
        lstm_output, _final_state = self.lstm(lstm_input, (h, c))
        return self.fc(lstm_output)

# Define the sequence-based encoder-decoder model
class Seq2Seq(nn.Module):
    """Chains an encoder and a decoder into one module.

    Args:
        encoder: Module mapping a sequence to a ``(h, c)`` pair.
        decoder: Module called as ``decoder(input_seq, h, c)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq):
        """Encode ``input_seq``, then decode the same sequence from that state."""
        hidden, cell = self.encoder(input_seq)
        return self.decoder(input_seq, hidden, cell)

# Training example: fit the Seq2Seq model to reconstruct one batch of random
# sequences.
input_size = 2
hidden_size = 128
output_size = 2
seq_length = 10
batch_size = 32
num_epochs = 100
seed = 42

# Seed torch's RNG so the synthetic data, the initial weights and therefore
# the printed losses are the same after every kernel restart.
torch.manual_seed(seed)

# Generate random training data of shape (batch_size, seq_length, input_size)
train_data = torch.randn(batch_size, seq_length, input_size)

# Initialize model, loss function, and optimizer.
# The decoder LSTM consumes the raw features concatenated with the encoder's
# hidden state, hence input_size + hidden_size.
encoder = Encoder(input_size, hidden_size)
decoder = Decoder(input_size + hidden_size, hidden_size, output_size)
model = Seq2Seq(encoder, decoder)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): Seq2Seq feeds train_data to the decoder and the loss compares
# the output against the same train_data, so the decoder can lower the loss by
# copying its input step by step — confirm this is the intended objective.

# Training mode only has to be set once; nothing below switches to eval mode.
model.train()

# Training loop (full-batch: one optimizer step per epoch)
for epoch in range(num_epochs):
    optimizer.zero_grad()

    # Forward pass
    output = model(train_data)

    # Reconstruction loss against the input itself
    loss = criterion(output, train_data)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    # Print progress every 10 epochs
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


In [164]:
from sklearn.neighbors import NearestNeighbors
import numpy as np
from sklearn.model_selection import train_test_split


def compute_NN(X, Y):
    """Leave-one-out 1-nearest-neighbour label agreement.

    Every row of ``X`` is matched to its nearest neighbour among the fitted
    rows. The index is queried without an explicit query set, so a sample is
    not returned as its own neighbour.

    Args:
        X: Array-like of feature vectors, one row per sample.
        Y: Array-like of labels aligned with the rows of ``X``.

    Returns:
        Fraction of samples whose nearest neighbour carries the same label.
    """
    features, labels = np.array(X), np.array(Y)
    index = NearestNeighbors(n_neighbors=1)
    index.fit(features)

    # With a single neighbour the (n_samples, 1) index array can be flattened.
    nearest = index.kneighbors(return_distance=False).ravel()

    # Compare each neighbour's label with the label of the sample itself.
    return (labels[nearest] == labels).mean()

def compute_FT(X, Y, k=None):
    """Class-averaged retrieval score over the top ``|class|`` neighbours.

    (Presumably the "first tier" retrieval metric, judging by the name.)
    For every class, each member queries as many neighbours as the class has
    samples; the per-query score is the share of those neighbours that carry
    the query's label. Scores are averaged per class, then across classes.

    NOTE: queries are rows of the fitted data, so a sample is normally
    retrieved as one of its own neighbours and counted as a match.

    Args:
        X: Array-like of feature vectors, one row per sample.
        Y: Array-like of labels aligned with the rows of ``X``.
        k: Only forwarded to the ``NearestNeighbors`` constructor; every query
            overrides it with the class size, so it does not affect the score.

    Returns:
        Mean over classes of the per-class average score.
    """
    features, labels = np.array(X), np.array(Y)
    classes, class_sizes = np.unique(labels, return_counts=True)
    index = NearestNeighbors(n_neighbors=k)
    index.fit(features)

    def class_score(label, size):
        # Neighbours of every member of this class, `size` per query.
        queries = features[labels == label]
        retrieved = index.kneighbors(queries, n_neighbors=size, return_distance=False)
        same_class = labels[retrieved] == label
        # Per-query hit rate, averaged over the class members.
        return (same_class.sum(axis=-1) / size).mean()

    total = sum(class_score(label, size) for label, size in zip(classes, class_sizes))
    return total / len(class_sizes)


def compute_ST(X, Y, k=None):
    """Class-averaged retrieval score over the top ``2 * |class|`` neighbours.

    (Presumably the "second tier" retrieval metric, judging by the name.)
    For every class, each member queries twice as many neighbours as the class
    has samples; the per-query score is the number of retrieved neighbours
    with the query's label divided by the class size. Scores are averaged per
    class, then across classes.

    The number of requested neighbours is capped at the number of fitted
    samples: ``kneighbors`` raises ``ValueError`` when asked for more, which
    would otherwise happen whenever one class holds more than half of the data.

    NOTE: queries are rows of the fitted data, so a sample is normally
    retrieved as one of its own neighbours and counted as a match.

    Args:
        X: Array-like of feature vectors, one row per sample.
        Y: Array-like of labels aligned with the rows of ``X``.
        k: Only forwarded to the ``NearestNeighbors`` constructor; every query
            overrides it, so it does not affect the score.

    Returns:
        Mean over classes of the per-class average score.
    """
    X, Y = np.array(X), np.array(Y)
    types, class_sizes = np.unique(Y, return_counts=True)
    neigh = NearestNeighbors(n_neighbors=k)
    neigh.fit(X)
    n_samples = X.shape[0]

    avg_acc = 0

    for label, class_size in zip(types, class_sizes):
        X_t = X[Y == label]
        # Look at the top 2*|class| neighbours, but never more than exist.
        n_retrieved = int(min(2 * class_size, n_samples))
        knn_array = neigh.kneighbors(X_t, n_neighbors=n_retrieved, return_distance=False)
        matches = Y[knn_array] == label    # Match neighbours' labels with the class label
        # Hits per query relative to the class size, averaged over the class.
        avg_acc += (matches.sum(axis=-1) / class_size).mean()

    return avg_acc / len(class_sizes)

In [165]:
batch_size = 64
split_ratio = 0.2

# Read the arrays inside a context manager so the .npz file handle is closed
# as soon as they are in memory; `loaded` stays usable as a name -> array
# mapping afterwards.
with np.load("dataset/train_building_shape_5k.npz") as npz_file:
    loaded = {name: npz_file[name] for name in npz_file.files}

# Hold out `split_ratio` of the samples for validation; the fixed random_state
# keeps the split identical between runs.
train_tokens, val_tokens, train_labels, val_labels = train_test_split(
    loaded["train_tokens"],
    loaded["train_labels"],
    test_size=split_ratio,
    random_state=42,
)

# Flatten every sample into a single feature row: (n_samples, n_features).
train_tokens = train_tokens.reshape(train_tokens.shape[0], -1)

In [167]:
# Optional 1-NN check (disabled): compute_NN(train_tokens, train_labels)
# Display the (compute_FT, compute_ST) scores of the flattened training tokens.
(
    compute_FT(train_tokens, train_labels),
    compute_ST(train_tokens, train_labels),
)


(0.428367780917627, 0.5544760435345489)

In [168]:
from utils.prepare_dataset import prepare_dataset_mnist
# Notebook-level settings; all but batch_size are forwarded to the loader.
max_seq_len = 64
batch_size = 64
dataset_size = None
with_mask = False

(
    train_tokens,
    train_labels,
    train_mask,
    val_tokens,
    val_labels,
    val_mask,
) = prepare_dataset_mnist(
    file="dataset/building_shapes_5010.csv",
    with_mask=with_mask,
    split_ratio=0.2,
    dataset_size=dataset_size,
    max_seq_len=max_seq_len,
    train=True,
)

# Keep only the first two entries of the last axis, then flatten every sample
# into a single feature row.
train_tokens = train_tokens[:, :, :2].reshape(train_tokens.shape[0], -1)

In [169]:
# Display the (compute_FT, compute_ST) scores of the current train_tokens.
(
    compute_FT(train_tokens, train_labels),
    compute_ST(train_tokens, train_labels),
)

(0.15763092303377366, 0.257442129680529)