# In [None]:
"""
The main code for the Strings-to-Vectors assignment. See README.md and Instructions for details.
"""
from typing import Sequence, Any

import numpy as np


class Index:
    """
    Represents a mapping from a vocabulary (e.g., strings) to integers.

    Each unique vocabulary item gets a consecutive integer index beginning at
    `start`. The value `start - 1` is reserved as the index for objects that
    are not in the vocabulary, and as the padding value in index matrices.
    """

    def __init__(self, vocab: Sequence[Any], start: int = 0):
        """
        Assigns an index to each unique item in the `vocab` iterable,
        with indexes starting from `start`.

        Indexes should be assigned in order, so that the first unique item in
        `vocab` has the index `start`, the second unique item has the index
        `start + 1`, etc.

        :param vocab: The vocabulary items; they must be hashable. Repeated
                      items keep the index of their first occurrence.
        :param start: The index assigned to the first unique item.
        """
        self.start_index = start
        # dict.fromkeys drops duplicates while keeping first-occurrence order.
        self.vocab_mapping = {
            item: idx for idx, item in enumerate(dict.fromkeys(vocab), start)
        }
        # Reverse lookup built once, so decoding never rescans the vocabulary.
        self._index_to_object = {
            idx: item for item, idx in self.vocab_mapping.items()
        }

    def _binary_width(self) -> int:
        """
        Returns the length of a binary vector for this vocabulary: one slot
        for every index from 0 up to and including the largest vocabulary
        index. Unlike `max(...) + 1`, this is also defined for an empty
        vocabulary.
        """
        return self.start_index + len(self.vocab_mapping)

    def objects_to_indexes(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a vector of the indexes associated with the input objects.

        For objects not in the vocabulary, `start-1` is used as the index.

        :param object_seq: A sequence of objects.
        :return: A 1-dimensional integer array of the object indexes.
        """
        unknown = self.start_index - 1
        # An explicit integer dtype keeps an empty input from producing a
        # float array, which numpy refuses to use as an index.
        return np.array(
            [self.vocab_mapping.get(obj, unknown) for obj in object_seq],
            dtype=int,
        )

    def objects_to_index_matrix(
            self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a matrix of the indexes associated with the input objects.

        For objects not in the vocabulary, `start-1` is used as the index.

        If the sequences are not all of the same length, shorter sequences will
        have padding added at the end, with `start-1` used as the pad value.

        :param object_seq_seq: A sequence of sequences of objects.
        :return: A 2-dimensional array of the object indexes. An empty input
                 yields an array of shape (0, 0).
        """
        pad = self.start_index - 1
        # default=0 so an empty outer sequence does not make max() raise.
        max_length = max((len(seq) for seq in object_seq_seq), default=0)
        index_matrix = np.full(
            (len(object_seq_seq), max_length), pad, dtype=int)

        for i, seq in enumerate(object_seq_seq):
            indexes = self.objects_to_indexes(seq)
            index_matrix[i, :len(indexes)] = indexes

        return index_matrix

    def objects_to_binary_vector(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a binary vector, with a 1 at each index corresponding to one of
        the input objects.

        Objects not in the vocabulary map to `start-1`. When that is a valid
        (non-negative) position, it is set to 1; when it is negative (e.g.
        `start=0`), such objects are ignored rather than wrapping around to
        the end of the vector.

        :param object_seq: A sequence of objects.
        :return: A 1-dimensional array, with 1s at the indexes of each object,
                 and 0s at all other indexes.
        """
        binary_vector = np.zeros(self._binary_width(), dtype=int)
        indexes = self.objects_to_indexes(object_seq)
        # Negative indexes would silently address slots from the end.
        binary_vector[indexes[indexes >= 0]] = 1
        return binary_vector

    def objects_to_binary_matrix(
            self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a binary matrix, with a 1 at each index corresponding to one of
        the input objects.

        Objects not in the vocabulary map to `start-1`. When that is a valid
        (non-negative) position, it is set to 1; when it is negative, such
        objects are ignored.

        :param object_seq_seq: A sequence of sequences of objects.
        :return: A 2-dimensional array, where each row in the array corresponds
                 to a row in the input, with 1s at the indexes of each object,
                 and 0s at all other indexes.
        """
        binary_matrix = np.zeros(
            (len(object_seq_seq), self._binary_width()), dtype=int)

        for i, seq in enumerate(object_seq_seq):
            indexes = self.objects_to_indexes(seq)
            # Negative indexes would silently address slots from the end.
            binary_matrix[i, indexes[indexes >= 0]] = 1

        return binary_matrix

    def indexes_to_objects(self, index_vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of objects associated with the indexes in the input
        vector.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param index_vector: A 1-dimensional array of indexes
        :return: A sequence of objects, one for each index.
        """
        lookup = self._index_to_object
        # Membership is tested on the index, never on the object, so falsy
        # vocabulary items such as 0 or "" are still returned.
        return [lookup[index] for index in index_vector if index in lookup]

    def index_matrix_to_objects(
            self, index_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects associated with the indexes
        in the input matrix.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param index_matrix: A 2-dimensional array of indexes
        :return: A sequence of sequences of objects, one for each index.
        """
        return [self.indexes_to_objects(row) for row in index_matrix]

    def binary_vector_to_objects(self, vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of the objects identified by the nonzero indexes in
        the input vector.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param vector: A 1-dimensional binary array
        :return: A sequence of objects, one for each nonzero index, in
                 increasing index order.
        """
        # np.nonzero yields positions in ascending order, which matches the
        # order in which indexes were assigned to the vocabulary.
        non_zero_indexes = np.nonzero(vector)[0].tolist()
        return self.indexes_to_objects(non_zero_indexes)

    def binary_matrix_to_objects(
            self, binary_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects identified by the nonzero
        indices in the input matrix.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param binary_matrix: A 2-dimensional binary array
        :return: A sequence of sequences of objects, one for each nonzero index.
        """
        return [self.binary_vector_to_objects(row) for row in binary_matrix]
