In [None]:
"""
The main code for the Strings-to-Vectors assignment. See README.md and Instructions for details.
"""
from typing import Sequence, Any
import numpy as np

class Index:
    """
    Represents a mapping from a vocabulary (e.g., strings) to integers.

    Indexes are assigned consecutively beginning at `start`. The value
    `start - 1` is reserved: it is used both for objects that are not in the
    vocabulary and as the padding value in index matrices.
    """

    def __init__(self, vocab: Sequence[Any], start: int = 0):
        """
        Assigns an index to each unique item in the `vocab` iterable,
        with indexes starting from `start`.

        Indexes are assigned in order, so that the first unique item in
        `vocab` has the index `start`, the second unique item has the index
        `start + 1`, etc. Repeated items keep the index of their first
        occurrence.

        :param vocab: The (hashable) items to index; duplicates are allowed.
        :param start: The index given to the first unique item.
        """
        self.vocab = {}
        self.start = start
        current_index = start
        for item in vocab:
            if item not in self.vocab:
                self.vocab[item] = current_index
                current_index += 1

    def _index_to_object_map(self) -> dict:
        """
        Builds the inverse of `self.vocab`, mapping each index to its object.

        Built on demand (rather than cached) so that it always reflects the
        current contents of `self.vocab`.
        """
        return {idx: obj for obj, idx in self.vocab.items()}

    def objects_to_indexes(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a vector of the indexes associated with the input objects.

        For objects not in the vocabulary, `start-1` is used as the index.

        :param object_seq: A sequence of objects.
        :return: A 1-dimensional integer array of the object indexes.
        """
        unknown = self.start - 1
        # An explicit dtype keeps the result integral even for empty input
        # (np.array([]) would otherwise default to float64).
        return np.array(
            [self.vocab.get(obj, unknown) for obj in object_seq], dtype=int
        )

    def objects_to_index_matrix(self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a matrix of the indexes associated with the input objects.

        For objects not in the vocabulary, `start-1` is used as the index.

        If the sequences are not all of the same length, shorter sequences will
        have padding added at the end, with `start-1` used as the pad value.

        The inner sequences may be of any sequence type (lists, tuples,
        strings, ...). An empty outer sequence yields an array of shape (0, 0).

        :param object_seq_seq: A sequence of sequences of objects.
        :return: A 2-dimensional integer array of the object indexes.
        """
        pad = self.start - 1
        # Translate objects to indexes first, then pad with the index value
        # itself. Padding with a placeholder object (e.g. None) before the
        # lookup would be wrong whenever that placeholder is in the vocabulary,
        # and list concatenation would fail for non-list sequences.
        rows = [[self.vocab.get(obj, pad) for obj in seq] for seq in object_seq_seq]
        width = max((len(row) for row in rows), default=0)
        matrix = np.full((len(rows), width), pad, dtype=int)
        for i, row in enumerate(rows):
            if row:
                matrix[i, :len(row)] = row
        return matrix

    def objects_to_binary_vector(self, object_seq: Sequence[Any]) -> np.ndarray:
        """
        Returns a binary vector, with a 1 at each index corresponding to one of
        the input objects.

        Objects that are not in the vocabulary are ignored. The vector has
        length `len(vocab) + start`, so every assigned index is addressable.

        :param object_seq: A sequence of objects.
        :return: A 1-dimensional array, with 1s at the indexes of each object,
                 and 0s at all other indexes.
        """
        binary_vector = np.zeros(len(self.vocab) + self.start, dtype=int)
        for obj in object_seq:
            if obj in self.vocab:
                binary_vector[self.vocab[obj]] = 1
        return binary_vector

    def objects_to_binary_matrix(self, object_seq_seq: Sequence[Sequence[Any]]) -> np.ndarray:
        """
        Returns a binary matrix, with a 1 at each index corresponding to one of
        the input objects.

        Objects that are not in the vocabulary are ignored.

        :param object_seq_seq: A sequence of sequences of objects.
        :return: A 2-dimensional array, where each row in the array corresponds
                 to a row in the input, with 1s at the indexes of each object,
                 and 0s at all other indexes.
        """
        binary_matrix = np.zeros((len(object_seq_seq), len(self.vocab) + self.start), dtype=int)
        for i, seq in enumerate(object_seq_seq):
            for obj in seq:
                if obj in self.vocab:
                    binary_matrix[i, self.vocab[obj]] = 1
        return binary_matrix

    def indexes_to_objects(self, index_vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of objects associated with the indexes in the input
        vector.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param index_vector: A 1-dimensional array of indexes
        :return: A list of objects, one for each index that has an object.
        """
        index_to_obj = self._index_to_object_map()
        return [index_to_obj[idx] for idx in index_vector if idx in index_to_obj]

    def index_matrix_to_objects(self, index_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects associated with the indexes
        in the input matrix.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output (so padding values are dropped).

        :param index_matrix: A 2-dimensional array of indexes
        :return: A list with one list of objects per row of the input.
        """
        index_to_obj = self._index_to_object_map()
        return [
            [index_to_obj[idx] for idx in row if idx in index_to_obj]
            for row in index_matrix
        ]

    def binary_vector_to_objects(self, vector: np.ndarray) -> Sequence[Any]:
        """
        Returns a sequence of the objects identified by the nonzero indexes in
        the input vector.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param vector: A 1-dimensional binary array
        :return: A list of objects, one for each nonzero index that has an
                 object, in increasing index order.
        """
        index_to_obj = self._index_to_object_map()
        return [
            index_to_obj[idx]
            for idx in np.nonzero(vector)[0]
            if idx in index_to_obj
        ]

    def binary_matrix_to_objects(self, binary_matrix: np.ndarray) -> Sequence[Sequence[Any]]:
        """
        Returns a sequence of sequences of objects identified by the nonzero
        indices in the input matrix.

        If, for any of the indexes, there is not an associated object, that
        index is skipped in the output.

        :param binary_matrix: A 2-dimensional binary array
        :return: A list with one list of objects per row of the input, each
                 in increasing index order.
        """
        index_to_obj = self._index_to_object_map()
        return [
            [index_to_obj[idx] for idx in np.nonzero(row)[0] if idx in index_to_obj]
            for row in binary_matrix
        ]