In [3]:
import tensorflow as tf
import numpy as np

In [None]:
import tensorflow as tf
import numpy as np


def fill_triangular(x, upper=False, name=None):
    """Packs a vector (or batch of vectors) into triangular matrices.

    The right-most axis of `x` must have length `m = n * (n + 1) / 2` for some
    integer `n`; the result has two trailing axes of size `n`. The elements
    are not laid out row by row: the vector is concatenated with a reversed
    copy of (part of) itself, reshaped to `n x n`, and the unwanted triangle
    is zeroed out. For `x = [1, 2, 3, 4, 5, 6]` this gives

        upper=False: [[4, 0, 0],      upper=True: [[1, 2, 3],
                      [6, 5, 0],                   [0, 5, 6],
                      [3, 2, 1]]                   [0, 0, 4]]

    Args:
        x: `Tensor` holding the lower (or upper) triangular elements in its
          right-most axis.
        upper: Python `bool`; `True` produces upper triangular matrices,
          `False` (default) lower triangular ones.
        name: Python `str`. The name to give this op.

    Returns:
        tril: `Tensor` with lower (or upper) triangular elements filled from `x`.

    Raises:
        ValueError: if the statically known right-most size of `x` is not a
          triangular number.
    """

    with tf.name_scope(name or 'fill_triangular'):
        x = tf.convert_to_tensor(x, name='x')

        m = x.shape[-1]
        batch_shape = tf.TensorShape(x.shape[:-1])

        if m is None:
            # Size only known at run time. The cast truncates, so the `- 0.5`
            # of the quadratic formula is dropped; a non-triangular size is
            # not checked here and surfaces as a reshape failure below.
            m = tf.shape(x)[-1]
            n = tf.cast(
                tf.sqrt(0.25 + tf.cast(2 * m, dtype=tf.float32)), dtype=tf.int32)
            static_final_shape = batch_shape.concatenate([None, None])
        else:
            # Solve m = n * (n + 1) / 2 for n and insist on an integer root.
            m = np.int32(m)
            root = np.sqrt(0.25 + 2. * m) - 0.5
            if root != np.floor(root):
                raise ValueError('Input right-most shape ({}) does not '
                                 'correspond to a triangular matrix.'.format(m))
            n = np.int32(root)
            static_final_shape = batch_shape.concatenate([n, n])

        # Concatenate the vector with a reversed copy so that the n * n
        # elements, read row by row, put every input value inside the
        # requested triangle.
        last_axis = [tf.rank(x) - 1]
        if upper:
            pieces = [x, tf.reverse(x[..., n:], axis=last_axis)]
        else:
            pieces = [x[..., n:], tf.reverse(x, axis=last_axis)]
        doubled = tf.concat(pieces, axis=-1)

        # Use the static shape when every dimension is known, otherwise build
        # the target shape from run-time values.
        if static_final_shape.is_fully_defined():
            target_shape = static_final_shape.as_list()
        else:
            target_shape = tf.concat([tf.shape(x)[:-1], [n, n]], axis=0)
        square = tf.reshape(doubled, target_shape)

        # Zero out the triangle that is not requested.
        if upper:
            keep_lower, keep_upper = 0, -1
        else:
            keep_lower, keep_upper = -1, 0
        triangular = tf.linalg.band_part(
            square, num_lower=keep_lower, num_upper=keep_upper)

        # Re-attach whatever is statically known about the result shape.
        triangular.set_shape(static_final_shape)
        return triangular

# Input vectors: 6 = 3 * (3 + 1) / 2 entries, i.e. exactly one 3x3 triangle.
# (The fourth entry of x_lower was previously mistyped as `.0`.)
x_lower = tf.constant([1.0, 2.0, 3.0,
                       4.0, 5.0, 6.0], dtype=tf.float32)
x_upper = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=tf.float32)

# Build a lower triangular matrix.
lower_triangular_matrix = fill_triangular(x_lower, upper=False, name="fill_triangular_lower")
print("Lower Triangular Matrix:")
print(lower_triangular_matrix)

# Build an upper triangular matrix.
upper_triangular_matrix = fill_triangular(x_upper, upper=True, name="fill_triangular_upper")
print("\nUpper Triangular Matrix:")
print(upper_triangular_matrix)

Lower Triangular Matrix:
tf.Tensor(
[[4. 0. 0.]
 [6. 5. 0.]
 [3. 2. 1.]], shape=(3, 3), dtype=float32)

Upper Triangular Matrix:
tf.Tensor(
[[1. 2. 3.]
 [0. 5. 6.]
 [0. 0. 4.]], shape=(3, 3), dtype=float32)


In [None]:
def fill_triangular_inverse(x, upper=False, name=None):
    """Flattens a (batch of) triangular matrix back into a vector.

    The two trailing axes of `x` (size `n` each) are replaced by a single axis
    of length `m = n * (n + 1) / 2`. One boundary row is emitted first; the
    remaining rows are added to a copy of themselves rotated by 180 degrees,
    flattened, and the first `m - n` entries appended.

    Args:
        x: `Tensor` whose two right-most axes hold lower (or upper)
          triangular matrices.
        upper: Python `bool`; `True` if `x` is upper triangular, `False`
          (default) if it is lower triangular.
        name: Python `str`. The name to give this op.

    Returns:
        flat_tril: (Batch of) vector-shaped `Tensor` representing vectorized lower
          (or upper) triangular elements from `x`.
    """

    with tf.name_scope(name or 'fill_triangular_inverse'):
        x = tf.convert_to_tensor(x, name='x')

        n = x.shape[-1]
        batch_shape = tf.TensorShape(x.shape[:-2])

        if n is None:
            # Matrix size only known at run time.
            n = tf.shape(x)[-1]
            m = (n * (n + 1)) // 2
            static_final_shape = batch_shape.concatenate([None])
        else:
            n = np.int32(n)
            m = np.int32((n * (n + 1)) // 2)
            static_final_shape = batch_shape.concatenate([m])

        ndims = tf.rank(x)
        if upper:
            # The first row is completely filled and is emitted as is.
            initial_elements = x[..., 0, :]
            triangular_portion = x[..., 1:, :]
        else:
            # The last row is completely filled; it is emitted reversed.
            initial_elements = tf.reverse(x[..., -1, :], axis=[ndims - 2])
            triangular_portion = x[..., :-1, :]

        # Rotating the remaining rows by 180 degrees moves their non-zero
        # entries onto the zeros of the unrotated copy, so the sum is dense.
        rotated_triangular_portion = tf.reverse(
            triangular_portion, axis=[ndims - 1, ndims - 2])
        consolidated_matrix = triangular_portion + rotated_triangular_portion

        flat_shape = tf.concat([tf.shape(x)[:-2], [n * (n - 1)]], axis=0)
        end_sequence = tf.reshape(consolidated_matrix, flat_shape)

        # Only the first m - n flattened entries are new; the rest repeat them.
        y = tf.concat([initial_elements, end_sequence[..., :m - n]], axis=-1)

        y.set_shape(static_final_shape)
        return y

# Input vectors: 6 = 3 * (3 + 1) / 2 entries, i.e. exactly one 3x3 triangle.
# (The fourth entry of x_lower was previously mistyped as `.0`.)
x_lower = tf.constant([1.0, 2.0, 3.0,
                       4.0, 5.0, 6.0], dtype=tf.float32)
x_upper = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=tf.float32)

# Build a lower triangular matrix, then flatten it back with the inverse.
lower_triangular_matrix = fill_triangular(x_lower, upper=False, name="fill_triangular_lower")
print("Lower Triangular Matrix:")
print(lower_triangular_matrix)

x_lower_recovered = fill_triangular_inverse(
    lower_triangular_matrix, upper=False, name="fill_triangular_inverse_lower")
print("Recovered lower vector:")
print(x_lower_recovered)
# The inverse must return exactly the vector the matrix was built from.
np.testing.assert_allclose(x_lower_recovered.numpy(), x_lower.numpy())

# Build an upper triangular matrix, then flatten it back with the inverse.
upper_triangular_matrix = fill_triangular(x_upper, upper=True, name="fill_triangular_upper")
print("\nUpper Triangular Matrix:")
print(upper_triangular_matrix)

x_upper_recovered = fill_triangular_inverse(
    upper_triangular_matrix, upper=True, name="fill_triangular_inverse_upper")
print("Recovered upper vector:")
print(x_upper_recovered)
np.testing.assert_allclose(x_upper_recovered.numpy(), x_upper.numpy())

In [7]:
import tensorflow as tf

def make_k_mers(sequences, k, pivot_left=True):
    """ Maps one hot encoded nucleotide sequences to a k-mer representation.
        Args:
            sequences: A float tensor of shape (b, L, n+1) representing sequences of length L over an
                        alphabet of n regular characters (n = 4, i.e. shape (b, L, 5), for nucleotides).
                        Assumes that the last dimension is one-hot encoded with "N" corresponding to the last position.
            k: A Python integer >= 1 specifying the length of the k-mer.
            pivot_left: A boolean specifying whether to pivot the k-mer to the left or right.
        Returns:
            A tensor of shape (b, L, n**(k-1), n), i.e. (b, L, 4**(k-1), 4) for nucleotides.
            If pivot_left is True, entry l describes the k-mer starting at position l and the last dimension
            corresponds to the n possible characters in the leftmost position of the k-mer.
            Otherwise, entry l describes the k-mer ending at position l and the last dimension corresponds
            to the rightmost position in the k-mer.
            The second to last dimension jointly indexes the remaining k-1 positions as digits in base n;
            positions that are multiplied in later are less significant.
            If the k-mer contains N, this is expressed equiprobably among the regular characters possible
            at that position. Positions beyond the sequence boundary are treated like N.
        Raises:
            ValueError: If k is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got {}.".format(k))
    L = tf.shape(sequences)[-2]
    # alphabet size is the number of characters minus 1 (N); prefer the static size so that
    # the reshape targets below stay Python integers and static shape information is kept
    static_depth = sequences.shape[-1]
    if static_depth is None:
        alphabet_size = tf.shape(sequences)[-1] - 1
    else:
        alphabet_size = int(static_depth) - 1
    n = tf.cast(alphabet_size, dtype=sequences.dtype)
    # uniform distribution over alphabet in case of N
    sequences_no_N = sequences[..., :-1]
    N_pos = tf.cast(sequences[..., -1:] == 1, dtype=sequences.dtype)
    sequences_no_N = sequences_no_N + (1/n) * N_pos
    # compute a padding for kmers that range over the sequence boundaries;
    # the padding is built with an explicit length of k-1 (instead of slicing the
    # sequence) so that it is long enough even if the sequence is shorter than k-1
    pad_shape = tf.concat(
        [tf.shape(sequences_no_N)[:1], [k-1], tf.shape(sequences_no_N)[-1:]], axis=0)
    pad = tf.ones(pad_shape, dtype=sequences.dtype) / n
    if pivot_left:
        sequences_padded_no_N = tf.concat([sequences_no_N, pad], axis=-2)
        k_mers = sequences_padded_no_N[:, :L, tf.newaxis, :]
        offsets = range(1, k)
    else:
        sequences_padded_no_N = tf.concat([pad, sequences_no_N], axis=-2)
        k_mers = sequences_padded_no_N[:, k-1:L+k-1, tf.newaxis, :]
        offsets = range(k-2, -1, -1)
    for i in offsets:
        # outer product of the distribution built so far with the distribution
        # of the next position; the pivot stays in the last dimension
        shift_i = sequences_padded_no_N[:, i:L+i, tf.newaxis, :, tf.newaxis]
        k_mers = k_mers[..., tf.newaxis, :] * shift_i
        num_processed = i if pivot_left else k-i-1
        shape = [alphabet_size**num_processed, alphabet_size]
        k_mers = tf.reshape(k_mers, tf.concat([tf.shape(k_mers)[:-3], shape], axis=0))
    return k_mers



def encode_kmer_string(kmer, pivot_left=True, alphabet="ACGT"):
    """ Converts a k-mer to classes in the format (i,j) with i < n^{k-1} and j < n where n is the alphabet size.
        E.g. AAA -> (0,0), AAT -> (3,0), TAA -> (0,3) if pivot_left is True, otherwise
             AAA -> (0,0), AAT -> (0,3), TAA -> (12, 0)
        NOTE(review): tracing make_k_mers, positions that are multiplied in later become the least
        significant digit of i, which would give TAA -> (3, 0) for pivot_left=False; please confirm
        the (12, 0) example above.
        The output is a one-hot encoding of these classes in case of A,C,G,T.
        If the k-mer contains N, this is expressed equiprobably among the regular 4 nucleotides.
        Args:
            kmer: A non-empty string over the characters of alphabet plus "N".
            pivot_left: A boolean forwarded to make_k_mers; selects which end of the k-mer is the pivot.
            alphabet: The regular characters; "N" is appended as the unknown character.
        Returns:
            A tensor with the two trailing dimensions (i, j) of the make_k_mers output for this k-mer,
            e.g. shape (64, 4) for a 4-mer.
        Raises:
            ValueError: If kmer is empty or contains a character that is neither in alphabet nor "N".
    """
    # take k from the Python string: len() of a tensor only works in eager mode
    k = len(kmer)
    if k == 0:
        raise ValueError("kmer must contain at least one character.")
    alphabet_with_unknown = alphabet + "N"
    indices = tf.constant([alphabet_with_unknown.index(x) for x in kmer])
    one_hot = tf.one_hot(indices, len(alphabet_with_unknown))
    encoded_kmers = make_k_mers(one_hot[tf.newaxis, ...], k=k, pivot_left=pivot_left)
    # Index batch and position explicitly. tf.squeeze without an axis would also
    # drop the k-mer dimensions when they have size 1 (k == 1), so that the
    # subsequent [0] / [-1] would pick a single scalar.
    if pivot_left:
        # the full k-mer is the one starting at the first position
        return encoded_kmers[0, 0]
    else:
        # the full k-mer is the one ending at the last position
        return encoded_kmers[0, -1]
    
    
# Test case: a single sequence "ACGTN" of length 5, one-hot encoded over
# (A, C, G, T, N) with N in the last channel.
sequences = tf.constant([[[1, 0, 0, 0, 0],
                        [0, 1, 0, 0, 0],
                        [0, 0, 1, 0, 0],
                        [0, 0, 0, 1, 0],
                        [0, 0, 0, 0, 1]]], dtype=tf.float32)
k = 3

# 3-mer representations with the pivot on the left and on the right end.
k_mers_left = make_k_mers(sequences, k, pivot_left=True)
k_mers_right = make_k_mers(sequences, k, pivot_left=False)

print("k_mers_left shape:", k_mers_left.shape)
print("k_mers_right shape:", k_mers_right.shape)

# Encode a single 4-mer that ends with the unknown character N.
kmer_string = "ACGN"
encoded_kmer_left = encode_kmer_string(kmer_string, pivot_left=True)
encoded_kmer_right = encode_kmer_string(kmer_string, pivot_left=False)

print("encoded_kmer_left shape:", encoded_kmer_left.shape)
print("encoded_kmer_right shape:", encoded_kmer_right.shape)

# Full tensor dumps; these produce long outputs.
print("k_mers_left:", k_mers_left)
print("k_mers_right:", k_mers_right)
print("encoded_kmer_left:", encoded_kmer_left)
print("encoded_kmer_right:", encoded_kmer_right)

k_mers_left shape: (1, 5, 16, 4)
k_mers_right shape: (1, 5, 16, 4)
encoded_kmer_left shape: (64, 4)
encoded_kmer_right shape: (64, 4)
k_mers_left: tf.Tensor(
[[[[0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [1.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]]

  [[0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.       0.       0.      ]
   [0.       0.     