In [1]:
#only external lib allowed
import torch
import torch.nn as nn

import heapq # stdlib, basically the huffman tree.

In [9]:
class HierarchicalSoftmaxNode(nn.Module):
    
    def __init__(self, symbol, freq, classifier_vector=None, left=None, right=None):
        
        super(HierarchicalSoftmaxNode, self).__init__()
        
        
        self.classifier_vector = classifier_vector  # Classifier vector associated with this node
        
        
        self.left = left  # Reference to the left child node
        self.right = right  # Reference to the right child node
        self.symbol = symbol    # Leaf node: character
        self.freq = freq  # Internal node: value is the frequency


        
        # tree direction 0 or 1 
        self.huff = ''
        
        
    #comparator, lt
    def __lt__(self, other):
        return self.freq < other.freq
    
    #tostring   
    def __str__(self):
        return '(' + str(self.symbol)+','+str(self.freq) + ',' + str(self.huff) + ')'
    
        
    #What is done to the input it receives
    def forward(self, input):
        # Compute the probability based on the classifier vector
        # use a sigmoid function here
        return torch.sigmoid(torch.dot(self.classifier_vector, input))

In [3]:
def dict_to_nodes(d,vector_size):
    
    nodes = []
    
    for k,v in d.items():

        heapq.heappush(nodes, HierarchicalSoftmaxNode(symbol=k,freq=v,classifier_vector=torch.randn(vector_size)))
    

    return nodes

In [4]:
#returns root of the tree.
def build_tree(nodes):
    
    while len(nodes) > 1:
  
        # sort all the nodes in ascending order 
        # based on their frequency 
        left = heapq.heappop(nodes)
        right = heapq.heappop(nodes)

        # assign directional value to these nodes 
        # least
        
        left.huff = '0'
        #2nd least
        right.huff = '1'
        
        # combine the 2 smallest nodes to create 
        # new INTERNAL node as their parent 
        newNode = HierarchicalSoftmaxNode(symbol=left.symbol+right.symbol,freq=left.freq+right.freq, left=left, right=right,classifier_vector=torch.randn(vector_size))
        
        heapq.heappush(nodes, newNode)
        
    return nodes

In [5]:
# utility function for printing
def in_order_traversal(node,path):
    
    if node.left != None:
        
        in_order_traversal(node.left,path + node.left.huff)
    
    print(node)
    print(path)
    
    if node.right != None:
  
        in_order_traversal(node.right,path + node.huff)
    

In [6]:
# # Example usage:
# # Create a simple hierarchical softmax tree structure with three nodes
# node1 = HierarchicalSoftmaxNode(classifier_vector=torch.randn(10))
# node2 = HierarchicalSoftmaxNode(classifier_vector=torch.randn(10))
# node3 = HierarchicalSoftmaxNode(classifier_vector=torch.randn(10))

# # Set the hierarchy by connecting nodes
# node1.left = node2
# node1.right = node3

# # Calculate probability for a binary path (e.g., 01)
# #random input vector...
# input_vector = torch.randn(10)  # Example input vector
# print(input_vector)
# path = [0, 1]
# current_node = node1

# for bit in path:
#     if bit == 0:
#         current_node.left = node2
#     else:
#         current_node.left = node3

# print(current_node)

        
# probability = current_node(input_vector)
# print("Probability:", probability.item())

In [7]:
# Example usage
if __name__ == "__main__":

    ft = {
        
        'c':63,
        'a':27,
        'z':72,
        'b':17,
        'y':89
        
    }
    #vector size needs to be consistent
    vector_size = 100
    nodes = dict_to_nodes(ft,vector_size)

 
    ht = build_tree(nodes)
    root = ht[0]
#     print(root.left.left.left)
    print(in_order_traversal(root,''))

    

(b,17,0)
000
(ba,44,0)
00
(a,27,1)
000
(bac,107,0)
0
(c,63,1)
00
(baczy,268,)

(z,72,0)
0
(zy,161,1)

(y,89,1)
1
None


In [8]:
# # Calculate probability for a binary path (e.g., 01)
# #random input vector...
input_vector = torch.randn(100)  # Example input vector
print(input_vector)

path = [0, 1]
# 
# nodec = HierarchicalSoftmaxNode(symbol='s',freq=2,classifier_vector=torch.randn(100))

# for bit in path:
#     if bit == 0:
#         current_node.left = node2
#     else:
#         current_node.left = node3

# print(current_node)

        
probability = root(input_vector)
print("Probability:", probability.item())

tensor([ 0.8634, -0.3188, -0.6573,  0.3764,  0.8810,  0.8499, -0.2471,  0.4599,
        -0.2958,  1.7726, -0.2420,  0.0533,  0.2094, -1.0269,  0.7321, -0.0689,
         0.7164,  0.0538,  0.4742, -0.3037,  0.5839, -0.3113,  1.2896,  0.4171,
        -0.1678,  1.0108, -0.9573, -1.7796,  1.4466, -1.3312,  0.1728,  2.4054,
         0.5602,  0.2477,  1.5449,  0.4252, -0.1937,  0.6368,  0.1779,  0.7138,
        -0.1014, -0.4125,  0.6675, -0.1669,  0.5386, -1.3797, -0.2238, -1.9124,
         0.1125,  1.9674,  0.7533,  0.2484, -0.7502, -0.6302,  1.3011,  1.1563,
        -0.1578, -0.5155, -0.7238,  0.3146, -0.0548, -0.0721,  1.1268, -0.3757,
        -0.7721,  1.5280, -0.4535, -0.8227,  1.1911, -0.3509, -0.4551,  0.7761,
         0.2131, -0.3325,  0.1012,  0.2210, -0.1523, -0.4913, -1.2180,  0.0413,
        -0.0935, -0.2708, -0.0610,  2.0093,  0.3620,  0.6539, -0.3561,  0.4784,
         0.1573,  0.3050,  0.6772, -0.1379,  2.3391, -0.2844,  0.6338,  0.4322,
         1.3598,  0.0763,  0.0296, -0.26

In [10]:
#TODO feed in the word embedding weights from the first model...... standardise the data
#Todo train the model.

class LanguageModel(torch.nn.Module):
  def __init__(self, embedding_weights, num_classes=7):
    super(Language, self).__init__()
    #The embedding weights...???
    #THe embedding layer is basically the embedding encoding. THey have the same values. THe word encoding is just a lookup table of the weights.
    vocab_size, embedding_dim = embedding_weights.size()
    
    self.embeddings = torch.nn.Embedding(vocab_size, embedding_dim)
    #weights are being loaded, not the encoded output.
    self.embeddings.load_state_dict({'weight': embedding_weights})
    # self.embeddings.requires_grad = False  # Optional: Freeze embeddings
    self.linear = torch.nn.Linear(embedding_dim, num_classes)

  def forward(self, inputs):
    embeds = self.embeddings(inputs)
    # Average pooling along the sequence length
    pooled = torch.mean(embeds, dim=1)
    output = self.linear(pooled)
    return output

In [None]:
def train(model):
    
    
    

In [None]:
if __name__ == '__main__':
  cbow = CBOW(20000, 50)
  emb_weights = cbow.embeddings.weight.data # shape(20.000, 50)
    
    #use this from the CBOW
  lang = Language(emb_weights)
  sentence = torch.tensor([[5, 89, 3]]) # shape(1, 3)
  out = lang(sentence) # shape(1, 7)