In [28]:
import numpy as np

class WordEmbedding:
    """Toy word-embedding model trained by averaging context vectors.

    Each vocabulary word starts with a random vector drawn by
    ``np.random.rand``. Training repeatedly moves a target word's vector a
    small step toward the arithmetic mean of the vectors of the
    in-vocabulary words that surround it.
    """

    def __init__(self, vocab, embedding_dim):
        """Create one random vector of length ``embedding_dim`` per word.

        Args:
            vocab: Iterable of hashable words to embed.
            embedding_dim: Length of every embedding vector.
        """
        self.vocab = vocab
        self.embedding_dim = embedding_dim
        self.embeddings = {word: np.random.rand(embedding_dim) for word in vocab}

    def update_embeddings(self, target_word, context_words, learning_rate=0.01):
        """Move the target word's vector toward the mean of its context.

        Words missing from the vocabulary are ignored. If the target word is
        unknown, or none of the context words are known, nothing is changed.

        Args:
            target_word: Word whose embedding is adjusted.
            context_words: Words surrounding the target.
            learning_rate: Fraction of the distance to the context mean that
                is covered in this single step.
        """
        target_vector = self.embeddings.get(target_word)
        if target_vector is None:
            return

        # Keep only the context words that actually have an embedding.
        lookups = (self.embeddings.get(word) for word in context_words)
        context_vectors = [vector for vector in lookups if vector is not None]

        # The mean of an empty set is NaN; taking the step anyway would
        # permanently poison the target embedding, so skip the update.
        if not context_vectors:
            return

        target_vector_arr = np.asarray(target_vector)
        context_mean = np.mean(np.asarray(context_vectors), axis=0)

        # Rebind (rather than mutate in place) so previously returned vectors
        # keep their old values.
        self.embeddings[target_word] = target_vector_arr + learning_rate * (
            context_mean - target_vector_arr
        )

    def train(self, corpus, window_size, learning_rate, epochs):
        """Run ``epochs`` passes over ``corpus``, updating every word in turn.

        Args:
            corpus: Iterable of tokenized sentences (sequences of words).
            window_size: Number of neighbours considered on each side.
            learning_rate: Step size forwarded to ``update_embeddings``.
            epochs: Number of full passes over the corpus.
        """
        for _ in range(epochs):
            for sentence in corpus:
                for i, target_word in enumerate(sentence):
                    context = self.get_context_words(sentence, i, window_size)
                    self.update_embeddings(target_word, context, learning_rate)

    def get_context_words(self, sentence, target_index, window_size):
        """Return the words within ``window_size`` positions of the target.

        The window is clipped to the sentence boundaries and the word at
        ``target_index`` itself is excluded.
        """
        start = max(0, target_index - window_size)
        end = min(len(sentence), target_index + window_size + 1)
        return [sentence[i] for i in range(start, end) if i != target_index]

    def get_word_vector(self, word):
        """Return the embedding for ``word``, or ``None`` if it is unknown."""
        return self.embeddings.get(word)

# Software-engineering embedding demo: vocabulary, tokenized corpus, training run

vocab = [
    "Requirement", "specifications",
    "Design", "blueprint",
    "Coding", "functional", "code",
    "Testing", "examination",
    "Deployment", "finalized",
]
corpus = [
    [
        "Clear", "and", "concise", "specifications", "pave", "the", "path",
        "for", "successful", "project", "endeavors.", "Requirement",
    ],
    [
        "Crafting", "an", "elegant", "blueprint", "ensures", "the",
        "foundation", "for", "a", "robust", "and", "scalable", "solution.",
        "Design",
    ],
    [
        "Transforming", "conceptualized", "designs", "into", "functional",
        "code", "demands", "precision", "and", "creativity.", "Coding",
    ],
    [
        "Rigorous", "examination", "guarantees", "the", "reliability", "and",
        "functionality", "of", "the", "developed", "system.", "Testing",
    ],
    [
        "Seamlessly", "rolling", "out", "the", "finalized", "product",
        "heralds", "the", "culmination", "of", "diligent", "efforts", "and",
        "signifies", "the", "beginning", "of", "its", "journey.", "Deployment",
    ],
]

# Hyperparameters used by the model
embedding_dim = 5  # Adjust as needed
window_size = 15
learning_rate = 0.01
epochs = 1000

# Building the model draws the random initial vectors, one per vocab word.
model = WordEmbedding(vocab, embedding_dim)

# Horizontal rule reused between the report sections.
separator = "-" * 100

print(separator)
print(f"Hyperparamters: \n Embedding Dimension : {embedding_dim} \n Window Size : {window_size}")
print(f"Learning Rate : {learning_rate} \n epochs : {epochs}")
print(separator)
print(f"Model Parameters:\n Loss function - Arithmetic mean")
print(separator)

# Report the weights before training
for value in vocab:
    word_vector = model.get_word_vector(value)
    print(f"Vector Representation Initial {value}:", word_vector)

# Fit the embeddings on the corpus
model.train(corpus, window_size, learning_rate, epochs)

print(separator)

# Report the weights after training
for value in vocab:
    word_vector = model.get_word_vector(value)
    print(f"Vector representation Final {value}:", word_vector)

----------------------------------------------------------------------------------------------------
Hyperparamters: 
 Embedding Dimension : 5 
 Window Size : 15
Learning Rate : 0.01 
 epochs : 1000
----------------------------------------------------------------------------------------------------
Model Parameters:
 Loss function - Arithmetic mean
----------------------------------------------------------------------------------------------------
Vector Representation Initial Requirement: [0.76055849 0.844884   0.202172   0.96597264 0.41830429]
Vector Representation Initial specifications: [0.02524125 0.82176181 0.38187003 0.37768834 0.87762155]
Vector Representation Initial Design: [0.48026498 0.06361849 0.54324099 0.04011431 0.42374923]
Vector Representation Initial blueprint: [0.30072414 0.29543874 0.25827106 0.29171076 0.23311438]
Vector Representation Initial Coding: [0.48695263 0.02867212 0.89973396 0.98180342 0.71614756]
Vector Representation Initial functional: [0.94788655 0.1