In [1]:
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

def preprocess(sentence):
    # Tokenize the sentence
    tokens = word_tokenize(sentence.lower())
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    
    # Lemmatize the tokens
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    
    # Return the preprocessed sentence
    return ' '.join(tokens)

def get_word_embeddings(sentence, target_word):
    # Preprocess the sentence
    preprocessed_sentence = preprocess(sentence)
    
    # Create a vectorizer instance
    vectorizer = TfidfVectorizer()
    
    # Fit and transform the preprocessed sentence
    tfidf_matrix = vectorizer.fit_transform([preprocessed_sentence])
    
    # Get the feature names (words)
    feature_names = vectorizer.get_feature_names()
    
    # Get the index of the target word
    target_word_index = feature_names.index(target_word)
    
    # Get the word embeddings
    embeddings = tfidf_matrix.toarray()[0]
    
    # Get the embedding value for the target word
    target_word_embedding = embeddings[target_word_index]
    
    return target_word_embedding

# Example usage
sentence = "The cat is sitting on the mat"
target_word = "cat"

# Get the word embeddings for the target word in the sentence
target_word_embedding = get_word_embeddings(sentence, target_word)

print(f"Word Embedding for '{target_word}': {target_word_embedding}")


Word Embedding for 'cat': 0.5773502691896258




In [2]:
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

def preprocess(sentence):
    # Tokenize the sentence
    tokens = word_tokenize(sentence.lower())
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    
    # Return the preprocessed sentence
    return ' '.join(tokens)

def get_word_embeddings(sentence, target_word):
    # Preprocess the sentence
    preprocessed_sentence = preprocess(sentence)
    
    # Create a vectorizer instance
    vectorizer = TfidfVectorizer()
    
    # Fit and transform the preprocessed sentence
    tfidf_matrix = vectorizer.fit_transform([preprocessed_sentence])
    
    # Get the feature names (words)
    feature_names = vectorizer.get_feature_names()
    
    # Get the index of the target word
    target_word_index = feature_names.index(target_word)
    
    # Get the word embeddings
    embeddings = tfidf_matrix.toarray()[0]
    
    # Get the embedding value for the target word
    target_word_embedding = embeddings[target_word_index]
    
    return target_word_embedding

# Example usage
sentence = "The cat is sitting on the mat"
target_word = "cat"

# Get the word embeddings for the target word in the sentence
target_word_embedding = get_word_embeddings(sentence, target_word)

print(f"Word Embedding for '{target_word}': {target_word_embedding}")


Word Embedding for 'cat': 0.5773502691896258


In [3]:
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

def lemmatize_word(token):
    # Get the word's part of speech tag
    pos_tag = nltk.pos_tag([token])[0][1][0].upper()
    
    # Map the POS tag to WordNet's POS tag format
    if pos_tag.startswith('J'):
        pos_tag = wordnet.ADJ
    elif pos_tag.startswith('V'):
        pos_tag = wordnet.VERB
    elif pos_tag.startswith('N'):
        pos_tag = wordnet.NOUN
    elif pos_tag.startswith('R'):
        pos_tag = wordnet.ADV
    else:
        pos_tag = wordnet.NOUN  # Default to noun if the POS tag is not recognized
    
    # Lemmatize the word using WordNet's lemmatizer
    lemmatizer = nltk.WordNetLemmatizer()
    lemma = lemmatizer.lemmatize(token, pos_tag)
    
    return lemma

def preprocess(sentence):
    # Tokenize the sentence
    tokens = word_tokenize(sentence.lower())
    
    # Remove stopwords and lemmatize the tokens
    stop_words = set(stopwords.words('english'))
    tokens = [lemmatize_word(token) for token in tokens if token not in stop_words]
    
    # Return the preprocessed sentence
    return ' '.join(tokens)

def get_word_embeddings(sentence, target_word):
    # Preprocess the sentence
    preprocessed_sentence = preprocess(sentence)
    
    # Create a vectorizer instance
    vectorizer = TfidfVectorizer()
    
    # Fit and transform the preprocessed sentence
    tfidf_matrix = vectorizer.fit_transform([preprocessed_sentence])
    
    # Get the feature names (words)
    feature_names = vectorizer.get_feature_names()
    
    # Get the index of the target word
    target_word_index = feature_names.index(target_word)
    
    # Get the word embeddings
    embeddings = tfidf_matrix.toarray()[0]
    
    # Get the embedding value for the target word
    target_word_embedding = embeddings[target_word_index]
    
    return target_word_embedding

# Example usage
sentence = "The cat is sitting on the mat"
target_word = "cat"

# Get the word embeddings for the target word in the sentence
target_word_embedding = get_word_embeddings(sentence, target_word)

print(f"Word Embedding for '{target_word}': {target_word_embedding}")


Word Embedding for 'cat': 0.5773502691896258


In [7]:
import numpy as np
from collections import Counter
import random
import math

def skipgram_negative_sampling(corpus, window_size, embedding_dim, num_negative_samples, learning_rate, epochs):
    # Step 1: Preprocess the corpus and create vocabulary
    words = [word for sentence in corpus for word in sentence]
    word_counts = Counter(words)
    vocabulary = list(word_counts.keys())
    vocab_size = len(vocabulary)
    word2idx = {word: idx for idx, word in enumerate(vocabulary)}
    idx2word = {idx: word for word, idx in word2idx.items()}

    # Step 2: Initialize embedding matrices
    target_embedding = np.random.uniform(-1, 1, (vocab_size, embedding_dim))
    context_embedding = np.random.uniform(-1, 1, (vocab_size, embedding_dim))

    # Step 3: Training the skip-gram model
    for _ in range(epochs):
        for sentence in corpus:
            for i, target_word in enumerate(sentence):
                target_idx = word2idx[target_word]

                # Generate context words within the window
                start = max(0, i - window_size)
                end = min(i + window_size, len(sentence))
                context_words = sentence[start:i] + sentence[i+1:end+1]

                for context_word in context_words:
                    context_idx = word2idx[context_word]

                    # Update target and context embeddings
                    target_emb = target_embedding[target_idx]
                    context_emb = context_embedding[context_idx]

                    # Calculate similarity between target and context embeddings
                    similarity = np.dot(target_emb, context_emb)

                    # Update target and context embeddings using gradient descent
                    target_embedding[target_idx] -= learning_rate * similarity * context_emb
                    context_embedding[context_idx] -= learning_rate * similarity * target_emb

                # Negative sampling
                for _ in range(num_negative_samples):
                    noise_word = random.choice(vocabulary)
                    noise_idx = word2idx[noise_word]

                    # Update target and noise embeddings using gradient descent
                    target_emb = target_embedding[target_idx]
                    noise_emb = context_embedding[noise_idx]

                    similarity = np.dot(target_emb, noise_emb)

                    # Update target and noise embeddings using gradient descent with negative similarity
                    target_embedding[target_idx] -= learning_rate * (1 - similarity) * noise_emb
                    context_embedding[noise_idx] -= learning_rate * (1 - similarity) * target_emb

    return target_embedding, context_embedding


# Example usage
corpus = [["I", "love", "to", "read", "books"],
          ["Reading", "is", "my", "favorite", "hobby"],
          ["Books", "expand", "my", "knowledge"]]

window_size = 2
embedding_dim = 100
num_negative_samples = 5
learning_rate = 0.01
epochs = 100

target_embedding, context_embedding = skipgram_negative_sampling(corpus, window_size, embedding_dim,
                                                                 num_negative_samples, learning_rate, epochs)

# Access the embeddings for a target word
target_word = "books"
target_idx = word2idx[target_word]
target_vector = target_embedding[target_idx]
print("Embedding for target word 'books':", target_vector)

# Access the embeddings for a context word
context_word = "reading"
context_idx = word2idx[context_word]
context_vector = context_embedding[context_idx]
print("Embedding for context word 'reading':", context_vector)


  context_embedding[noise_idx] -= learning_rate * (1 - similarity) * target_emb
  context_embedding[context_idx] -= learning_rate * similarity * target_emb


NameError: name 'word2idx' is not defined

In [2]:
import numpy as np
from collections import Counter
import random
import math
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string

nltk.download('stopwords')
nltk.download('punkt')

def skipgram_negative_sampling(corpus, window_size, embedding_dim, num_negative_samples, learning_rate, epochs):
    # Step 1: Preprocess the corpus and create vocabulary
    stop_words = set(stopwords.words('english'))
    punctuations = set(string.punctuation)
    words = [word.lower() for sentence in corpus for word in sentence if word.lower() not in stop_words and word.lower() not in punctuations]
    word_counts = Counter(words)
    vocabulary = list(word_counts.keys())
    vocab_size = len(vocabulary)
    word2idx = {word: idx for idx, word in enumerate(vocabulary)}
    idx2word = {idx: word for word, idx in word2idx.items()}

    # Step 2: Initialize embedding matrices
    target_embedding = np.random.uniform(-1, 1, (vocab_size, embedding_dim))
    context_embedding = np.random.uniform(-1, 1, (vocab_size, embedding_dim))

    # Step 3: Training the skip-gram model
    for _ in range(epochs):
        for sentence in corpus:
            for i, target_word in enumerate(sentence):
                target_idx = word2idx[target_word]

                # Generate context words within the window
                start = max(0, i - window_size)
                end = min(i + window_size, len(sentence))
                context_words = sentence[start:i] + sentence[i+1:end+1]

                for context_word in context_words:
                    context_idx = word2idx[context_word]

                    # Update target and context embeddings
                    target_emb = target_embedding[target_idx]
                    context_emb = context_embedding[context_idx]

                    # Calculate similarity between target and context embeddings
                    similarity = np.dot(target_emb, context_emb)

                    # Update target and context embeddings using gradient descent
                    target_embedding[target_idx] -= learning_rate * similarity * context_emb
                    context_embedding[context_idx] -= learning_rate * similarity * target_emb

                # Negative sampling
                for _ in range(num_negative_samples):
                    noise_word = random.choice(vocabulary)
                    noise_idx = word2idx[noise_word]

                    # Update target and noise embeddings using gradient descent
                    target_emb = target_embedding[target_idx]
                    noise_emb = context_embedding[noise_idx]

                    similarity = np.dot(target_emb, noise_emb)

                    # Update target and noise embeddings using gradient descent with negative similarity
                    target_embedding[target_idx] -= learning_rate * (1 - similarity) * noise_emb
                    context_embedding[noise_idx] -= learning_rate * (1 - similarity) * target_emb

    return target_embedding, context_embedding


# Example usage
corpus = [["i", "love", "to", "read", "books"],
          ["Reading", "is", "my", "favorite", "hobby"],
          ["Books", "expand", "my", "knowledge"]]

window_size = 2
embedding_dim = 100
num_negative_samples = 5
learning_rate = 0.01
epochs = 100

target_embedding, context_embedding = skipgram_negative_sampling(corpus, window_size, embedding_dim,
                                                                 num_negative_samples, learning_rate, epochs)

# Access the embeddings for a target word
target_word = "books"
word2idx = {word: idx for idx, word in enumerate(vocabulary)}  # Create word2idx using the vocabulary within the function
target_idx = word2idx[target_word]
target_vector = target_embedding[target_idx]
print("Embedding for target word 'books':", target_vector)

# Access the embeddings for a context word
context_word = "reading"
context_idx = word2idx[context_word]
context_vector = context_embedding[context_idx]
print("Embedding for context word 'reading':", context_vector)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


KeyError: 'i'

In [6]:
import numpy as np
import random
from collections import defaultdict
from math import exp, log

# Sample corpus
corpus = [
    'I',
    'love',
    'to',
    'play',
    'football',
    'with',
    'my',
    'friends'
]

# Set random seed for reproducibility
random.seed(42)

# Initialize word embeddings with random values
word_embeddings = defaultdict(lambda: np.random.uniform(-1, 1, 5))

# Define hyperparameters
window_size = 2
embedding_size = 5
learning_rate = 0.01
num_epochs = 1000

# Generate training data
training_data = []
for i in range(window_size, len(corpus) - window_size):
    center_word = corpus[i]
    context_words = corpus[i - window_size:i] + corpus[i + 1:i + window_size + 1]
    training_data.append((center_word, context_words))

# Generate noise word sampling table
word_freq = defaultdict(int)
for center_word, _ in training_data:
    word_freq[center_word] += 1

total_words = sum(word_freq.values())
word_sampling_table = []
for center_word in word_freq.keys():
    word_prob = word_freq[center_word] / total_words
    word_sampling_table.extend([center_word] * int(word_prob * 1e6))

# Logistic function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0
    random.shuffle(training_data)

    for center_word, context_words in training_data:
        center_embedding = word_embeddings[center_word]

        # Update embeddings for context words
        for context_word in context_words:
            context_embedding = word_embeddings[context_word]

            # Compute loss function
            noise_words = random.sample(word_sampling_table, k=5)  # Select 5 noise words
            noise_prob = sum(sigmoid(np.dot(center_embedding, word_embeddings[noise_word])) for noise_word in noise_words)
            context_prob = sigmoid(np.dot(center_embedding, context_embedding))
            loss = log(context_prob / noise_prob)

            # Update word embeddings using stochastic gradient descent
            gradient = (1 - context_prob) * center_embedding + sum(context_prob * word_embeddings[noise_word] for noise_word in noise_words)
            word_embeddings[center_word] -= learning_rate * gradient

            total_loss += loss

    if (epoch + 1) % 100 == 0:
        print(f"Epoch: {epoch + 1} Loss: {total_loss:.4f}")

# Print word embeddings
for word, embedding in word_embeddings.items():
    print(f"{word}: {embedding}")


Epoch: 100 Loss: -25.8659
Epoch: 200 Loss: -25.7636
Epoch: 300 Loss: -25.7524
Epoch: 400 Loss: -25.7512
Epoch: 500 Loss: -25.7510
Epoch: 600 Loss: -25.7510
Epoch: 700 Loss: -25.7510
Epoch: 800 Loss: -25.7510
Epoch: 900 Loss: -25.7510
Epoch: 1000 Loss: -25.7510
football: [ 1.58880190e-09  5.56114850e-10 -5.46111448e-11 -1.03751929e-09
 -9.75237605e-12]
to: [-4.29867850e-10 -3.01787997e-10  2.53605885e-10  7.61082327e-10
 -7.32855206e-10]
play: [-8.18868096e-11 -2.18262106e-11  2.43801340e-10 -4.22619308e-10
 -9.59988836e-11]
with: [-1.07356095e-09 -2.29034415e-10 -4.39095631e-10  6.76358087e-10
  8.45706406e-10]
my: [-0.99138595 -0.77163897 -0.83172242 -0.21786612  0.26191911]
love: [-0.92485305 -0.4197433   0.83177126  0.01199458  0.85986134]
friends: [ 0.64663647 -0.56129889  0.63498935  0.31828337 -0.69871042]
I: [ 0.837873   -0.17149479  0.46193055 -0.39327006  0.05000026]


In [7]:
import numpy as np
import random
from collections import defaultdict
from math import exp, log

# Sample corpus
corpus = [
    'I',
    'love',
    'to',
    'play',
    'football',
    'with',
    'my',
    'friends'
]

# Set random seed for reproducibility
random.seed(42)

# Initialize word embeddings with random values
word_embeddings = defaultdict(lambda: np.random.uniform(-1, 1, 5))

# Define hyperparameters
window_size = 2
embedding_size = 5
learning_rate = 0.01
num_epochs = 1000

# Generate training data
training_data = []
for i in range(window_size, len(corpus) - window_size):
    center_word = corpus[i]
    context_words = corpus[i - window_size:i] + corpus[i + 1:i + window_size + 1]
    training_data.append((center_word, context_words))

# Generate noise word sampling table
word_freq = defaultdict(int)
for center_word, _ in training_data:
    word_freq[center_word] += 1

total_words = sum(word_freq.values())
word_sampling_table = []
for center_word in word_freq.keys():
    word_prob = word_freq[center_word] / total_words
    word_sampling_table.extend([center_word] * int(word_prob * 1e6))

# Logistic function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0
    random.shuffle(training_data)

    for center_word, context_words in training_data:
        center_embedding = word_embeddings[center_word]

        # Update embeddings for context words
        for context_word in context_words:
            context_embedding = word_embeddings[context_word]

            # Compute loss function
            noise_words = random.sample(word_sampling_table, k=5)  # Select 5 noise words
            noise_prob = sum(sigmoid(np.dot(center_embedding, word_embeddings[noise_word])) for noise_word in noise_words)
            context_prob = sigmoid(np.dot(center_embedding, context_embedding))
            loss = log(context_prob / noise_prob)

            # Update word embeddings using stochastic gradient descent
            gradient = (1 - context_prob) * center_embedding + sum(context_prob * word_embeddings[noise_word] for noise_word in noise_words)
            word_embeddings[center_word] -= learning_rate * gradient

            total_loss += loss

    if (epoch + 1) % 100 == 0:
        print(f"Epoch: {epoch + 1} Loss: {total_loss:.4f}")

# Print word embeddings
print("Word Embeddings:")
for word, embedding in word_embeddings.items():
    embedding_str = ' '.join(f'{val:.4f}' for val in embedding)
    print(f"{word}: {embedding_str}")


Epoch: 100 Loss: -25.7516
Epoch: 200 Loss: -25.7440
Epoch: 300 Loss: -25.7496
Epoch: 400 Loss: -25.7508
Epoch: 500 Loss: -25.7510
Epoch: 600 Loss: -25.7510
Epoch: 700 Loss: -25.7510
Epoch: 800 Loss: -25.7510
Epoch: 900 Loss: -25.7510
Epoch: 1000 Loss: -25.7510
Word Embeddings:
football: 0.0000 0.0000 -0.0000 -0.0000 -0.0000
to: -0.0000 -0.0000 0.0000 0.0000 -0.0000
play: -0.0000 -0.0000 0.0000 0.0000 0.0000
with: -0.0000 -0.0000 0.0000 -0.0000 0.0000
my: -0.5145 0.1089 0.4041 -0.4982 0.2397
love: -0.7791 0.1602 0.4451 0.0216 -0.8995
friends: -0.5265 0.1321 0.2581 -0.2517 0.7662
I: -0.2670 0.1070 -0.2035 -0.4504 0.6840


In [9]:
import numpy as np
import random
from collections import defaultdict
from math import exp, log

# Sample corpus
corpus = [
    'I',
    'love',
    'to',
    'play',
    'football',
    'with',
    'my',
    'friends'
]

# Set random seed for reproducibility
random.seed(42)

# Initialize word embeddings with random values
word_embeddings = defaultdict(lambda: np.random.uniform(-1, 1, 5))

# Define hyperparameters
window_size = 2
embedding_size = 5
learning_rate = 0.01
num_epochs = 1000

# Generate training data
training_data = []
for i in range(window_size, len(corpus) - window_size):
    center_word = corpus[i]
    context_words = corpus[i - window_size:i] + corpus[i + 1:i + window_size + 1]
    training_data.append((center_word, context_words))

# Generate noise word sampling table
word_freq = defaultdict(int)
for center_word, _ in training_data:
    word_freq[center_word] += 1

total_words = sum(word_freq.values())
word_sampling_table = []
for center_word in word_freq.keys():
    word_prob = word_freq[center_word] / total_words
    word_sampling_table.extend([center_word] * int(word_prob * 1e6))

# Logistic function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0
    random.shuffle(training_data)

    print(f"Epoch {epoch + 1}/{num_epochs}:")
    for center_word, context_words in training_data:
        center_embedding = word_embeddings[center_word]

        # Update embeddings for context words
        for context_word in context_words:
            context_embedding = word_embeddings[context_word]

            # Compute loss function
            noise_words = random.sample(word_sampling_table, k=5)  # Select 5 noise words
            noise_prob = sum(sigmoid(np.dot(center_embedding, word_embeddings[noise_word])) for noise_word in noise_words)
            context_prob = sigmoid(np.dot(center_embedding, context_embedding))
            loss = log(context_prob / noise_prob)

            # Update word embeddings using stochastic gradient descent
            gradient = (1 - context_prob) * center_embedding + sum(context_prob * word_embeddings[noise_word] for noise_word in noise_words)
            word_embeddings[center_word] -= learning_rate * gradient

            total_loss += loss

    if (epoch + 1) % 100 == 0:
        print(f"  Loss: {total_loss:.4f}")

# Print word embeddings
print("Word Embeddings:")
for word, embedding in word_embeddings.items():
    embedding_str = ' '.join(f'{val:.4f}' for val in embedding)
    print(f"{word}: {embedding_str}")


TypeError: list indices must be integers or slices, not str

In [12]:
import numpy as np
import random
from collections import defaultdict
from math import exp, log

# Sample corpus
corpus = [
    'I',
    'love',
    'to',
    'play',
    'football',
    'with',
    'my',
    'friends'
]

# Set random seed for reproducibility
random.seed(42)

# Initialize word embeddings with random values
word_embeddings = defaultdict(lambda: np.random.uniform(-1, 1, 5))

# Define hyperparameters
window_size = 2
embedding_size = 5
learning_rate = 0.01
num_epochs = 1000

# Generate training data
training_data = []
for i in range(window_size, len(corpus) - window_size):
    center_word = corpus[i]
    context_words = corpus[i - window_size:i] + corpus[i + 1:i + window_size + 1]
    training_data.append((center_word, context_words))

# Generate noise word sampling table
word_freq = defaultdict(int)
for center_word, _ in training_data:
    word_freq[center_word] += 1

total_words = sum(word_freq.values())
word_sampling_table = []
for center_word in word_freq.keys():
    word_prob = word_freq[center_word] / total_words
    word_sampling_table.extend([center_word] * int(word_prob * 1e6))

# Generate embeddings for all target words
target_embeddings = defaultdict(lambda: np.random.uniform(-1, 1, embedding_size))
# Generate embeddings for all context words and noise words
context_embeddings = defaultdict(lambda: np.random.uniform(-1, 1, embedding_size))
noise_embeddings = {}

# Logistic function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0
    random.shuffle(training_data)

    for center_word, context_words in training_data:
        target_embedding = target_embeddings[center_word]

        # Update embeddings for context words
        for context_word in context_words:
            context_embedding = context_embeddings[context_word]

            # Compute loss function
            noise_words = random.sample(word_sampling_table, k=5)  # Select 5 noise words
            noise_embeddings = {noise_word: word_embeddings[noise_word] for noise_word in noise_words}
            noise_prob = sum(sigmoid(np.dot(target_embedding, noise_embeddings[noise_word])) for noise_word in noise_words)
            context_prob = sigmoid(np.dot(target_embedding, context_embedding))
            loss = log(context_prob / noise_prob)

            # Update target word embeddings using stochastic gradient descent
            gradient = (1 - context_prob) * target_embedding + sum(context_prob * noise_embeddings[noise_word] for noise_word in noise_words)
            target_embeddings[center_word] -= learning_rate * gradient

            total_loss += loss

    if (epoch + 1) % 100 == 0:
        print(f"Epoch: {epoch + 1} Loss: {total_loss:.4f}")

# Print target word embeddings
print("Target Word Embeddings:")
for word, embedding in target_embeddings.items():
    embedding_str = ' '.join(f'{val:.4f}' for val in embedding)
    print(f"{word}: {embedding_str}")

Epoch: 100 Loss: -19.4994
Epoch: 200 Loss: -19.8293
Epoch: 300 Loss: -17.9225
Epoch: 400 Loss: -18.4023
Epoch: 500 Loss: -19.2458
Epoch: 600 Loss: -20.7343
Epoch: 700 Loss: -18.9494
Epoch: 800 Loss: -18.6678
Epoch: 900 Loss: -20.5577
Epoch: 1000 Loss: -20.9200
Target Word Embeddings:
football: 0.5141 -0.5435 -0.1455 2.4211 -0.6335
play: 0.3464 -0.2436 -0.1005 1.4191 -0.4414
with: 0.5737 -0.5057 -0.0869 2.5148 -0.7381
to: 0.5348 -0.4959 -0.1736 2.5381 -0.7130
