In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

# Sample corpus
corpus = ["king palace", "queen palace", "man home", "woman home"]

# Hyperparameters
embedding_dim = 2
learning_rate = 0.01
epochs = 1000

# Vocabulary and word-to-index mapping
vocab = ["king", "queen", "man", "woman", "palace", "home"]
word_to_ix = {word: i for i, word in enumerate(vocab)}

# Defining the Skip-gram model
class SkipGram(nn.Module):
    def __init__(self, vocab_size, embed_dim):
        super(SkipGram, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embed_dim)

    def forward(self, target_word):
        return self.embeddings(target_word)

# Training data preparation (target, context)
pairs = []
for sentence in corpus:
    words = sentence.split()
    for i, target_word in enumerate(words):
        context = [words[j] for j in range(max(0, i-1), min(len(words), i+2)) if j != i]
        pairs.extend((word_to_ix[target_word], word_to_ix[context_word]) for context_word in context)


# Model, loss function, and optimizer initialization
model = SkipGram(len(vocab), embedding_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training
for epoch in range(epochs):
    total_loss = 0
    for target, context in pairs:
        optimizer.zero_grad()
        target_var = torch.LongTensor([target])
        context_var = torch.LongTensor([context])
        embed = model(target_var)
        scores = torch.matmul(embed, model.embeddings.weight.t())  # Dot product with all embeddings
        loss = criterion(scores, context_var)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        

# Extract embeddings
embeddings = model.embeddings.weight.data.numpy()
print("Word Embeddings:")
for word, i in word_to_ix.items():
    print(f"{word}: {embeddings[i]}")



Word Embeddings:
king: [ 0.17706524 -1.6910555 ]
queen: [ 0.5913763 -1.6049011]
man: [-0.30091476  1.7737799 ]
woman: [-0.4225085  1.7600123]
palace: [ 0.43786287 -1.8689765 ]
home: [-0.40699705  1.9853342 ]


In [48]:
a = 'apple,cat'

In [59]:
import pandas as pd
df = pd.DataFrame()

df['A'] = ['apple,cat', 'Boston, caty']
df
# df['B'] = df['A'].apply(split(','))

Unnamed: 0,A
0,"apple,cat"
1,"Boston, caty"


In [64]:
df["b"] = df['A'].apply(lambda x: len(x.split(",")))

In [65]:
df

Unnamed: 0,A,b
0,"apple,cat",2
1,"Boston, caty",2


In [None]:
import pandas as pd

# Creating a DataFrame
df = pd.DataFrame({
    'A': ['apple cat', 'Boston caty'],
    'B': ['dog orange', 'NewYork city']
})

# Function to count words in a string
def count_words(s):
    words = s.split()  # Splitting by space
    return len(words)

# Applying the function to each element in columns 'A' and 'B', and summing the word counts
df['total_word_count'] = df.apply(lambda row: count_words(row['A']) + count_words(row['B']), axis=1)

print(df)


In [66]:
df

Unnamed: 0,A,b
0,"apple,cat",2
1,"Boston, caty",2


In [67]:
df["C"] = df['b'].apply(lambda x: x + 200)

In [68]:
df

Unnamed: 0,A,b,C
0,"apple,cat",2,202
1,"Boston, caty",2,202


In [74]:
df["D"] = df.apply(lambda row: row['A'] * 2 + str(row['b'] * 300) + str(row['C'] * 200), axis =1 )

In [75]:
df

Unnamed: 0,A,b,C,D
0,"apple,cat",2,202,"apple,catapple,cat60040400"
1,"Boston, caty",2,202,"Boston, catyBoston, caty60040400"


In [77]:
df["E"] = df.apply(lambda row: row['A'] * 2 + str(row['b'] * 300) + str(row['C'] * 200), axis =1 )

In [78]:
df

Unnamed: 0,A,b,C,D,E
0,"apple,cat",2,202,"apple,catapple,cat60040400","apple,catapple,cat60040400"
1,"Boston, caty",2,202,"Boston, catyBoston, caty60040400","Boston, catyBoston, caty60040400"


In [80]:
df1 = df[["b"]]

In [81]:
df1

Unnamed: 0,b
0,2
1,2


In [90]:
df2 = df1.apply(lambda x: x**3)
df2

Unnamed: 0,b
0,8
1,8


In [104]:
df = pd.DataFrame()

In [105]:
df['company'] = ['A', 'A', 'B', 'C']
df

Unnamed: 0,company
0,A
1,A
2,B
3,C


In [106]:
df["Profits"] = [100, 200, 600, 800]

In [107]:
df

Unnamed: 0,company,Profits
0,A,100
1,A,200
2,B,600
3,C,800


In [112]:
df.groupby('company')['Profits'].sum().reset_index()

Unnamed: 0,company,Profits
0,A,300
1,B,600
2,C,800


In [113]:
df.groupby('company')['Profits'].sum().rank(3)

ValueError: No axis named 3 for object type Series