In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Text summaries for all videos of Creator A (fill these in before running).
creator_a_summaries = [
    # Add more summaries here
]

# Text summaries for 5-10 competitor videos (fill these in before running).
competitor_summaries = [
    # Add more competitor summaries here
]

all_summaries = creator_a_summaries + competitor_summaries

if creator_a_summaries and competitor_summaries:
    # Fit ONE TF-IDF vocabulary over both groups so the two sets of vectors
    # live in the same feature space and can be compared directly.
    vectorizer = TfidfVectorizer()
    embeddings = vectorizer.fit_transform(all_summaries)

    # Rows follow the order of all_summaries: Creator A first, then competitors.
    creator_a_embeddings = embeddings[:len(creator_a_summaries)]
    competitor_embeddings = embeddings[len(creator_a_summaries):]

    # cosine_similarities[i, j] is the similarity between Creator A video i
    # and competitor video j.
    cosine_similarities = cosine_similarity(creator_a_embeddings, competitor_embeddings)
else:
    # scikit-learn rejects an empty corpus / empty side of the comparison, so
    # report the problem and fall back to an empty score matrix instead of
    # letting the cell crash with an opaque ValueError.
    print("Add at least one Creator A summary and one competitor summary to compute rankings.")
    cosine_similarities = np.zeros((len(creator_a_summaries), len(competitor_summaries)))

# Rank the competitors' similarity scores in ascending order (least similar
# first) for each video of Creator A.
for i, video_scores in enumerate(cosine_similarities):
    sorted_indices = np.argsort(video_scores)

    print(f"Creator A Video {i+1} vs Competitors Rankings:")
    for idx in sorted_indices:
        print(f"Competitor {idx + 1}: {video_scores[idx]}")


In [None]:
import os

import numpy as np
import openai

# Read the API key from the environment so it is never stored in the notebook.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
openai.api_key = api_key

# Embedding model to query. Completion models such as text-davinci-003 cannot
# be used with the embeddings endpoint.
EMBEDDING_MODEL = "text-embedding-ada-002"

# List of text summaries for all videos of Creator A
creator_a_summaries = [
    "Summary 1 Creator A",
    "Summary 2 Creator A",
    "Summary 3 Creator A",
    # Add more summaries here
]

# List of text summaries for 5-10 videos of competitors
competitor_summaries = [
    "Summary 1 Competitor",
    "Summary 2 Competitor",
    "Summary 3 Competitor",
    # Add more competitor summaries here
]


def embed_with_openai(summaries, model=EMBEDDING_MODEL):
    """Return one embedding vector (list of floats) per summary, in input order.

    Uses the module-level interface of the pre-1.0 ``openai`` SDK
    (``openai.Embedding.create``), matching the ``openai.api_key`` style used
    in this cell.
    NOTE(review): openai>=1.0 replaced this with ``openai.embeddings.create``
    and attribute-style responses -- confirm which SDK version is installed.

    Args:
        summaries: list of strings to embed.
        model: name of the embedding model to query.

    Returns:
        A list with one embedding per input string; empty if ``summaries`` is empty.
    """
    if not summaries:
        # The endpoint rejects an empty input list; nothing to embed anyway.
        return []
    response = openai.Embedding.create(model=model, input=list(summaries))
    # Each returned item carries the index of the input it belongs to; sort on
    # it so the vectors are guaranteed to line up with `summaries`.
    ordered = sorted(response["data"], key=lambda item: item["index"])
    return [item["embedding"] for item in ordered]


# Generate embeddings for Creator A's and the competitors' video summaries
creator_a_embeddings = embed_with_openai(creator_a_summaries)
competitor_embeddings = embed_with_openai(competitor_summaries)

# Calculate cosine similarity between each video summary of Creator A and each
# video summary of its competitors: cosine_similarities[i][j] compares
# Creator A video i with competitor video j.
cosine_similarities = []
for creator_embedding in creator_a_embeddings:
    creator_norm = np.linalg.norm(creator_embedding)
    similarities = [
        np.dot(creator_embedding, comp_embedding) / (creator_norm * np.linalg.norm(comp_embedding))
        for comp_embedding in competitor_embeddings
    ]
    cosine_similarities.append(similarities)

# Rank the competitors' similarity scores in ascending order (least similar
# first) for each video of Creator A
for i, video_scores in enumerate(cosine_similarities):
    sorted_indices = np.argsort(video_scores)

    print(f"Creator A Video {i+1} vs Competitors Rankings:")
    for idx in sorted_indices:
        print(f"Competitor {idx + 1}: {video_scores[idx]}")


In [None]:
from transformers import DistilBertTokenizer, DistilBertModel
import torch

# Load the pre-trained DistilBERT model and tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertModel.from_pretrained('distilbert-base-uncased')

# List of text summaries for all videos of Creator A
creator_a_summaries = [
    "Summary 1 Creator A",
    "Summary 2 Creator A",
    "Summary 3 Creator A",
    # Add more summaries here
]

# List of text summaries for 5-10 videos of competitors
competitor_summaries = [
    "Summary 1 Competitor",
    "Summary 2 Competitor",
    "Summary 3 Competitor",
    # Add more competitor summaries here
]


def mean_pool(last_hidden_state, attention_mask):
    """Collapse per-token embeddings into one vector per sequence.

    Args:
        last_hidden_state: tensor of shape (batch, seq_len, hidden) as returned
            by the model.
        attention_mask: tensor of shape (batch, seq_len) from the tokenizer;
            1 for real tokens, 0 for padding.

    Returns:
        Tensor of shape (batch, hidden): the average of the non-padding token
        embeddings of each sequence.
    """
    mask = attention_mask.unsqueeze(-1).to(last_hidden_state.dtype)
    summed = (last_hidden_state * mask).sum(dim=1)
    # clamp guards against division by zero for an all-padding row.
    token_counts = mask.sum(dim=1).clamp(min=1)
    return summed / token_counts


# Encode text summaries using the tokenizer
creator_a_encodings = tokenizer(creator_a_summaries, padding=True, truncation=True, return_tensors='pt')
competitor_encodings = tokenizer(competitor_summaries, padding=True, truncation=True, return_tensors='pt')

# Get one pooled embedding per summary. The raw last_hidden_state is per-token
# and padded to a per-batch length, so the two groups cannot be compared
# directly; pooling gives fixed-size (n_summaries, hidden) matrices.
with torch.no_grad():
    creator_a_embeddings = mean_pool(
        model(**creator_a_encodings)['last_hidden_state'],
        creator_a_encodings['attention_mask'],
    )
    competitor_embeddings = mean_pool(
        model(**competitor_encodings)['last_hidden_state'],
        competitor_encodings['attention_mask'],
    )

# Calculate cosine similarity between each video summary of Creator A and each
# video summary of its competitors. Broadcasting (n_a, 1, hidden) against
# (1, n_c, hidden) yields an (n_a, n_c) matrix where entry [i, j] compares
# Creator A video i with competitor video j.
cosine_similarities = torch.nn.functional.cosine_similarity(
    creator_a_embeddings.unsqueeze(1),
    competitor_embeddings.unsqueeze(0),
    dim=2,
)

# Rank the competitors' similarity scores in ascending order (least similar
# first) for each video of Creator A
for i, video_scores in enumerate(cosine_similarities):
    sorted_indices = torch.argsort(video_scores)

    print(f"Creator A Video {i+1} vs Competitors Rankings:")
    for idx in sorted_indices.tolist():
        print(f"Competitor {idx + 1}: {video_scores[idx].item()}")
