In [1]:
#  Install dependencies (run once in Colab)
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git
!pip install Pillow scikit-learn pandas numpy torch

Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-deew88iq
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-deew88iq
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->clip==1.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

In [2]:
# Imports
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import clip
import torch
from PIL import Image

In [3]:
# Define Clip Dataset (static tags)
# One row per clip: (clip_id, title, space-separated tags).
clip_records = [
    (1, 'Epic clutch moment', 'clutch headshot'),
    (2, 'Hilarious fail in Fortnite', 'fail comedy'),
    (3, 'Insane headshot kill', 'headshot clutch'),
    (4, 'Game-winning play', 'highlight clutch'),
    (5, 'Streamer rage quits', 'rage fail'),
    (6, 'Funny team voice moment', 'comedy voice'),
]
clips = pd.DataFrame(clip_records, columns=['clip_id', 'title', 'tags'])

print("=== Clip Dataset ===")
print(clips)

=== Clip Dataset ===
   clip_id                       title              tags
0        1          Epic clutch moment   clutch headshot
1        2  Hilarious fail in Fortnite       fail comedy
2        3        Insane headshot kill   headshot clutch
3        4           Game-winning play  highlight clutch
4        5         Streamer rage quits         rage fail
5        6     Funny team voice moment      comedy voice


In [4]:
# CLIP-based Tagger (Prototype 1)
# Run on the GPU when one is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model, preprocess = clip.load('ViT-B/32', device=device)

# Candidate labels that get_clip_tags scores each image against.
tag_list = ['clutch', 'headshot', 'fail', 'comedy', 'highlight']
text_inputs = clip.tokenize(tag_list).to(device)

# The tag embeddings are fixed inputs used only for inference, so encode them
# without autograd tracking; otherwise the tensor keeps a computation graph
# alive for the lifetime of the kernel.
with torch.no_grad():
    text_embeds = model.encode_text(text_inputs).float()

def get_clip_tags(image_path, top_k=2):
    """
    Given an image file path, returns top_k tags using CLIP zero-shot.

    Uses the module-level ``model``, ``preprocess``, ``text_embeds``,
    ``tag_list`` and ``device`` defined in this cell.

    Args:
        image_path: Path of the image file to tag.
        top_k: Number of tags to return. Values larger than the number of
            candidate tags are clamped to ``len(tag_list)``.

    Returns:
        A list of ``(tag, probability)`` tuples ordered from most to least
        likely, where the probability is the softmax over all candidate tags.
    """
    # topk raises if asked for more entries than exist; return every tag instead.
    k = min(top_k, len(tag_list))

    # Close the file handle as soon as the image has been converted to a tensor.
    with Image.open(image_path) as img:
        image = preprocess(img).unsqueeze(0).to(device)

    with torch.no_grad():
        image_embed = model.encode_image(image).float()
        # CLIP zero-shot scores are scaled cosine similarities, so both sides
        # must be unit length before the dot product. Normalising the text
        # embeddings here keeps this function correct regardless of how
        # text_embeds was prepared (re-normalising unit vectors is a no-op).
        image_unit = image_embed / image_embed.norm(dim=-1, keepdim=True)
        text_unit = text_embeds / text_embeds.norm(dim=-1, keepdim=True)
        probs = (100.0 * image_unit @ text_unit.T).softmax(dim=-1)
        scores, indices = probs[0].topk(k)

    return [
        (tag_list[tag_idx], score)
        for score, tag_idx in zip(scores.tolist(), indices.tolist())
    ]

# Example usage (uncomment and provide your own image):
# tags = get_clip_tags('your_frame.png')
# print('Predicted tags:', tags)


100%|███████████████████████████████████████| 338M/338M [00:12<00:00, 29.1MiB/s]


In [5]:
# Smart Clip Learner: build user profile (Prototype 3)
# Simulated interactions:
liked_clip_ids   = [1, 3]   # user tapped 🔥
watched_clip_ids = [2, 4]   # user fully watched

# Bag-of-words encoding of every clip's tag string; the vocabulary is learned
# from the full catalogue so all vectors share the same columns.
vectorizer = CountVectorizer()
all_tags_matrix = vectorizer.fit_transform(clips['tags'])

# Encode only the clips the user interacted with, one matrix per signal.
liked_mask = clips['clip_id'].isin(liked_clip_ids)
watched_mask = clips['clip_id'].isin(watched_clip_ids)
liked_tags_matrix   = vectorizer.transform(clips.loc[liked_mask, 'tags'])
watched_tags_matrix = vectorizer.transform(clips.loc[watched_mask, 'tags'])

# Per-tag counts for each signal, combined with weights: likes=2.0, watched=1.0
liked_tag_counts = liked_tags_matrix.sum(axis=0)
watched_tag_counts = watched_tags_matrix.sum(axis=0)
user_vector = (2.0 * liked_tag_counts) + (1.0 * watched_tag_counts)
user_vector_array = np.asarray(user_vector)

# Tabulate the preference score per tag, strongest preference first.
user_profile = pd.DataFrame(
    {'score': user_vector_array.flatten()},
    index=vectorizer.get_feature_names_out(),
).sort_values(by='score', ascending=False)

print("\n=== User Preference Vector (likes + watch) ===")
print(user_profile)



=== User Preference Vector (likes + watch) ===
           score
clutch       5.0
headshot     4.0
comedy       1.0
fail         1.0
highlight    1.0
rage         0.0
voice        0.0


In [6]:
# 6. Recommendation Engine (Prototype 2)
# Score every clip by the cosine similarity between the user's preference
# vector and the clip's tag vector, and store it on the clip table.
similarity_scores = cosine_similarity(user_vector_array, all_tags_matrix).flatten()
clips['score'] = similarity_scores

# Exclude already seen clips
seen_ids = {*liked_clip_ids, *watched_clip_ids}
is_unseen = ~clips['clip_id'].isin(seen_ids)
unseen_clips = clips[is_unseen]

# Keep the three best-scoring unseen clips.
recommended = unseen_clips.sort_values(by='score', ascending=False).head(3)

print("\n=== Recommended Clips Based on Integrated System ===")
print(recommended[['clip_id', 'title', 'tags', 'score']])


=== Recommended Clips Based on Integrated System ===
   clip_id                    title          tags   score
4        5      Streamer rage quits     rage fail  0.1066
5        6  Funny team voice moment  comedy voice  0.1066
