In [None]:
!pip install sentence_transformers umap umap-learn langchain cohere faiss-cpu textract moviepy moviepy google-cloud-speech pandas

In [None]:
!pip install opencv-python-headless ipywidgets pytube

In [None]:
!pip install git+https://github.com/openai/whisper.git
!sudo apt update && sudo apt install ffmpeg
!pip install librosa

import whisper
import time
import librosa
import soundfile as sf
import re
import os
import pandas as pd

# Load the three English-only Whisper checkpoints in order (tiny, base, small).
# NOTE(review): only `tiny_model` is used in the cells visible here; the other two
# are kept because later cells may rely on them — confirm before dropping.
tiny_model, base_model, small_model = (
    whisper.load_model(checkpoint)
    for checkpoint in ("tiny.en", "base.en", "small.en")
)

In [None]:
import os
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip

# Source video to be split, and the directory that receives the fragments.
input_video_file = "/content/test.avi"
output_folder = "/content/video_fragments/"
# Create the fragment directory up front; exist_ok makes re-running this cell harmless.
os.makedirs(output_folder, exist_ok=True)

def split_video_into_fragments(input_video_file, fragment_duration=10, output_dir=None):
    """Cut a video into consecutive fragments named ``fragment_<n>.avi``.

    Fragments are numbered from 1 and each covers ``fragment_duration`` seconds.
    When whole seconds remain after the last full-length fragment, they are
    written as a final, shorter fragment instead of being discarded.

    Args:
        input_video_file: Path of the video to split.
        fragment_duration: Length of each fragment in seconds; must be positive.
        output_dir: Directory that receives the fragments. Defaults to the
            notebook-level ``output_folder``.

    Returns:
        List of the fragment paths that were written, in playback order.

    Raises:
        ValueError: If ``fragment_duration`` is not positive.
    """
    if fragment_duration <= 0:
        # A non-positive step would never advance the loop below.
        raise ValueError("fragment_duration must be positive")

    target_dir = output_folder if output_dir is None else output_dir
    os.makedirs(target_dir, exist_ok=True)

    # The clip is opened only to read its length; close it right away so the
    # underlying reader does not stay open for the rest of the session.
    clip = VideoFileClip(input_video_file)
    try:
        # Truncated to whole seconds, so a sub-second tail is ignored.
        duration = int(clip.duration)
    finally:
        clip.close()

    fragment_paths = []
    fragment_start = 0
    fragment_num = 1

    while fragment_start < duration:
        # Clamp the last fragment to the end of the video.
        fragment_end = min(fragment_start + fragment_duration, duration)
        output_file = os.path.join(target_dir, f"fragment_{fragment_num}.avi")
        ffmpeg_extract_subclip(input_video_file, fragment_start, fragment_end, targetname=output_file)
        fragment_paths.append(output_file)

        fragment_start += fragment_duration
        fragment_num += 1

    return fragment_paths

# Split the configured input video into 10-second fragments.
split_video_into_fragments(input_video_file, fragment_duration=10)

In [None]:
# Transcribe every fragment found in `output_folder` and collect the results in `df`.
# Fragments are discovered on disk (instead of assuming exactly seven) and processed
# in numeric order, so fragment_10 comes after fragment_9.
fragment_pattern = re.compile(r"fragment_(\d+)\.avi")

numbered_fragments = []
for entry in os.listdir(output_folder):
    match = fragment_pattern.fullmatch(entry)
    if match:
        numbered_fragments.append((int(match.group(1)), entry))
numbered_fragments.sort()

if not numbered_fragments:
    raise FileNotFoundError(f"No fragment_<n>.avi files found in {output_folder}")

transcriptions = []
file_names = []

for _, video_file in numbered_fragments:
    stem = os.path.splitext(video_file)[0]
    video_path = os.path.join(output_folder, video_file)
    audio_path = os.path.join(output_folder, stem + ".wav")

    # Decode the fragment's audio at a 16 kHz sample rate and store it as WAV.
    y, sr = librosa.load(video_path, sr=16000)
    sf.write(audio_path, y, sr)

    result = tiny_model.transcribe(audio_path)
    text = result["text"].strip()
    # Put a blank line after each sentence-ending ". " to make the text easier to read.
    text = text.replace(". ", ".\n\n")

    text_path = os.path.join(output_folder, stem + ".txt")
    # Explicit encoding so non-ASCII characters are written the same on every platform.
    with open(text_path, "w", encoding="utf-8") as f:
        f.write(text)
    transcriptions.append(text)
    file_names.append(video_file)

df = pd.DataFrame({'File Name': file_names, 'Transcription': transcriptions})
df

In [None]:
# Show the file-name / transcription table.
df

In [None]:
!pip install sentence-transformers

In [None]:
!pip install umap-learn
import umap.umap_ as umap

In [None]:
!pip install cohere

In [None]:
# Persist the table to CSV. The reader further down unpacks three fields per row
# (index, file name, transcription), so the index column must stay in the file.
df.to_csv("df.csv")

In [None]:
from langchain.embeddings import CohereEmbeddings

In [None]:
import csv
import seaborn as sns
from scipy import spatial
from sklearn.preprocessing import StandardScaler
from sentence_transformers import SentenceTransformer

# Read the saved fragment table and embed each transcription.
# Rows are kept as parallel lists so that two fragments with identical text
# (e.g. both silent) stay separate; keying by the text would merge them.
# No variable is named `map` or `id`, so the builtins stay usable in later cells.
fragment_files = []
transcriptions = []
with open('df.csv', newline='', encoding='utf-8') as csvfile:
    fragments = csv.reader(csvfile, delimiter=',', quotechar='"')
    next(fragments)  # skip the header row
    for _row_index, filename, transcription in fragments:
        fragment_files.append(filename)
        transcriptions.append(transcription)

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
fragment_vectors = model.encode(transcriptions)

# File name -> embedding vector, in the same order as the CSV rows.
embeddings = dict(zip(fragment_files, fragment_vectors))
len(embeddings)

In [None]:
import umap.umap_ as umap

In [None]:
# Standardize the fragment embeddings, then reduce them with UMAP
# (default settings apart from the seed).
embedding_matrix = list(embeddings.values())
scaler = StandardScaler()
scaled_data = scaler.fit_transform(embedding_matrix)
# Fixed seed so a fresh Restart & Run All reproduces the same projection.
reducer = umap.UMAP(random_state=42)
reduced_data = reducer.fit_transform(scaled_data)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Rank every fragment by cosine similarity between its embedding and the query.
user_input = input("Enter a text for semantic search: ")
user_input_embedding = model.encode(user_input)

# Keep names and vectors in the same order so a ranked index maps back to its file.
fragment_names = list(embeddings.keys())
similarities = cosine_similarity([user_input_embedding], list(embeddings.values()))
most_similar_indices = similarities.argsort()[0][::-1]  # best match first

# Look transcriptions up by file name instead of by row position: the embedding
# order is not guaranteed to line up with the rows of `df`.
transcription_by_file = dict(zip(df['File Name'], df['Transcription']))

print("\nMost similar:")
for idx in most_similar_indices:
    name = fragment_names[idx]
    print("File:", name)
    print("Transcription:", transcription_by_file.get(name))
    print("Similarity:", similarities[0][idx].round(4))
    print()