In [1]:
import os, uuid
import chromadb
import assemblyai as aai
import openai, cohere
import google.generativeai as genai
from fastapi import FastAPI, UploadFile, File, HTTPException, Request
from fastapi.responses import JSONResponse
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from moviepy.video.io.VideoFileClip import VideoFileClip
# Load environment variables from a .env file into the process environment so
# the os.getenv / os.environ.get lookups in the following cells can find the
# service API keys (ASSEMBLYAI_API_KEY, COHERE_API_KEY, SAMBANOVA_API_KEY).
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Configure the AssemblyAI SDK. The API key is read from the environment, and
# `transcriber` is built with a default config that selects the nano speech
# model and turns on automatic chapter generation.
aai.settings.api_key = os.environ.get('ASSEMBLYAI_API_KEY')
aai_config = aai.TranscriptionConfig(
    auto_chapters=True,
    speech_model=aai.SpeechModel.nano,
)
transcriber = aai.Transcriber(config=aai_config)

In [3]:
# Extract the audio track of the source video into a WAV file that can be sent
# for transcription. Opening the clip in a `with` block closes it (and the
# reader resources it holds) as soon as the audio has been written; the
# original one-liner never closed the clip.
with VideoFileClip("temp/IOT.mp4") as video_clip:
    # A video without an audio stream exposes `audio` as None; fail with a
    # clear message instead of an AttributeError on None.
    if video_clip.audio is None:
        raise ValueError("temp/IOT.mp4 has no audio track to extract")
    video_clip.audio.write_audiofile("temp/IOT.wav")

MoviePy - Writing audio in temp/IOT.wav


                                                                      

MoviePy - Done.




In [5]:
# Send the extracted audio to AssemblyAI and keep the resulting transcript.
# The config is passed by keyword; it is the same object `transcriber` was
# constructed with, so chapter generation stays enabled for this request.
transcript = transcriber.transcribe("temp/IOT.wav", config=aai_config)

# The SDK reports a failed job through the transcript's status rather than by
# raising, so stop here with the service's error message instead of letting a
# later cell fail on `transcript.chapters`.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"AssemblyAI transcription failed: {transcript.error}")

In [6]:
# Vector store: a persistent Chroma client at its default location. The
# "movie_summaries" collection is opened if it already exists and created
# otherwise.
chroma_client = chromadb.PersistentClient()
movie_collection = chroma_client.get_or_create_collection(name="movie_summaries")

# Cohere client used for computing text embeddings.
cohere_client = cohere.Client(api_key=os.environ.get('COHERE_API_KEY'))

# OpenAI SDK client pointed at SambaNova's API base URL.
client = openai.OpenAI(
    base_url="https://api.sambanova.ai/v1",
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
)


In [10]:
# Display the summary text of the first chapter of the transcript. Chapters
# are requested via auto_chapters=True in the transcription config.
# NOTE(review): indexing [0] assumes at least one chapter was produced — confirm
# for short or silent recordings.
transcript.chapters[0].summary

'Sensors and actuators, connectivity, data processing, user interfaces and security. Applications include robotic industrial automation, smart homes, healthcare connectivity. Data analytics include applying statistical and machine learning techniques to extract actionable insights.'

In [12]:
# Embed the first chapter's summary with Cohere's multilingual light model.
# A single text is submitted, float embeddings are requested, and the text is
# tagged as a "search_document".
# NOTE(review): truncate='NONE' presumably makes over-long input fail rather
# than be shortened — confirm against the Cohere embed documentation.
response = cohere_client.embed(
    texts=[transcript.chapters[0].summary],
    model="embed-multilingual-light-v3.0",
    input_type="search_document",
    embedding_types=["float"],
    truncate='NONE',
)


In [18]:
# Display the float embedding for the first input text. Only one text was
# submitted in the embed call, so index 0 is the only vector returned.
# NOTE(review): this renders the entire vector in the cell output; consider
# slicing (e.g. [:5]) before sharing the notebook.
response.embeddings.float[0]

[-0.060821533,
 0.023483276,
 0.021697998,
 -0.008560181,
 0.030288696,
 0.004837036,
 0.032073975,
 -0.005882263,
 0.008918762,
 0.033599854,
 -0.030899048,
 0.0060653687,
 0.051086426,
 -0.0028820038,
 -0.0138549805,
 0.017578125,
 -0.050750732,
 -0.052825928,
 0.07611084,
 0.07879639,
 -0.0062446594,
 -0.050231934,
 -0.1104126,
 -0.019989014,
 0.015289307,
 0.032348633,
 -0.03543091,
 -0.1149292,
 0.0052757263,
 0.04449463,
 -0.07940674,
 0.0010757446,
 0.049102783,
 -0.012512207,
 0.03286743,
 0.0602417,
 -0.018539429,
 -0.046539307,
 -0.06842041,
 -0.08135986,
 0.014060974,
 -0.07043457,
 0.109191895,
 0.013496399,
 -0.010139465,
 0.023284912,
 0.03579712,
 -0.047698975,
 -0.07635498,
 -0.099853516,
 -0.038970947,
 -0.00038790703,
 0.015701294,
 0.087768555,
 0.0519104,
 -0.0072631836,
 0.0065345764,
 0.059020996,
 -0.012413025,
 -0.08917236,
 -0.041168213,
 -0.05795288,
 0.12133789,
 0.0078125,
 0.0051460266,
 -0.062286377,
 0.07696533,
 0.00023961067,
 -0.06124878,
 -0.029922485

In [29]:
# Query the Chroma collection for the stored summaries nearest to the chapter
# embedding and display the matching documents.
#
# `embedding` is not assigned in any visible cell of this notebook, so on a
# fresh kernel the query raised NameError. Bind it explicitly from the Cohere
# response here.
# NOTE(review): presumably the intended query vector is the chapter-summary
# embedding computed above — confirm. It was embedded with
# input_type="search_document"; verify whether a "search_query" embedding is
# what the lookup should use.
embedding = response.embeddings.float[0]

results = movie_collection.query(
    query_embeddings=[embedding],
    n_results=5,  # Chroma lowers this itself when the index holds fewer items
)
# One query vector was sent, so the matches for it are at index 0.
results['documents'][0]

Number of requested results 5 is greater than number of elements in index 1, updating n_results = 1


['HRM is a strategic approach to managing people the workforce of an organization. It involves the practices and policies needed to carry out and the people or human resources aspects of management position including recruit, training, development and compensation. Ron AEN presents presentation on meaning, scope, objectives and functions of HRM.']

In [44]:
# Display the transcript records returned by AssemblyAI's list endpoint
# (presumably those belonging to the account of the configured API key).
# NOTE(review): the rendered output includes transcript ids and upload URLs;
# clear this cell's output before sharing or committing the notebook.
transcriber.list_transcripts().transcripts


[TranscriptItem(audio_url='https://cdn.assemblyai.com/upload/7c70edfb-86f9-4ec1-91e6-5b8f19b6a376', completed='2024-11-21T19:21:03.160337', created='2024-11-21T19:20:55.771469', error=None, id='08ee44aa-1b5a-4bbb-9a90-e64a7a3e5d41', resource_url='https://api.assemblyai.com/v2/transcript/08ee44aa-1b5a-4bbb-9a90-e64a7a3e5d41', status=<TranscriptStatus.completed: 'completed'>),
 TranscriptItem(audio_url='https://cdn.assemblyai.com/upload/c33a7d7b-9c0c-4f47-96b4-289f1873e231', completed='2024-11-21T18:35:21.437543', created='2024-11-21T18:35:15.014344', error=None, id='a5ac1cfd-b77a-418a-a954-1863d0c759b5', resource_url='https://api.assemblyai.com/v2/transcript/a5ac1cfd-b77a-418a-a954-1863d0c759b5', status=<TranscriptStatus.completed: 'completed'>),
 TranscriptItem(audio_url='https://cdn.assemblyai.com/upload/f9295e05-8ad0-4b8b-a2af-5199d6d5dbc1', completed='2024-11-21T18:18:43.882540', created='2024-11-21T18:18:37.732063', error=None, id='bf8e15aa-8c6c-4b31-bd65-0ac851f454db', resource_ur