In [None]:
import os
import moviepy.editor as mp
import speech_recognition as sr
import transformers
# Identifier passed to `from_pretrained` to select the causal language model.
model_name = 'Intel/neural-chat-7b-v3-1'
# Load the model and its matching tokenizer once at module level; the
# generation helper defined later reads `model` and `tokenizer` as globals.
model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)


In [None]:

def generate_response(system_input, user_input, max_new_tokens=1500):
    """Generate the assistant's reply for a system/user prompt pair.

    The prompt is rendered with the ``### System / ### User / ### Assistant``
    template, passed to the module-level ``model`` and only the newly
    generated tokens are decoded and returned.

    Args:
        system_input: Instruction text placed in the ``### System:`` section.
        user_input: Request text placed in the ``### User:`` section.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt. Unlike ``max_length``, this budget is not consumed by
            the prompt itself, so long inputs still leave room for an answer.

    Returns:
        The decoded text of the generated continuation (special tokens
        removed), without the prompt.
    """
    # Format the input using the provided template
    prompt = f"### System:\n{system_input}\n### User:\n{user_input}\n### Assistant:\n"

    # Tokenize the prompt; calling the tokenizer (rather than `.encode`) also
    # yields the attention mask that `generate` expects.
    encoded = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    # Generate a response
    outputs = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; slice them off by
    # position instead of splitting the decoded text on "### Assistant:\n",
    # which breaks when that marker also occurs in the input or the reply.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)



In [None]:
def generate_summary(text):
    """Ask the language model for a roughly 350-word summary of ``text``.

    Builds a fixed system instruction plus a user request that embeds
    ``text`` and forwards both to ``generate_response``.

    Args:
        text: The material to summarise; it is interpolated into the request.

    Returns:
        Whatever ``generate_response`` returns for the constructed prompts.
    """
    summariser_role = (
        "You are a summary writing assistant. Your mission is to help users "
        "generate concise yet beautiful summaries within 350 words on the input topic."
    )
    request_template = "Can you generate 350 word summary based on the following topic:{}"
    return generate_response(summariser_role, request_template.format(text))

In [None]:
# Function to convert audio to transcript using SpeechRecognition
def convert_audio_to_transcript(audio_path):
    """Transcribe an audio file with SpeechRecognition's Google recognizer.

    ``sr.AudioFile`` reads WAV, AIFF and FLAC only. Any other extension
    (for example ``.mp3``) is first transcoded to a temporary WAV file with
    MoviePy, and that temporary file is removed afterwards.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The transcript returned by ``Recognizer.recognize_google``.

    Raises:
        Errors from SpeechRecognition (unintelligible audio, failed request)
        and from MoviePy (unreadable input) are propagated unchanged.
    """
    import tempfile  # local import: only needed for the transcoding path

    natively_supported = ('.wav', '.aiff', '.aif', '.flac')
    converted_path = None
    try:
        if not audio_path.lower().endswith(natively_supported):
            # Reserve a unique temp file name, then let MoviePy overwrite it.
            handle, converted_path = tempfile.mkstemp(suffix='.wav')
            os.close(handle)
            clip = mp.AudioFileClip(audio_path)
            try:
                clip.write_audiofile(converted_path)
            finally:
                clip.close()

        recognizer = sr.Recognizer()
        with sr.AudioFile(converted_path or audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    finally:
        # Clean up the transcoded copy even if reading or recognition failed.
        if converted_path is not None and os.path.exists(converted_path):
            os.remove(converted_path)

# Function to convert video to transcript
def convert_video_to_transcript(video_path):
    """Extract a video's audio track and transcribe it.

    The audio is written to a uniquely named temporary WAV file (so an
    existing ``temp_audio.wav`` in the working directory is never clobbered),
    handed to ``convert_audio_to_transcript`` and deleted afterwards, whether
    or not transcription succeeds.

    Args:
        video_path: Path to the video file.

    Returns:
        The transcript produced by ``convert_audio_to_transcript``.

    Raises:
        ValueError: If the video has no audio track.
    """
    import tempfile  # local import: only this function needs it

    # Reserve a unique temp file name, then let MoviePy overwrite it.
    handle, audio_path = tempfile.mkstemp(suffix='.wav')
    os.close(handle)
    try:
        # Convert video to audio
        video = mp.VideoFileClip(video_path)
        try:
            audio = video.audio
            if audio is None:
                raise ValueError(f"No audio track found in video: {video_path}")
            audio.write_audiofile(audio_path)
        finally:
            # Release the clip's underlying readers once the audio is exported.
            video.close()

        # Convert audio to transcript
        return convert_audio_to_transcript(audio_path)
    finally:
        # Delete temporary audio file, also on failure
        if os.path.exists(audio_path):
            os.remove(audio_path)

# Main function to handle different types of input
def handle_input(input_data):
    """Turn user input into transcript text based on its file extension.

    Extension matching ignores case and surrounding whitespace, so
    ``CLIP.MP4`` or ``"clip.mp4 "`` is recognised as a video path rather than
    being treated as literal text.

    Args:
        input_data: Either a path ending in ``.mp4``/``.avi`` (video),
            ``.wav``/``.mp3`` (audio) or ``.txt`` (text file), or the text
            itself.

    Returns:
        The transcript from the matching converter, the contents of the text
        file, or ``input_data`` unchanged when no extension matches.
    """
    video_extensions = ('.mp4', '.avi')
    audio_extensions = ('.wav', '.mp3')

    # Normalise only for detection and path use; literal text is returned as given.
    candidate_path = input_data.strip()
    lowered = candidate_path.lower()

    if lowered.endswith(video_extensions):
        # Input is a video file
        return convert_video_to_transcript(candidate_path)
    if lowered.endswith(audio_extensions):
        # Input is an audio file
        return convert_audio_to_transcript(candidate_path)
    if lowered.endswith('.txt'):
        # Input is a text file
        with open(candidate_path, 'r') as file:
            return file.read()

    # Input is text
    return input_data


In [None]:
def main():
    """Prompt for a file path or raw text, then print its transcript and summary.

    The entry is resolved to a transcript by ``handle_input``; the transcript
    is printed, summarised with ``generate_summary``, and the summary is
    printed with a ``Summary:`` prefix.
    """
    user_entry = input("Enter the path to the file or input text: ")

    # Resolve the entry to transcript text and show it
    transcript_text = handle_input(user_entry)
    print(transcript_text)

    # Summarise the transcript and show the result
    print("Summary:", generate_summary(transcript_text))


if __name__ == "__main__":
    main()