<a href="https://colab.research.google.com/github/prtkmhn/ATS-Resume-Reviewer/blob/prateek-development/ATS_TALK_SCREENSHOT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import time
import subprocess
import requests
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
import json
from langchain.chains import LLMChain
import speech_recognition as sr
import pyttsx3
from langchain.chains.retrieval import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import OpenAIEmbeddings
from pynput import keyboard
import asyncio
from asyncio import WindowsSelectorEventLoopPolicy
import pyautogui
import cv2
import numpy as np
from PIL import Image
from langchain.document_loaders import TextLoader
import google.generativeai as genai

# Install the Windows selector event-loop policy before any loop is created.
asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy())

# Pull variables from a local .env file (if present) into os.environ.
load_dotenv()
# Fall back to the placeholder only when no GOOGLE_API_KEY is already set;
# assigning unconditionally would overwrite a real key that came from the
# environment or from .env on the line above.
os.environ.setdefault('GOOGLE_API_KEY', "YOUR API KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
# Initialize Gemini Pro Vision model (used for screenshot analysis)
vision_model = genai.GenerativeModel('gemini-pro-vision')
# Initialize Gemini Pro model
gemini_model = genai.GenerativeModel('gemini-pro')

class LanguageModelProcessor:
    """Chat wrapper around a Groq-hosted LLM with optional document retrieval.

    Two answer paths exist:

    * a memory-backed ``LLMChain`` (``self.conversation``) driven by the
      system prompt read from ``system_prompt.txt``;
    * a retrieval chain (``self.retrieve_chain``) that is ``None`` until a
      caller installs one; when set, it takes precedence in ``process``.
    """

    def __init__(self, max_tokens=256, temperature=0.7):
        """Build the LLM client, memory, embeddings and prompt chains.

        Args:
            max_tokens: Forwarded to ``ChatGroq`` as ``max_tokens``.
            temperature: Forwarded to ``ChatGroq`` as ``temperature``.

        Raises:
            OSError: If ``system_prompt.txt`` cannot be opened.
        """
        self.llm = ChatGroq(temperature=temperature, model_name="llama3-70b-8192", groq_api_key=os.getenv("GROQ_API_KEY"), max_tokens=max_tokens)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        with open('system_prompt.txt', 'r') as file:
            system_prompt = file.read().strip()

        self.embed_model = "text-embedding-3-small"
        self.embeddings = OpenAIEmbeddings(model=self.embed_model, openai_api_key=os.getenv("OPENAI_API_KEY"))

        # Chunks that have been embedded so far; filled in by callers.
        self.documents = []

        # Retrieval pieces stay unset until documents are added by a caller.
        self.vector_store = None
        self.retriever = None
        self.retrieve_chain = None

        self.prompt = ChatPromptTemplate.from_template('''
            Answer question based on provided context.
            <context>
            {context}
            </context>

            Question: {input}
        ''')

        self.document_chain = create_stuff_documents_chain(self.llm, self.prompt)

        self.conversation_prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            HumanMessagePromptTemplate.from_template("{text}")
        ])

        self.conversation = LLMChain(
            llm=self.llm,
            prompt=self.conversation_prompt,
            memory=self.memory,
        )

    def process(self, text):
        """Send ``text`` to the LLM and return the answer string.

        Uses the retrieval chain when one has been installed, otherwise the
        memory-backed conversation chain. The elapsed time and the answer are
        printed to stdout.

        Args:
            text: The user's utterance.

        Returns:
            The answer text.
        """
        use_retrieval = self.retrieve_chain is not None

        start_time = time.time()
        if use_retrieval:
            response = self.retrieve_chain.invoke({"input": text})
        else:
            response = self.conversation.predict(text=text)
        end_time = time.time()

        # The conversation chain returns a plain string; normalise it to the
        # dict shape indexed below.
        if isinstance(response, str):
            response = {'answer': response}
        answer = response['answer']

        # The conversation chain was built with memory=self.memory, and a
        # LangChain chain with memory saves each call's input and output
        # itself. Recording them here as well would store every turn twice
        # (and put the current user text into chat_history before the prompt
        # is rendered), so only the retrieval path, which is not wired to the
        # memory, is recorded by hand.
        if use_retrieval:
            self.memory.chat_memory.add_user_message(text)
            self.memory.chat_memory.add_ai_message(answer)

        elapsed_time = int((end_time - start_time) * 1000)
        print(f"LLM ({elapsed_time}ms): {answer}")
        return answer


class TextToSpeech:
    """Speaks text aloud through a pyttsx3 engine held in ``self.engine``."""

    def __init__(self):
        """Create the pyttsx3 engine used by ``speak`` and ``stop``."""
        engine = pyttsx3.init()
        self.engine = engine

    def speak(self, text):
        """Queue ``text`` on the engine and run it until the queue is done."""
        tts_engine = self.engine
        tts_engine.say(text)
        tts_engine.runAndWait()

    def stop(self):
        """Ask the engine to stop."""
        tts_engine = self.engine
        tts_engine.stop()

class SpeechToText:
    """Captures microphone audio and transcribes it with ``recognize_google``."""

    def __init__(self):
        """Create the ``speech_recognition`` recognizer used by ``listen``."""
        recognizer = sr.Recognizer()
        self.recognizer = recognizer

    def listen(self):
        """Record one utterance from the microphone and return its transcript.

        Returns:
            The recognised text, or ``""`` when the audio could not be
            understood or the recognition request failed (a message is
            printed in both failure cases).
        """
        with sr.Microphone() as mic:
            print("Listening...")
            captured = self.recognizer.listen(mic)

        transcript = ""
        try:
            transcript = self.recognizer.recognize_google(captured)
        except sr.UnknownValueError:
            print("Could not understand audio")
        except sr.RequestError as err:
            print(f"Could not request results from Google Speech Recognition service; {err}")
        else:
            print(f"Human: {transcript}")

        return transcript


class ConversationManager:
    """Interactive loop: listen, optionally screenshot/ingest a file, answer, speak.

    Spoken keywords handled by ``main``:

    * ``goodbye``    - leave the loop;
    * ``screenshot`` - capture the screen, run it through the vision model,
      print the result, then listen again for the actual question;
    * ``textfile``   - prompt for a path and add that file to the retrieval
      index.
    """

    def __init__(self, max_tokens=256, temperature=0.7):
        """Create the LLM processor.

        Args:
            max_tokens: Passed through to ``LanguageModelProcessor``.
            temperature: Passed through to ``LanguageModelProcessor``.
        """
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.llm = LanguageModelProcessor(max_tokens=self.max_tokens, temperature=self.temperature)
        # Speech components are created in main(); define the attributes up
        # front so on_press can be called safely before the loop has started.
        self.tts = None
        self.stt = None
        self.stop_listening = False

    def on_press(self, key):
        """Keyboard callback: on 'x', stop speech output and listen again.

        Args:
            key: The key object delivered by the pynput listener.
        """
        if key == keyboard.KeyCode.from_char('x'):
            self.stop_listening = True
            if self.tts is None or self.stt is None:
                # main() has not set up speech yet; nothing to interrupt.
                return
            self.tts.stop()
            time.sleep(0.5)  # Add a small delay to ensure the speech is fully stopped
            self.stt.listen()

    async def take_screenshot(self):
        """Capture the screen once, saving it as ``Tempimage.png``.

        Returns:
            The object returned by ``pyautogui.screenshot``.
        """
        # A single call both saves the file and returns the capture; the
        # earlier extra, unsaved capture was discarded immediately.
        return pyautogui.screenshot("Tempimage.png")

    async def process_screenshot(self, screenshot):
        """Ask the vision model to extract job details from ``screenshot``.

        Args:
            screenshot: The capture produced by ``take_screenshot``.

        Returns:
            The ``text`` of the model response.
        """
        response1 = vision_model.generate_content(["Please extract the company name, job name, and job description from this image.", screenshot], stream=True)
        # The request is streamed; resolve() is called before reading .text.
        response1.resolve()
        return response1.text

    def add_text_file_to_embedding(self, file_path):
        """Load a text file, chunk it, and make it searchable by the LLM.

        The chunks are appended to ``self.llm.documents`` and the vector
        store, retriever and retrieval chain on ``self.llm`` are (re)wired.

        Args:
            file_path: Path of the text file to ingest.
        """
        loader = TextLoader(file_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        new_chunks = text_splitter.split_documents(docs)
        self.llm.documents.extend(new_chunks)

        if self.llm.vector_store is None:
            self.llm.vector_store = FAISS.from_documents(new_chunks, self.llm.embeddings)
        else:
            # Extend the existing index instead of rebuilding it from every
            # chunk, which would re-embed all previously added documents.
            self.llm.vector_store.add_documents(new_chunks)

        self.llm.retriever = self.llm.vector_store.as_retriever()
        self.llm.retrieve_chain = create_retrieval_chain(self.llm.retriever, self.llm.document_chain)

    async def main(self):
        """Run the interactive voice loop until the user says "goodbye"."""
        self.tts = TextToSpeech()
        self.stt = SpeechToText()

        listener = keyboard.Listener(on_press=self.on_press)
        listener.start()

        try:
            while True:
                input("Press Enter to start listening...")
                transcription_response = self.stt.listen()

                if "goodbye" in transcription_response.lower():
                    break

                if "screenshot" in transcription_response.lower():
                    screenshot = await self.take_screenshot()
                    processed_screenshot = await self.process_screenshot(screenshot)
                    print("Screenshot taken and processed.")
                    print(f"Extracted information: {processed_screenshot}")
                    # Listen again: the next utterance is what goes to the LLM.
                    transcription_response = self.stt.listen()

                if "textfile" in transcription_response.lower():
                    file_path = input("Enter the path to the text file: ")
                    try:
                        self.add_text_file_to_embedding(file_path)
                    except (OSError, RuntimeError) as err:
                        # A mistyped or unreadable path should not end the
                        # session. NOTE(review): assumes the loader reports
                        # read failures as OSError/RuntimeError - confirm.
                        print(f"Could not add text file '{file_path}': {err}")
                    else:
                        print(f"Text file '{file_path}' added to the embedding.")
                    continue

                if transcription_response.strip():
                    llm_response = self.llm.process(transcription_response)
                    self.tts.speak(llm_response)
        finally:
            # Stop the keyboard listener on exit, including on errors.
            listener.stop()

if __name__ == "__main__":
    import argparse

    # Command-line options controlling the LLM's output length and randomness.
    cli = argparse.ArgumentParser(description='Talk to your LLM')
    cli.add_argument(
        '--max_tokens',
        type=int,
        default=70,
        help='Maximum number of tokens for LLM output',
    )
    cli.add_argument(
        '--temperature',
        type=float,
        default=0.3,
        help='Temperature for LLM',
    )
    options = cli.parse_args()

    # Build the manager from the parsed options and run its async loop.
    conversation = ConversationManager(
        max_tokens=options.max_tokens,
        temperature=options.temperature,
    )
    asyncio.run(conversation.main())