
Merge pull request #15 from luisrodriguesphd/feat-groq-llm-api
feat: replace local llm for groq api with llama 3 8b
larpig committed May 10, 2024
2 parents 70b2498 + 51ca21c commit 919cf12
Showing 11 changed files with 121 additions and 26 deletions.
6 changes: 4 additions & 2 deletions Dockerfile
@@ -21,6 +21,9 @@ ENV MPLCONFIGDIR=$MPLCONFIGDIR
ARG ENTRYPOINT_PATH="./entrypoint.sh"
ENV ENTRYPOINT_PATH=$ENTRYPOINT_PATH

# RUN --mount=type=secret,id=GROQ_API_KEY,mode=0444,required=true \
# echo "GROQ_API_KEY=$(cat /run/secrets/GROQ_API_KEY)"

# Create the /code/ directory and set permissions to rwx
RUN mkdir -p /code/ && \
    chmod -R 777 /code/
@@ -55,7 +58,6 @@ COPY . .
RUN pip install -e . && \
python src/resume_worth/pipelines/data_indexing/pipeline.py

-RUN python src/resume_worth/pipelines/text_generation/pipeline.py && \
-    chmod +x $ENTRYPOINT_PATH
+RUN chmod +x $ENTRYPOINT_PATH

ENTRYPOINT $ENTRYPOINT_PATH
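
Note on this hunk: the build-time pipeline run is dropped because the LLM is no longer downloaded into the image; generation now goes through Groq's hosted API at runtime. The commented-out `RUN --mount=type=secret,...` lines sketch Docker BuildKit secret mounts, which expose a secret to a single build step without baking it into an image layer. If enabled, the key would be supplied at build time with something like `docker build --secret id=GROQ_API_KEY,src=groq_key.txt .` (file name hypothetical); here the mount stays commented out and `GROQ_API_KEY` is read from the environment instead.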
16 changes: 16 additions & 0 deletions README.md
@@ -1,3 +1,13 @@
---
title: Resume Worth
emoji: 🤖
colorFrom: pink
colorTo: blue
sdk: docker
pinned: false
license: apache-2.0
---
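
(For context: this added front matter is Hugging Face Spaces configuration; `sdk: docker` tells Spaces to build and serve the app from the repository's Dockerfile.)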

# ResumeWorth

Discover Your True Market Value and Optimize Your Earnings Potential!
@@ -32,6 +42,12 @@ ResumeWorth utilizes a step-by-step process to analyze your professional backgro
5. **Job Matching**: Get direct links to job opportunities that match your profile.
6. **Resume-Job Match Explanation**: Understand how your resume aligns with job opportunities through detailed explanations provided by our Retrieval-Augmented Generation (RAG) technology.

Besides the inference pipeline described above, the platform includes two more pipelines: data indexing, which indexes job vacancies in a vector store, and data collection, which gathers job vacancy information from well-known job sites. The complete architecture of the platform is illustrated in the figure below.

<p align="center">
<img src="./docs/images/ResumeWorth_Architecture.svg" alt="" width="500">
</p>

<a name="gettingstarted"/></a>
## 3. Getting Started
[Back to ToC](#toc)
3 changes: 3 additions & 0 deletions conf/.env.example
@@ -1,3 +1,6 @@
# Groq Cloud (for text generation)
GROQ_API_KEY=""

# OpenAI (for text generation)
OPENAI_API_KEY=""
OPENAI_ORG_ID=""
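
For reference, a minimal sketch of how these variables reach the code at runtime; this mirrors what the new `set_secrets` helper in `src/resume_worth/utils/utils.py` does with python-dotenv (the `conf/.env` path is an assumption based on `conf_dir` and `secrets_file` in `conf/params.yml`):

```python
import os

from dotenv import load_dotenv

# Read conf/.env and export its entries as environment variables.
load_dotenv("conf/.env")

api_key = os.environ.get("GROQ_API_KEY")
if api_key is None:
    raise ValueError("GROQ_API_KEY is not set.")
```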
14 changes: 9 additions & 5 deletions conf/params.yml
@@ -1,3 +1,8 @@
# Conf
conf_dir: ["conf"]
secrets_file: ".env"


# Data
ingestion_data_dir: ["data", "02_processed"]
ingestion_metadata_dir: ["data", "02_processed", "metadata"]
@@ -16,16 +21,15 @@ embedding_dir: ["data", "03_indexed"]


# LLM / Text Generation
+# Suggestion: huggingface|M4-ai/tau-1.8B; groq|llama3-8b-8192
 # See instructions for parameters: https://www.ibm.com/docs/en/watsonx-as-a-service?topic=lab-model-parameters-prompting
 generative_model:
-  model_name: "M4-ai/tau-1.8B"
+  model_provider: "groq"
+  model_name: "llama3-8b-8192"
   model_kwargs:
-    trust_remote_code: True
-  generate_kwargs:
-    top_k: 30
-    top_p: 0.7
+  generate_kwargs:
     temperature: 0.3
     max_new_tokens: 256
# See instructions for the prompt: https://huggingface.co/spaces/Locutusque/Locutusque-Models/blob/main/app.py
prompt_dir: ["data", "04_prompts"]
promp_file: "prompt_template_for_explaning_why_is_a_good_fit.json"
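
These keys map directly onto the new loader's signature; a sketch of the call as `pipeline.py` makes it (see the nodes.py diff below):

```python
params = get_params()
gm = params['generative_model']

# Provider, model name, and the two kwarg dicts come straight from conf/params.yml.
llm = load_text_generation_model(
    gm['model_provider'], gm['model_name'], gm['model_kwargs'], gm['generate_kwargs']
)
```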
Binary file added docs/images/ResumeWorth_Architecture.png
1 change: 1 addition & 0 deletions docs/images/ResumeWorth_Architecture.svg
Binary file added docs/presentation/ResumeWorth.pptx
Binary file not shown.
1 change: 1 addition & 0 deletions requirements.in
@@ -7,6 +7,7 @@ pandas
# to build LLM Apps
langchain
langchain-community
langchain_groq
sentence-transformers>=2.3.1
chromadb
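
(`langchain_groq` supplies the `ChatGroq` wrapper imported in `nodes.py`; pip treats `langchain_groq` and `langchain-groq` as the same distribution, so either spelling installs it.)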

82 changes: 65 additions & 17 deletions src/resume_worth/pipelines/text_generation/nodes.py
@@ -1,8 +1,11 @@
import os
os.environ['HF_HOME'] = ".cache/huggingface"

from typing import Union
from resume_worth.utils.utils import set_secrets
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain.prompts import load_prompt
from functools import lru_cache
@@ -13,26 +16,71 @@


#@lru_cache(maxsize=None)
def load_text_generation_model(
        model_provider: str = 'groq',
        model_name: str = 'llama3-8b-8192',
        model_kwargs: dict = {},
        generate_kwargs: dict = {
            'temperature': 0.4,
        },
):
    """Function to load a text generation model according to the provider."""

    print(f"-> Load {model_name} text generation model from {model_provider}")

    if model_provider == "huggingface":
        return load_hf_text_generation_model_to_langchain(model_name, model_kwargs, generate_kwargs)

    elif model_provider == "groq":
        set_secrets()
        return load_groq_text_generation_model_to_langchain(model_name, model_kwargs, generate_kwargs)

    else:
        raise Exception("Sorry, the code has no support for this provider yet.")
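
A minimal usage sketch of the dispatcher above, with values mirroring `conf/params.yml` (assumes `GROQ_API_KEY` is set):

```python
llm = load_text_generation_model(
    model_provider='groq',
    model_name='llama3-8b-8192',
    model_kwargs={},
    generate_kwargs={'temperature': 0.3},
)
```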


def load_groq_text_generation_model_to_langchain(
        model_name: str = 'llama3-8b-8192',
        model_kwargs: dict = {
            'top_k': 50,
            'top_p': 0.95,
            'max_new_tokens': 1024,
        },
        generate_kwargs: dict = {
            'temperature': 0.4,
        },
):
    """
    Function to load a text generation model hosted on Groq to be used in LangChain.
    For more info, see: https://console.groq.com/docs/quickstart
    """

    groq_api_key = os.environ.get('GROQ_API_KEY', None)
    if groq_api_key is None:
        raise ValueError("GROQ_API_KEY is not set.")

    groq = ChatGroq(model_name=model_name, model_kwargs=model_kwargs, **generate_kwargs, groq_api_key=groq_api_key)

    return groq
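
`ChatGroq` is a chat model, so invoking it returns a message object rather than a plain string; a quick sketch (prompt text is illustrative):

```python
llm = load_groq_text_generation_model_to_langchain(model_name='llama3-8b-8192')
msg = llm.invoke("In one sentence, why hire a data scientist?")
print(msg.content)  # chat models return an AIMessage; the text lives in .content
```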


def load_hf_text_generation_model_to_langchain(
        model_name: str = 'gpt2',
        model_kwargs: dict = {
            'trust_remote_code': True,
        },
        generate_kwargs: dict = {
            'top_k': 50,
            'top_p': 0.95,
            'temperature': 0.4,
            'max_new_tokens': 1024,
        },
):
    """
    Function to load a text generation model hosted on Hugging Face to be used in LangChain.
    For more info, see: https://python.langchain.com/docs/integrations/llms/huggingface_pipelines/
    """

    print(f"-> Load the pretrained text generation model {model_name}")

    # https://huggingface.co/apple/OpenELM
    tokenizer = AutoTokenizer.from_pretrained(model_name, **model_kwargs)
    model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)

@@ -68,10 +116,10 @@ def load_langchain_prompt_template(promp_path: str):
    return prompt


-def create_langchain_chain(prompt: PromptTemplate, hf_text_generation: HuggingFacePipeline):
+def create_langchain_chain(prompt: PromptTemplate, text_generation_model: Union[HuggingFacePipeline, ChatGroq]):
     """
-    Create a chain by composing the HF text generation model with a LangChain prompt template.
+    Create a chain by composing the text generation model with a LangChain prompt template.
     For more info, see: https://python.langchain.com/docs/integrations/llms/huggingface_pipelines/
     """
-    chain = prompt | hf_text_generation
+    chain = prompt | text_generation_model
     return chain
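
With LCEL's `|` composition, the chain formats the prompt and pipes it into whichever model was loaded; a usage sketch consistent with `pipeline.py` (variable values are placeholders):

```python
prompt = load_langchain_prompt_template(promp_path)  # promp_path built from conf/params.yml
chain = create_langchain_chain(prompt, llm)
answer = chain.invoke({"resume": resume_text, "job": job_text})
```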
7 changes: 5 additions & 2 deletions src/resume_worth/pipelines/text_generation/pipeline.py
@@ -7,7 +7,7 @@

import os
from resume_worth.utils.utils import get_params
-from resume_worth.pipelines.text_generation.nodes import load_hf_text_generation_model_to_langchain, load_langchain_prompt_template, create_langchain_chain
+from resume_worth.pipelines.text_generation.nodes import load_text_generation_model, load_langchain_prompt_template, create_langchain_chain


params = get_params()
@@ -20,7 +20,7 @@ def generate_explanation_why_resume_for_a_job(resume: str, job: str):

     # Stage 1 - [cacheable] Load text generation model
 
-    text_generation_model = load_hf_text_generation_model_to_langchain(generative_model['model_name'], generative_model['model_kwargs'], generative_model['generate_kwargs'])
+    text_generation_model = load_text_generation_model(generative_model['model_provider'], generative_model['model_name'], generative_model['model_kwargs'], generative_model['generate_kwargs'])
 
     # Stage 2 - [cacheable] Load prompt template

@@ -35,6 +35,9 @@ def generate_explanation_why_resume_for_a_job(resume: str, job: str):

     answer = text_generation_chain.invoke({"resume": resume, "job": job})
 
+    if generative_model['model_provider'] != "huggingface":
+        answer = answer.content
 
     return answer
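
The provider check above is needed because `HuggingFacePipeline` returns a plain string while `ChatGroq` returns a chat message; a provider-agnostic alternative (hypothetical helper, not part of this commit) could normalize both:

```python
def to_text(answer) -> str:
    # AIMessage-like objects carry their text in .content; plain strings pass through.
    return getattr(answer, "content", answer)
```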


17 changes: 17 additions & 0 deletions src/resume_worth/utils/utils.py
@@ -4,6 +4,7 @@
import yaml
from langchain_community.embeddings import HuggingFaceEmbeddings
import fitz # imports the pymupdf library
from functools import lru_cache


def get_params():
@@ -25,6 +26,22 @@ def get_params():

    return params

@lru_cache(maxsize=None)
def set_secrets():
    """
    Function to set the secrets.
    It loads the parameters from the .env file and sets them as env vars.
    """
    params = get_params()

    secrets_path = os.path.join(params['conf_dir'], params['secrets_file'])

    if os.path.exists(secrets_path):
        from dotenv import load_dotenv
        _ = load_dotenv(secrets_path)
    else:
        print(f'The secret file {secrets_path} does not exist!')
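
(The `lru_cache` decorator makes `set_secrets` a run-once initializer: the first call parses the .env file and later calls are answered from the cache without touching the file again.)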


def load_embedding_model(model_name: str = "sentence-transformers/all-mpnet-base-v2", model_kwargs: dict={}, encode_kwargs: dict={}):
"""Load a pretrained text embedding model"""