In [1]:
# Mount Google Drive only when the notebook is running inside Google Colab;
# in any other environment this cell does nothing.
_in_colab = "google.colab" in str(get_ipython())
if _in_colab:
    from google.colab import drive

    drive.mount("/content/drive")

In [2]:
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.prompts import load_prompt
# from pymongo import MongoClient
# from pymongo.mongo_client import MongoClient
# from pymongo.server_api import ServerApi
import json
import time
import datetime
import pypdf

In [15]:
# from pathlib import Path
# path = Path("/content/drive/MyDrive/Colab_Notebooks/resume-bot")

In [82]:
# --- Configuration: every path used by the notebook, relative to the working directory ---

# JSON prompt template for the answering step
prompt_template = "templates/template.json"

# Directory name under which the FAISS index is persisted
faiss_index = "faiss_index"

# Question/answer pairs describing the candidate
data_source = "data/about_hari.csv"

# Resume PDF
pdf_source = "data/HarikrishnaDev_DataScientist.pdf"

In [83]:
pdf_source

'data/HarikrishnaDev_DataScientist.pdf'

In [84]:
def is_valid_json(data):
    """Return True when ``data`` can be parsed as JSON, otherwise False.

    Args:
        data: Candidate JSON document (``str``, ``bytes`` or ``bytearray``).
            Any other type, such as ``None`` or an already-parsed ``dict``,
            is reported as invalid instead of raising.

    Returns:
        bool: True if ``json.loads(data)`` succeeds, False otherwise.
    """
    try:
        json.loads(data)
    except (TypeError, ValueError):
        # TypeError: input is not str/bytes/bytearray (e.g. None).
        # ValueError: base class of json.JSONDecodeError; also covers bytes
        # that cannot be decoded to text.
        return False
    return True

In [86]:
import os
from getpass import getpass

# Never store a real key in the notebook. Keep a key that is already exported
# in the environment (the previous version overwrote it with a placeholder);
# otherwise ask for it interactively without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

In [87]:
# Embedding model later passed to FAISS.from_documents to vectorise the documents.
# NOTE(review): constructed with defaults — presumably picks up OPENAI_API_KEY
# from the environment; confirm against the installed LangChain version.
embeddings = OpenAIEmbeddings()

In [88]:
# Read the resume PDF configured in `pdf_source` into LangChain documents.
pdf_loader = PyPDFLoader(file_path=pdf_source)
pdf_data = pdf_loader.load()

In [89]:
pdf_data

[Document(page_content='Harikrishna (Harry) Dev  \nharikrish0607@gmail.com ; +1 (425) 469 -2587 ; Dallas, TX (Open to relocation)  \nlinkedin.com/in/harikrishnad1997 ; harikrishnad1997.github.io ; github.com/harikrishnad1997  \n \nPROFESSIONAL  EXPERIENCE  \nDaifuku Holding Pvt Ltd , Novi, MI  May 2023  – August 2023  \nIT Analyst Intern – Data Science  \n• Developed  proprietary Generative AI tool to perform text mining on report documents and  provide source \ncitations by leveraging the HuggingFace  API and Natural Language Processing , reducing  2 hours of daily work.  \n• Augmented  the Paint Line assembly efficiency by 48% by identifying bottlenecks and forecasting recurring issues \nin the assembly line using Regularized Linear modelling  techniques such as Elastic Net and Facebook Prophet.  \n• Designed a Power  BI dashboard to visualize  and track employee performance metrics, which saved senior \nleadership 3 days of work every month  by connecting data sources to Microsoft A

In [90]:
# Load the curated question/answer rows as LangChain documents.
# The path comes from the `data_source` constant in the configuration cell, so
# changing the data file only requires editing one place.
csv_loader = CSVLoader(file_path=data_source, encoding="utf-8")
csv_data = csv_loader.load()

In [91]:
csv_data

[Document(page_content='question: what is his educational background| tell me about his education| where did he go to school\nanswer: Harikrishna (Harry) Dev holds a Bachelor of Technology in Mechanical Engineering from National Institute of Technology Karnataka, India (graduated May 2019) and is currently pursuing a Master of Sciences in Business Analytics - Data Science cohort at The University of Texas at Dallas (expected graduation December 2023), where he has taken leadership roles as a Graduate Student Assistant and Technical Officer of the Data Science Club.', metadata={'source': 'data/about_hari.csv', 'row': 0}),
 Document(page_content='question: does he have any certificates? what kind of certificate he has?\nanswer: Harikrishna Dev has certifications and training in the following: Python Data Structures, Hadoop in Real World, IBM: Data Science Methodology He also has hand on experience with AWS, GCP, Azure, Databricks, and Snowflake', metadata={'source': 'data/about_hari.csv'

In [92]:
# Combine the PDF and CSV documents into a single corpus
data = pdf_data + csv_data

# Embed every document and build the FAISS index from the result
vectors = FAISS.from_documents(data, embeddings)

# Persist the index under the directory name configured in `faiss_index`
# rather than repeating the literal here.
vectors.save_local(faiss_index)

In [93]:
# Read the key back from the environment (None when it is not set) so it can be
# handed explicitly to the chat model.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [94]:
# Load the answering prompt from the path configured in `prompt_template`
# instead of repeating the literal path here.
prompt = load_prompt(prompt_template)

In [95]:
# Retriever over the FAISS index built above: similarity search returning up to
# six documents, with the extra search kwargs passed through unchanged.
retriever = vectors.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6, "include_metadata": True, "score_threshold": 0.6},
)

# Conversational retrieval chain: retrieved documents are "stuffed" into the
# custom prompt, and the source documents are returned alongside the answer.
chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(
        temperature=0.5,
        model_name='gpt-3.5-turbo',
        openai_api_key=openai_api_key,
    ),
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
    chain_type="stuff",
    # NOTE(review): 4097 looks like the model's whole context window, which
    # would leave no room for the prompt and the answer — confirm.
    max_tokens_limit=4097,
    combine_docs_chain_kwargs={"prompt": prompt},
)


In [96]:
# Start a fresh conversation with an empty history.
chat_history = []

# Use `invoke` rather than calling the chain object directly: the direct call
# is deprecated in current LangChain releases. The result is still a dict with
# the inputs plus the chain outputs.
result = chain.invoke({
    "system": "You are a Resume Bot, a comprehensive, interactive resource for exploring Harikrishna (Harry) Dev's background, skills, and expertise. Be polite and provide answers based on the provided context only. Use only the provided data and not prior knowledge.",
    "question": "Has Hari worked on GenAI?",
    "chat_history": chat_history,
})



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a Resume Bot, a comprehensive, interactive resource for exploring Harikrishna (Harry) Dev's background, skills, and expertise. Be polite and provide answers based on the provided context only. Use only the provided data and not prior knowledge. 
 Human: Take a deep breath and do the following step by step these 4 steps:  
 1. Read the context below 
 2. Answer the question with detail using the provided Help Centre information 
 3. Make sure to nicely format the output is a three paragraph answer and try to incorporate the STAR format. 
 4. Provide 3 examples of questions user can ask about Harikrishna Dev based on the questions from context. Context : 
 ~~~ question: Has Hari worked with VertexAI? | Has Hari worked with OpenAI API? | Has Hari worked with HuggingFace API?
answer: Yes, Harikrishna has worked with the HuggingFace API, Azur

In [97]:
result

{'system': "You are a Resume Bot, a comprehensive, interactive resource for exploring Harikrishna (Harry) Dev's background, skills, and expertise. Be polite and provide answers based on the provided context only. Use only the provided data and not prior knowledge.",
 'question': 'Has Hari worked on GenAI?',
 'chat_history': [],
 'answer': '{\n  "answered": true,\n  "response": "Yes, Harikrishna has worked on GenAI. His favorite project is the GenAI application he developed during his internship at Daifuku. This project involved building an AI-powered tool to support the legal team in managing compliance documentation, which decreased 2 hours of daily grunt work.",\n  "questions": [\n    "What was Harikrishna\'s favorite project?",\n    "Can you provide more details about the GenAI application Harikrishna worked on?",\n    "How did GenAI impact the daily workflow at Daifuku?"\n  ]\n}',
 'source_documents': [Document(page_content='question: Has Hari worked with VertexAI? | Has Hari worke

In [98]:
result['answer']

'{\n  "answered": true,\n  "response": "Yes, Harikrishna has worked on GenAI. His favorite project is the GenAI application he developed during his internship at Daifuku. This project involved building an AI-powered tool to support the legal team in managing compliance documentation, which decreased 2 hours of daily grunt work.",\n  "questions": [\n    "What was Harikrishna\'s favorite project?",\n    "Can you provide more details about the GenAI application Harikrishna worked on?",\n    "How did GenAI impact the daily workflow at Daifuku?"\n  ]\n}'

In [99]:
# The answer is expected to be a JSON object (keys such as "response" and
# "questions" are read in the following cells), but model output is not
# guaranteed to parse. Fall back to a dict wrapping the raw text so the
# `.get(...)` look-ups keep working instead of raising.
try:
    data = json.loads(result["answer"])
except json.JSONDecodeError:
    data = None
if not isinstance(data, dict):
    data = {"answered": False, "response": result["answer"], "questions": []}

In [100]:
data.get("response")

'Yes, Harikrishna has worked on GenAI. His favorite project is the GenAI application he developed during his internship at Daifuku. This project involved building an AI-powered tool to support the legal team in managing compliance documentation, which decreased 2 hours of daily grunt work.'

In [101]:
data.get("questions")

["What was Harikrishna's favorite project?",
 'Can you provide more details about the GenAI application Harikrishna worked on?',
 'How did GenAI impact the daily workflow at Daifuku?']

In [102]:
# Record the finished turn as a (question, answer) pair so the next call can
# use it as conversation history. Re-running this cell appends the turn again.
chat_history += [(result["question"], result["answer"])]

In [103]:
chat_history

[('Has Hari worked on GenAI?',
  '{\n  "answered": true,\n  "response": "Yes, Harikrishna has worked on GenAI. His favorite project is the GenAI application he developed during his internship at Daifuku. This project involved building an AI-powered tool to support the legal team in managing compliance documentation, which decreased 2 hours of daily grunt work.",\n  "questions": [\n    "What was Harikrishna\'s favorite project?",\n    "Can you provide more details about the GenAI application Harikrishna worked on?",\n    "How did GenAI impact the daily workflow at Daifuku?"\n  ]\n}')]

In [104]:
# Follow-up question, asked with the accumulated history. `invoke` is used
# because calling the chain object directly is deprecated in current LangChain
# releases; the returned dict has the same shape.
result = chain.invoke({
    "system": "You are a Resume Bot, a comprehensive, interactive resource for exploring Harikrishna (Harry) Dev's background, skills, and expertise. Be polite and provide answers based on the provided context only. Use only the provided data and not prior knowledge.",
    "question": "Can you provide more details about the GenAI application Harikrishna worked on?",
    "chat_history": chat_history,
})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: Has Hari worked on GenAI?
Assistant: {
  "answered": true,
  "response": "Yes, Harikrishna has worked on GenAI. His favorite project is the GenAI application he developed during his internship at Daifuku. This project involved building an AI-powered tool to support the legal team in managing compliance documentation, which decreased 2 hours of daily grunt work.",
  "questions": [
    "What was Harikrishna's favorite project?",
    "Can you provide more details about the GenAI application Harikrishna worked on?",
    "How did GenAI impact the daily workflow at Daifuku?"
  ]
}
Follow Up Input: Can you provide more details about the GenAI application Harikrishna worked on?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering ne

In [105]:
result["answer"]

'{\n    "answered": true,\n    "response": "Harikrishna\'s favorite project is the GenAI application he developed during his internship at Daifuku. This project involved building an AI-powered tool to support the legal team in managing compliance documentation, which decreased 2 hours of daily grunt work. The GenAI application utilized Natural Language Processing and the HuggingFace API for text mining reports, demonstrating Harikrishna\'s expertise in AI and machine learning applications in real-world scenarios.",\n    "questions": [\n        "What technologies did Harikrishna use in developing the GenAI application?",\n        "How did the GenAI application impact the daily workflow at Daifuku?",\n        "Can you provide more details about the specific features of the GenAI application?"\n    ]\n}'

In [106]:
# Add the latest (question, answer) pair to the running conversation history.
# Re-running this cell appends the same turn again.
chat_history += [(result["question"], result["answer"])]

In [107]:
# Third turn: the question relies on the history to resolve "he". `invoke` is
# used because calling the chain object directly is deprecated in current
# LangChain releases; the returned dict has the same shape.
result = chain.invoke({
    "system": "You are a Resume Bot, a comprehensive, interactive resource for exploring Harikrishna (Harry) Dev's background, skills, and expertise. Be polite and provide answers based on the provided context only. Use only the provided data and not prior knowledge.",
    "question": "Does he know Snowflake?",
    "chat_history": chat_history,
})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: Has Hari worked on GenAI?
Assistant: {
  "answered": true,
  "response": "Yes, Harikrishna has worked on GenAI. His favorite project is the GenAI application he developed during his internship at Daifuku. This project involved building an AI-powered tool to support the legal team in managing compliance documentation, which decreased 2 hours of daily grunt work.",
  "questions": [
    "What was Harikrishna's favorite project?",
    "Can you provide more details about the GenAI application Harikrishna worked on?",
    "How did GenAI impact the daily workflow at Daifuku?"
  ]
}
Human: Can you provide more details about the GenAI application Harikrishna worked on?
Assistant: {
    "answered": true,
    "response": "Harikrishna's favorite projec

In [108]:
result["answer"]

'{\n    "answered": true,\n    "response": "Yes, Harikrishna has experience with Snowflake. He utilized Snowflake during his time at Walmart to train and deploy machine learning models and manage big data warehouses. His proficiency with Snowflake contributed to the success of projects involving data analytics and machine learning.",\n    "questions": [\n        "What projects has Harikrishna worked on using Snowflake?",\n        "How did Harikrishna leverage Snowflake in his machine learning projects?",\n        "Can Harikrishna provide examples of the impact of using Snowflake in data warehouse management?"\n    ]\n}'