# gemini-analysis-starter-notebook

In [15]:
import os
import json
from PyPDF2 import PdfReader
import numpy as np
import faiss
from dotenv import load_dotenv
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [9]:
# Load variables from a local .env file (if any) into the environment,
# then hand the Google API key to the Gemini client and create the chat model.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel("gemini-pro")

In [3]:
# Embedding client shared by every get_embedding() call in this notebook.
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

In [None]:
def get_embedding(text):
    """Return the embedding vector for *text* from the shared embedding model."""
    return embedding_model.embed_query(text)

In [40]:
def get_text_chunks(text):
    """Split *text* into overlapping chunks.

    Uses a recursive character splitter configured with chunk_size=10000
    and chunk_overlap=1000, and returns whatever list of chunks it yields.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    ).split_text(text)

In [29]:
def get_average_embedding(text):
    """Return one embedding for *text*: the mean of its per-chunk embeddings.

    The text is chunked first so each embedding covers a bounded span; the
    chunk vectors are then averaged element-wise into a single vector.

    Raises:
        ValueError: if chunking produces no chunks (e.g. *text* is empty).
            Without this guard ``np.mean`` of an empty list returns NaN with
            only a RuntimeWarning, and that NaN would silently flow into the
            FAISS index or query.
    """
    chunks = get_text_chunks(text)
    if not chunks:
        raise ValueError("Cannot embed empty text: no chunks were produced.")
    embeddings = [get_embedding(chunk) for chunk in chunks]
    # axis=0 averages across chunks, keeping the embedding dimension intact.
    return np.mean(embeddings, axis=0)

In [10]:
json_path = "all_jobs.json"

# JSON is UTF-8 by specification; pass the encoding explicitly so the load
# does not depend on the platform's default locale encoding.
with open(json_path, 'r', encoding="utf-8") as f:
    jobs = json.load(f)

In [28]:
# Read the resume PDF and concatenate the extracted text of every page
# (no separator is inserted between pages).
resume_path = "resume.pdf"
pdf_reader = PdfReader(resume_path)
text = "".join(page.extract_text() for page in pdf_reader.pages)

In [65]:
# Single query vector for the resume: the mean of its chunk embeddings.
resume_embedding = get_average_embedding(text)

In [83]:
# Two parallel lists (same order as `jobs`), each entry shaped
# {"job": <job record>, "embedding": <vector>}:
#   - job_embeddings_all:      embedding of every field of the posting
#   - job_embeddings_relevant: embedding of only the fields that describe the role
job_embeddings_all = []
job_embeddings_relevant = []
relevant_keys = ('title', 'position_description', 'qualifications')
for job in jobs.values():
    all_values = " ".join(f"{key}: {value}" for key, value in job.items())
    relevant_values = " ".join(
        f"{key}: {value}" for key, value in job.items() if key in relevant_keys
    )

    all_values_embedding = get_average_embedding(all_values)
    relevant_values_embedding = get_average_embedding(relevant_values)

    # Each list stores its own embedding, and both use the "job" key that
    # process_results() reads when mapping FAISS hits back to postings.
    job_embeddings_all.append({"job": job, "embedding": all_values_embedding})
    job_embeddings_relevant.append({"job": job, "embedding": relevant_values_embedding})

In [90]:
def build_faiss_index(embeddings):
    """Build a flat L2 FAISS index from a list of embedding records.

    Args:
        embeddings: non-empty list of dicts, each holding its vector under
            the 'embedding' key. Row i of the index corresponds to
            embeddings[i].

    Returns:
        A ``faiss.IndexFlatL2`` populated with all vectors as float32.
    """
    vectors = np.array(
        [record['embedding'] for record in embeddings],
        dtype=np.float32,
    )
    # Dimensionality is taken from the first record's vector.
    flat_index = faiss.IndexFlatL2(len(embeddings[0]['embedding']))
    flat_index.add(vectors)
    return flat_index

In [85]:
# Index the embeddings built from the title/description/qualifications fields.
index = build_faiss_index(job_embeddings_relevant)

In [104]:
def search_faiss_index(index, query_embedding, k=15):
    """Query *index* for the *k* nearest neighbours of *query_embedding*.

    The query is converted to a single-row float32 matrix before being
    passed to ``index.search``.

    Returns:
        The ``(distances, indices)`` pair produced by ``index.search``.
    """
    query_row = np.array([query_embedding], dtype=np.float32)
    query_row = query_row.reshape(1, -1)
    nearest_distances, nearest_ids = index.search(query_row, k)
    return nearest_distances, nearest_ids

In [105]:
# Nearest job vectors to the resume embedding (default k=15, L2 distance).
distances, indices = search_faiss_index(index, resume_embedding)

In [106]:
def process_results(distances, indices, job_embeddings):
    """Map FAISS search hits back to their job records, best match first.

    Args:
        distances: distances returned by the search; accepted for interface
            symmetry with the search result but not used here.
        indices: 2-D result ids from the search; only the first row (the
            single query) is read.
        job_embeddings: list of records with a "job" key, in the same order
            as the vectors that were added to the index.

    Returns:
        A list of ``{"job": <job record>}`` dicts in the order FAISS
        returned them.
    """
    # FAISS pads the id row with -1 when the index holds fewer than k
    # vectors; indexing a list with -1 would silently return the LAST job,
    # so those placeholder ids are skipped.
    return [
        {"job": job_embeddings[row_id]["job"]}
        for row_id in indices[0]
        if row_id >= 0
    ]

In [107]:
# Map FAISS row ids back to job records. job_embeddings_all is appended to in
# the same loop (and therefore the same order) as job_embeddings_relevant,
# which the index was built from, so row ids line up; its entries expose the
# "job" key that process_results() reads.
ranked_jobs = process_results(distances, indices, job_embeddings_all)

In [118]:
def generate_gemini_prompt(resume_text, ranked_job):
    """Build the Gemini prompt asking for the top 5 jobs for a resume.

    Args:
        resume_text: plain text of the candidate's resume.
        ranked_job: the candidate job postings to choose from; interpolated
            into the prompt via its string representation.

    Returns:
        The prompt string.
    """
    # Interpolate the `ranked_job` argument, not a module-level variable, so
    # the prompt always reflects what the caller passed in.
    prompt = f"""
    Analyze the following job posting and the provided resume.
    Based on the skills, experience, and requirements in the job descriptions, give me the top 5 jobs the candidate should apply for.
    No need for explanations. 

    Resume:
    {resume_text}

    Job Posting:
    {ranked_job}

    """
    return prompt

In [121]:
# Build the prompt from the resume text and the ranked jobs, then send it to
# the Gemini model configured above.
prompt = generate_gemini_prompt(text, ranked_jobs)
response = model.generate_content(prompt)

In [122]:
# Show the text of the model's reply.
print(response.text)

1. Electrical Engineering Co-op at Schweitzer Engineering Laboratories Inc
2. Power Engineer Co-op in Research and Development at Schweitzer Engineering Laboratories Inc
3. Electrical Engineering Co-op at Exelon
4. Power Engineering (Electrical or Mechanical) Co-Op at ExecuPOWER, LLC
5. Power Engineer Co-op at Schweiter Engineering Laboratories Inc
