In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install langchain sentence-transformers faiss-cpu transformers accelerate bitsandbytes

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting async-timeout<5.0.0,>=4.0.0 (from langchain)
  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl (30.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 30.7/30.7 MB 50.1 MB/s eta 0:00:00
Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 69.7/69.7 MB 23.1 MB/s eta 0:00:00
Downloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)
Installing collected packages: async-timeout, faiss-cpu, bitsandbytes
  Attempting uninstall: async-timeout
    Found existing installation: async-timeout 5.0.1
    Uninstalling as

In [3]:
import faiss
import json
from sentence_transformers import SentenceTransformer
import os

# 1. Prepare Resume Best Practices Data:

# JSON file that caches the best-practice catalogue between runs.
practices_path = "resume_best_practices_software_data.json"

# Built-in catalogue, used (and written to disk) only when no cached file exists yet.
# Maps a category name to the list of advice sentences in that category.
default_best_practices = {
    "Software_Experience": [
        "Quantify software development achievements (e.g., 'Reduced bug reports by 20%').",
        "Highlight experience with specific programming languages, frameworks, and tools relevant to the target role (e.g., Python, Java, React, AWS).",
        "Describe contributions to open-source projects or significant personal projects.",
        "Showcase experience with Agile methodologies (Scrum, Kanban).",
        "Emphasize experience with version control systems (Git).",
        "Detail experience with testing frameworks and practices (unit testing, integration testing).",
        "Showcase experience with CI/CD pipelines."
    ],
    "Software_Skills": [
        "List technical skills prominently, grouping them by category (e.g., Programming Languages, Cloud Technologies, Databases).",
        "Prioritize skills mentioned in the job description.",
        "Include both front-end and back-end technologies if applicable.",
        "Mention any relevant certifications (e.g., AWS Certified Solutions Architect).",
        "Be specific about versions or levels of proficiency (e.g., 'Python 3.7+,' 'AWS Certified Solutions Architect - Associate')."
    ],
    "Software_Projects": [
        "Describe software projects in detail, focusing on the technologies used, the challenges overcome, and the results achieved.",
        "Include links to GitHub repositories or live demos if possible.",
        "Quantify the impact of your projects (e.g., 'Developed a web application that increased user engagement by 30%').",
        "Highlight any leadership roles or contributions to team projects."
    ],
    "Data_Experience": [
        "Quantify data-related accomplishments (e.g., 'Improved model accuracy by 10%').",
        "Highlight experience with data analysis tools and techniques (e.g., Pandas, NumPy, Scikit-learn).",
        "Describe experience with machine learning algorithms and models.",
        "Showcase experience with data visualization tools (e.g., Tableau, Power BI).",
        "Emphasize experience with big data technologies (e.g., Hadoop, Spark) if relevant.",
        "Detail experience with database systems (SQL, NoSQL).",
        "Mention experience with cloud-based data services (e.g., AWS S3, Azure Blob Storage)."
    ],
    "Data_Skills": [
        "List data-related skills prominently, grouping them by category (e.g., Data Analysis, Machine Learning, Big Data).",
        "Prioritize skills mentioned in the job description.",
        "Mention specific tools, libraries, and frameworks (e.g., TensorFlow, PyTorch).",
        "Include experience with data mining, data cleaning, and data preprocessing techniques.",
        "List any relevant certifications (e.g., Google Data Analytics Professional Certificate)."
    ],
    "Data_Projects": [
        "Describe data science or data engineering projects in detail, focusing on the data sources, the methodologies used, and the insights gained.",
        "Include metrics to demonstrate the success of your projects.",
        "Highlight any contributions to data-driven decision-making.",
        "Showcase experience with deploying machine learning models to production."
    ],
    # General advice applicable to both software and data roles
    "General": [
        "Tailor your resume to each specific job application.",
        "Use action verbs to start your bullet points.",
        "Focus on the impact of your work.",
        "Keep your resume concise and easy to read.",
        "Proofread carefully for any errors."
    ],
}

if not os.path.exists(practices_path):
    # First run: adopt the built-in catalogue and persist it for later sessions.
    resume_best_practices = default_best_practices
    with open(practices_path, "w", encoding="utf-8") as practices_out:
        json.dump(resume_best_practices, practices_out, indent=4)
else:
    # A cached catalogue exists; it takes precedence over the built-in one.
    with open(practices_path, "r", encoding="utf-8") as practices_in:
        resume_best_practices = json.load(practices_in)

# 2. Create Embeddings and FAISS Index:

model = SentenceTransformer('all-mpnet-base-v2')  # Or any other suitable model

# Flatten the {category: [practice, ...]} mapping into one record per practice.
# Row i of the embedding matrix below corresponds to data[i]; this shared order is
# what lets a FAISS result id be mapped back to its text and category.
data = [
    {"category": category, "practice": practice}  # Store category for retrieval
    for category, practices in resume_best_practices.items()
    for practice in practices
]
if not data:
    # An empty catalogue (e.g. an empty cached JSON file) would otherwise surface
    # as an opaque IndexError on `.shape[1]` further down.
    raise ValueError("No resume best practices found; nothing to index.")

# Encode all practices in one batched call rather than one model call per sentence:
# fewer forward passes and a single (suppressed) progress bar instead of one per item.
embeddings = model.encode(
    [record["practice"] for record in data],
    convert_to_numpy=True,
    show_progress_bar=False,
)

# FAISS works on a contiguous float32 matrix of shape (n_vectors, dim).
embeddings_np = np.ascontiguousarray(embeddings, dtype='float32')
d = embeddings_np.shape[1]  # embedding dimensionality

# Choose appropriate FAISS index. IndexFlatL2 is good for smaller datasets. For larger datasets, use IndexIVFFlat or IndexHNSWFlat.
index = faiss.IndexFlatL2(d)  # L2 distance for similarity search
index.add(embeddings_np)

# Persist the index and its row-aligned metadata side by side so they can be
# reloaded together later.
faiss.write_index(index, "resume_best_practices_index.faiss")

with open("resume_best_practices_data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4)

print("FAISS index and metadata saved.")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

FAISS index and metadata saved.


In [4]:
# --- Retrieval (Example) ---
# Reload the index and its metadata from disk, as a consumer of the saved
# artifacts would.
index = faiss.read_index("resume_best_practices_index.faiss")
with open("resume_best_practices_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# FAISS result ids are used as positions in `data`, so both files must describe
# the same number of rows; a stale file on either side would return wrong text.
if index.ntotal != len(data):
    raise ValueError(
        f"Index/metadata mismatch: index holds {index.ntotal} vectors "
        f"but metadata has {len(data)} records."
    )

query = "How should I describe my work experience?"  # Example query
query_embedding = model.encode(query).astype("float32").reshape(1, -1)

# Never request more neighbours than the index contains: FAISS pads missing
# results with id -1, and data[-1] would silently return the last record.
top_k = min(3, index.ntotal)  # Retrieve top 3
if top_k > 0:
    # D holds distances, I holds row ids (names follow the FAISS convention).
    D, I = index.search(query_embedding, top_k)
    retrieved_practices = [data[i]["practice"] for i in I[0] if i >= 0]
else:
    retrieved_practices = []

print("Retrieved Best Practices:")
for practice in retrieved_practices:
    print(f"- {practice}")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Retrieved Best Practices:
- Keep your resume concise and easy to read.
- Highlight experience with specific programming languages, frameworks, and tools relevant to the target role (e.g., Python, Java, React, AWS).
- Focus on the impact of your work.
