# Start here
1. Upload your resume to the root directory and remove `resume_example.pdf`.
2. Fill out the additional questions in the `questions.json` file (feel free to add your own).
3. Work through each cell in this document (`FineTuneYou.py`).

**Note:** If any errors occur, make sure the required modules are installed and your Python environment is properly configured.


In [None]:
# Import modules
import sys
sys.path.append("src")
from data_upload import create_text_from_pdf, create_text_from_questions
from questions_creation import generate_resume_questions
from fine_tune import collate_fine_tune_data, split_data, upload_train_test_files, create_finetune_job, check_job
from embedding import create_embeddings
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
import openai

In [None]:
# Convert resume.pdf and questions.json to text files and store them in './src/text_files'.
# Both helpers come from the data_upload module and are called without arguments,
# so the input and output locations are whatever those helpers use internally.
create_text_from_pdf()
create_text_from_questions()

In [None]:
# Generate question/answer pairs based on the uploaded resume using gpt-3.5-turbo.
# NOTE(review): presumably reads the text produced by the previous cell — confirm
# in questions_creation.generate_resume_questions.
generate_resume_questions()

In [None]:
# Organize the data needed for fine-tuning and evaluation.
# collate_fine_tune_data() runs first and split_data() second; keep this order
# (presumably the split operates on the collated output — verify in fine_tune).
collate_fine_tune_data()
split_data()

In [None]:
# Upload the training and validation files and get their IDs.
# The two returned IDs are passed to create_finetune_job() in the next cell.
training_file_id, validation_file_id = upload_train_test_files()

In [None]:
# Start the fine-tuning process.
# job_id is kept so later cells can check the job and retrieve the resulting model.
suffix = "FineTuneYou"  # Choose a suffix for your fine-tuned model
job_id = create_finetune_job(training_file_id, validation_file_id, suffix)

In [None]:
# Check the status of the fine-tuning job started above (identified by job_id).
# The fine-tuning process usually takes around 5-10 minutes.
check_job(job_id)

In [None]:
# Generate embeddings.
# NOTE(review): presumably this populates the Chroma store that is loaded from
# 'docs/chroma/' further down — confirm in embedding.create_embeddings.
create_embeddings()

In [None]:
# Retrieve the ID of the fine-tuned model.
# If this prints "Fine-tuned model ID: None", the job has not produced a model
# yet; the cells below need a real model ID, so wait and re-run this cell first.
response = openai.FineTuningJob.retrieve(job_id)
fine_tuned_model_id = response["fine_tuned_model"]

print("Fine-tuned model ID:", fine_tuned_model_id)
if fine_tuned_model_id is None:
    # Show the job status so a bare "None" is not mistaken for a usable result.
    print(
        "No fine-tuned model is available yet "
        f"(job status: {response.get('status')}). "
        "Re-run this cell once the job has succeeded."
    )

In [None]:
# Initialize the language model with the fine-tuned model ID.
# fine_tuned_model_id comes from the previous cell and must not be None here.
llm_name=fine_tuned_model_id
# Directory from which the Chroma vector store is loaded.
persist_directory = 'docs/chroma/'
embedding = OpenAIEmbeddings()
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
# The chat model used by the QA chain below, with temperature set to 0.2.
llm = ChatOpenAI(model_name=llm_name, temperature=0.2)

In [None]:
# Define the prompt template for the QA chain.
# The template holds the task instructions, a {context} placeholder, one
# hard-coded example question/answer pair, and a {question} placeholder.
# NOTE(review): the example answer ("five years" as an Amazon Associate) is
# fixed text in the prompt — adjust it if it does not match your own background.
template = """Your task is to accurately represent professional and educational background, as well as interests and hobbies, while speaking in the first person. Kindly refrain from answering questions that aren't related to these topics.
{context}
Question: How long have you worked as an Amazon Associate?
Answer: I have worked as an Amazon Associate for five years.
Question: {question}
Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Build the QA chain from the fine-tuned LLM and the Chroma retriever.
# return_source_documents=True is requested so results can also carry the
# retrieved documents; the custom prompt is passed through chain_type_kwargs.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [None]:
# Execute the QA chain with a sample query.
question = "Where did you grow up?"
result = qa_chain({"query": question})
# As the last expression in the cell, the answer text is displayed as the cell output.
result["result"]

# RESET ENVIRONMENT

In [None]:
import os
import glob
import shutil

def reset_project():
    """Delete the generated files so the notebook can be run again from scratch.

    All paths are relative to the current working directory. Removes:

    * every ``*.txt`` file in ``./src/text_files/`` except
      ``inappropriate_questions.txt``;
    * every ``*.json`` and ``*.jsonl`` file directly inside ``./src/``;
    * the entire ``./docs/`` directory, if it exists.

    Prints a confirmation message when done and returns ``None``.
    """
    keep_name = 'inappropriate_questions.txt'

    # Remove the text files in ./src/text_files/, keeping only the one file
    # named exactly inappropriate_questions.txt. The base name is compared for
    # equality: a substring test would also spare files such as
    # "old_inappropriate_questions.txt".
    for file in glob.glob('./src/text_files/*.txt'):
        if os.path.basename(file) != keep_name:
            os.remove(file)

    # Remove the .json and .jsonl files in ./src/ (non-recursive).
    for pattern in ('./src/*.json', './src/*.jsonl'):
        for file in glob.glob(pattern):
            os.remove(file)

    # Remove the entire ./docs/ directory.
    if os.path.exists('./docs'):
        shutil.rmtree('./docs')

    print("Project reset completed. All selected generated files have been removed.")

In [None]:
# Call the function to reset the project.
# WARNING: this permanently deletes the generated text/JSON files and the ./docs directory.
reset_project()