In [1]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import pandas as pd
import numpy as np

# Load the pre-trained GPT-2 model and tokenizer
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Define a padding token for the tokenizer (e.g., [PAD])
# You can use a different token if you prefer
tokenizer.pad_token = tokenizer.eos_token

# Load and preprocess your dataset (job descriptions and job titles)
dataset = pd.read_csv("/kaggle/input/gpttrain/x.csv")

# Handle NaN values in the "jobdescription" column
dataset["jobdescription"].fillna("", inplace=True)

# Tokenize the dataset
text_data = dataset["jobdescription"].tolist()

# Create a list of texts with a special separator token [SEP]
texts = ["[SEP] " + text for text in text_data]

# Prepare the training dataset
with open("your_training_data.txt", "w", encoding="utf-8") as file:
    file.write("\n".join(texts))

text_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="your_training_data.txt",
    block_size=128,
)

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./fine-tuned-model",
    overwrite_output_dir=True,
    num_train_epochs=1,
    per_device_train_batch_size=16,
    save_steps=10_000,
    save_total_limit=2,
)

# Create a Trainer instance for fine-tuning
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=text_dataset,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model
trainer.save_model("./fine-tuned-model")

# Load the fine-tuned model for generating job descriptions
fine_tuned_model = GPT2LMHeadModel.from_pretrained("./fine-tuned-model")

# Now you can use `fine_tuned_model` to generate job descriptions based on job titles.




Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
500,3.2353
1000,2.9269
1500,2.7957
2000,2.7292
2500,2.6983
3000,2.6757
3500,2.6528
4000,2.6026


In [15]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the fine-tuned GPT-2 model
fine_tuned_model = GPT2LMHeadModel.from_pretrained("./fine")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Set the model to generate mode
fine_tuned_model.eval()

# Define a prompt for "Experience & Qualifications"
experience_qualifications_prompt = "Please provide details of your relevant experience and qualifications:\n"
# Define a prompt for "Hard Skills"
hard_skills_prompt = "List your hard skills and technical proficiencies: for sofware achitecht\n"

# Generate content for "Experience & Qualifications"
generated_experience_qualifications = fine_tuned_model.generate(
    input_ids=tokenizer.encode(experience_qualifications_prompt, return_tensors="pt"),
    max_length=200,  # Adjust the length as needed
    num_return_sequences=1,  # Number of generated sequences
    no_repeat_ngram_size=2,  # Avoid repeating phrases
    top_k=50,  # Limit the selection to the top-k tokens
    top_p=0.95,  # Limit the probability of tokens
    temperature=0.7,  # Adjust the temperature for randomness
)
# Generate content for "Hard Skills"
generated_hard_skills = fine_tuned_model.generate(
    input_ids=tokenizer.encode(hard_skills_prompt, return_tensors="pt"),
    max_length=200,  # Adjust the length as needed
    num_return_sequences=1,  # Number of generated sequences
    no_repeat_ngram_size=2,  # Avoid repeating phrases
    top_k=50,  # Limit the selection to the top-k tokens
    top_p=0.95,  # Limit the probability of tokens
    temperature=0.7,  # Adjust the temperature for randomness
)

# Decode the generated text for "Experience & Qualifications" and split it into points
generated_experience_qualifications_text = tokenizer.decode(
    generated_experience_qualifications[0], skip_special_tokens=True
)
experience_qualifications_points = generated_experience_qualifications_text.split("\n")

# Decode the generated text for "Hard Skills" and split it into points
generated_hard_skills_text = tokenizer.decode(
    generated_hard_skills[0], skip_special_tokens=True
)
hard_skills_points = generated_hard_skills_text.split("\n")

# Print the lists of points
print("Experience & Qualifications:")
for i, point in enumerate(experience_qualifications_points):
    print(f"{i + 1}. {point}")

print("\nHard Skills:")
for i, point in enumerate(hard_skills_points):
    print(f"{i + 1}. {point}")


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Experience & Qualifications:
1. Please provide details of your relevant experience and qualifications:
2. [SEP] Job Description   Send me Jobs like this Job Responsibilities: - Develop and maintain a strong team of technical and business leaders to support the business goals of the company. - Work closely with the Business Development team to ensure that the team is well-organized and well organized. Salary: Not Disclosed by Recruiter Industry: IT-Software / Software Services Functional Area: Sales, Retail, Business Intelligence Role Category:Retail Sales Role:Sales/Business Development Manager Keyskills Business development business development manager business intelligence business analysis business process Desired Candidate Profile Education- UG: Any Graduate PG:MBA/PGDM Doctorate:Any Doctoration - Any Specialization, Doctorates Not Required Please refer to the Job description above Company Profile: Confidential Confirm is a leading provider of IT services and solutions to clients i

In [18]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def generate_resume_from_job_title(job_title, model_path="./fine", max_length=200, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7):
    # Load the fine-tuned GPT-2 model
    fine_tuned_model = GPT2LMHeadModel.from_pretrained(model_path)
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    
    # Set the model to generate mode
    fine_tuned_model.eval()

    # Generate the resume content
    generated_text = fine_tuned_model.generate(
        input_ids=tokenizer.encode(job_title, return_tensors="pt"),
        max_length=max_length,  # Adjust the length as needed
        num_return_sequences=num_return_sequences,  # Number of generated sequences
        no_repeat_ngram_size=no_repeat_ngram_size,  # Avoid repeating phrases
        top_k=top_k,  # Limit the selection to the top-k tokens
        top_p=top_p,  # Limit the probability of tokens
        temperature=temperature,  # Adjust the temperature for randomness
    )

    # Decode the generated text
    generated_resume = tokenizer.decode(generated_text[0], skip_special_tokens=True)
    
    return generated_resume

# Example usage:
job_title = "Software Engineer"
generated_resume = generate_resume_from_job_title(job_title)
print(generated_resume)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Software Engineer Keyskills Java J2Ee JSP JMS JPA JEE JSF JAX-RS JQuery JUnit JBoss JIRA JBOSS JDeveloper JDE Desired Candidate Profile Education- UG: Any Graduate - Any Specialization PG:Any Postgraduate -Any Specializations Doctorate:Doctorate Not Required Please refer to the Job description above Company Profile: Confidential Confluence is a leading provider of IT services and solutions to Fortune 500 companies. We provide a wide range of services to our clients including: - IT Software - Application Programming, Maintenance Role Category:Programming & Design Role:Software Developer Keysky JAVA Jquery JSTL JSDLC JSRM JVMS Desirable Candidate Education - Ug: B.Tech/B.E. - Computers, BCA - Computer Science, MCA PG - Other Doctorates:M.Sc - Software Engineering, Computational Science Doctoral Doctor -(NonTechnical
