In [1]:
import pickle
import os
import re
import nltk
import spacy
import torch
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK data packages requested by this script
for nltk_package in ("punkt", "stopwords"):
    nltk.download(nltk_package)

# Pre-trained sentence-embedding model (pickled to disk later in this script)
bert_model = SentenceTransformer("all-MiniLM-L6-v2")

# spaCy English pipeline, kept under the name `nlp` for NER use
nlp = spacy.load("en_core_web_sm")

# Function to Preprocess Text
def preprocess(text):
    """Normalize ``text`` for vectorization.

    The text is lower-cased, every non-word character (regex ``\\W``) is
    replaced by a space, the result is split on whitespace, and tokens found
    in NLTK's English stop-word list are dropped.

    Args:
        text: Raw input string.

    Returns:
        The remaining tokens joined by single spaces; an empty string when
        no tokens survive.
    """
    tokens = re.sub(r"\W", " ", text.lower()).split()
    if not tokens:
        # Nothing to filter, so skip loading the stop-word corpus entirely.
        return ""
    # Load the stop-word list once per call (not once per token) and keep it
    # in a set so each membership test is O(1) instead of a list scan.
    english_stopwords = frozenset(stopwords.words("english"))
    return " ".join(token for token in tokens if token not in english_stopwords)

# Function to Train TF-IDF Model
def train_tfidf(texts):
    """Fit a TF-IDF vectorizer on ``texts`` and return it.

    The vectorizer is created with scikit-learn's built-in English
    stop-word filtering (``stop_words="english"``).

    Args:
        texts: Iterable of raw document strings to learn the vocabulary
            and IDF weights from.

    Returns:
        The fitted ``TfidfVectorizer`` instance.
    """
    tfidf = TfidfVectorizer(stop_words="english")
    # fit() returns the estimator itself, so the fitted object is passed straight back.
    return tfidf.fit(texts)

# Load resumes from files (for testing)
def load_resumes(directory="sample_resumes/"):
    """Read every regular file directly inside ``directory`` as text.

    Files are decoded as UTF-8 and undecodable bytes are dropped
    (``errors="ignore"``). Entries that are not regular files, such as
    sub-directories, are skipped.

    Args:
        directory: Path of the folder holding the resume files.

    Returns:
        A list with the text content of each file, ordered by file name.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
    """
    resumes = []
    # os.listdir() returns entries in arbitrary order; sort for reproducible results.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        # open() fails on directories, so only regular files are read.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
            resumes.append(file.read())
    return resumes

# Create models directory if not exists
os.makedirs("models", exist_ok=True)

# Training the TF-IDF Model
print("Training TF-IDF Model...")
resumes = load_resumes()
vectorizer = train_tfidf(resumes)

# Saving Models
pickle.dump(vectorizer, open("models/vectorizer.pkl", "wb"))
pickle.dump(bert_model, open("models/bert_model.pkl", "wb"))

print("✅ Models saved successfully! Use them in `app.py`.")


  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training TF-IDF Model...
✅ Models saved successfully! Use them in `app.py`.
