# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import nltk
import string
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK data used by the preprocessing step: tokenizer models, the
# English stop-word list and WordNet for lemmatization.
# 'punkt_tab' is the tokenizer resource word_tokenize loads on NLTK >= 3.9;
# older releases use the pickled 'punkt'. Both are requested so the script
# works on either version.
# NOTE(review): on an older NLTK the unknown 'punkt_tab' id is expected to be
# reported by the downloader rather than raise; confirm against the pinned version.
for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_nltk_resource)

# Load datasets
@st.cache_data
def load_data():
    """Read the resume and job CSV files from the working directory.

    Returns:
        tuple: ``(resume_df, job_df)`` -- the DataFrames parsed from
        "Resume.csv" and "Training.csv", in that order.
    """
    csv_paths = ("Resume.csv", "Training.csv")
    # The generator is consumed left to right, so the resume file is read first.
    resumes, jobs = (pd.read_csv(path) for path in csv_paths)
    return resumes, jobs

# Module-level DataFrames shared by the preprocessing, ranking and UI code below.
resume_df, job_df = load_data()

# Preprocessing function
# Compiled once. re.escape keeps characters such as "]", "^" and "-" literal
# inside the character class instead of relying on where they happen to sit.
_PUNCTUATION_RE = re.compile(f"[{re.escape(string.punctuation)}]")
# Lazily filled cache: the stop-word corpus and the lemmatizer are built on
# first use and then reused for every document.
_TEXT_RESOURCES = {}


def _text_resources():
    """Return the ``(stop_word_set, lemmatizer)`` pair, creating it on first use."""
    if not _TEXT_RESOURCES:
        # A frozenset gives O(1) membership tests; the raw corpus is a list.
        _TEXT_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _TEXT_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _TEXT_RESOURCES['stop_words'], _TEXT_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps: lowercase, strip ASCII punctuation, tokenize with NLTK, drop
    English stop words, lemmatize each remaining token.

    Args:
        text: The document to clean. ``None`` and NaN (how pandas represents
            an empty CSV cell) are treated as an empty document; any other
            non-string value is converted with ``str()``.

    Returns:
        str: The cleaned tokens joined by single spaces ("" for missing input).
    """
    if not isinstance(text, str):
        # Missing cells would otherwise crash on .lower().
        if text is None or (isinstance(text, float) and np.isnan(text)):
            return ""
        text = str(text)

    stop_words, lemmatizer = _text_resources()
    cleaned = _PUNCTUATION_RE.sub("", text.lower())
    return " ".join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(cleaned)
        if token not in stop_words
    )

# Apply preprocessing
# Each frame gets a 'processed_text' column derived from its raw text column;
# resumes are processed first, then job descriptions.
for _frame, _raw_column in ((resume_df, 'Resume'), (job_df, 'Job Description')):
    _frame['processed_text'] = _frame[_raw_column].apply(preprocess_text)

# TF-IDF Vectorization
# The vectorizer is fitted on the resume corpus only, and the job descriptions
# are then projected into that same feature space with transform().
# NOTE(review): because the vocabulary comes from the resumes alone, job terms
# that never appear in any resume should contribute nothing to the job
# vectors -- confirm this is the intended behavior.
tfidf_vectorizer = TfidfVectorizer()
tfidf_resume = tfidf_vectorizer.fit_transform(resume_df['processed_text'])
tfidf_jobs = tfidf_vectorizer.transform(job_df['processed_text'])

# Compute cosine similarity
# Jobs are passed first, so the result is indexed as [job, resume];
# get_top_n_resumes reads one row of it per job.
cosine_similarities = cosine_similarity(tfidf_jobs, tfidf_resume)

# Function to get top resumes
def get_top_n_resumes(job_index, n=10):
    """Return the resume texts most similar to one job description.

    Args:
        job_index: Row of ``cosine_similarities`` (i.e. the job) to rank against.
        n: How many of the highest-scoring resumes to return.

    Returns:
        list: The 'Resume' values of the best matches, highest score first.
        Resumes with equal scores keep their original relative order.
    """
    similarity_row = cosine_similarities[job_index]
    # sorted() is stable even with reverse=True, so ties stay in positional order.
    ranked = sorted(enumerate(similarity_row), key=lambda pair: pair[1], reverse=True)
    best_positions = [position for position, _score in ranked[:n]]
    return [resume_df.iloc[position]['Resume'] for position in best_positions]

# Streamlit UI
st.title("Resume Matching System")

# The job is chosen by its positional index in job_df; the widget bounds keep
# the value inside the rows of the similarity matrix.
job_index = st.number_input(
    "Enter Job Index (0 to N-1)",
    min_value=0,
    max_value=len(job_df) - 1,
    step=1,
)

if st.button("Find Top Resumes"):
    # The ranking is computed before anything is rendered.
    top_resumes = get_top_n_resumes(job_index)
    st.write("### Top 10 Resumes:")
    for rank, resume_text in enumerate(top_resumes, start=1):
        st.write(f"**{rank}.** {resume_text}")

