In [1]:
pip install nltk tensorflow scikit-learn pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import nltk
import re

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
job_df = pd.read_csv(r"C:\Users\ADMIN\Downloads\job_descriptions.csv")
intent_df = pd.read_csv(r"C:\Users\ADMIN\Downloads\hr_intents.csv")
skills_df = pd.read_csv(r"C:\Users\ADMIN\Downloads\skills_master.csv")

job_df.head()


Unnamed: 0,job_id,job_title,required_experience,skills,job_description
0,1,Data Analyst,2,"python,sql,excel,power bi","Analyze business data, create dashboards, writ..."
1,2,Data Scientist,3,"python,ml,nlp,statistics","Build machine learning models, analyze large d..."
2,3,Machine Learning Engineer,3,"python,ml,deep learning,tensorflow",Develop scalable ML models and deploy them int...
3,4,HR Executive,1,"communication,hr policies,recruitment","Handle recruitment process, employee onboardin..."
4,5,Software Developer,2,"python,java,git,problem solving","Develop software applications, write clean cod..."


In [4]:
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z ]', '', text)
    tokens = nltk.word_tokenize(text)
    tokens = [w for w in tokens if w not in stop_words]
    return " ".join(tokens)


In [5]:
intent_df["clean_question"] = intent_df["question"].apply(clean_text)

X_texts = intent_df["clean_question"].values
y_labels = intent_df["intent"].values


In [6]:
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(X_texts)

X_seq = tokenizer.texts_to_sequences(X_texts)

max_len = max(len(seq) for seq in X_seq)
X_padded = pad_sequences(X_seq, maxlen=max_len, padding='post')


In [7]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_labels)

y_categorical = to_categorical(y_encoded)


In [8]:
vocab_size = len(tokenizer.word_index) + 1
num_classes = y_categorical.shape[1]

model = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=64,
        input_shape=(max_len,)   # ✅ IMPORTANT FIX
    ),
    LSTM(128),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


  super().__init__(**kwargs)


In [9]:
history = model.fit(
    X_padded,
    y_categorical,
    epochs=50,
    batch_size=8,
    verbose=1
)


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 72ms/step - accuracy: 0.0714 - loss: 2.3038  
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.2857 - loss: 2.2991
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.2857 - loss: 2.2938 
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.2857 - loss: 2.2888
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.2857 - loss: 2.2879
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.3571 - loss: 2.2790
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.4286 - loss: 2.2772
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.3571 - loss: 2.2646
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [10]:
def predict_intent(text):
    text = clean_text(text)
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len, padding='post')
    pred = model.predict(padded)
    intent = label_encoder.inverse_transform([np.argmax(pred)])
    confidence = np.max(pred)
    return intent[0], confidence


In [11]:
def show_jobs():
    print("\nAvailable Jobs:")
    for i, row in job_df.iterrows():
        print(f"{i+1}. {row['job_title']}")

def get_job(job_title):
    return job_df[job_df['job_title'].str.lower() == job_title.lower()]


In [12]:
def extract_skills(text):
    text = clean_text(text)
    found_skills = []
    for skill in skills_df['skill']:
        if skill in text:
            found_skills.append(skill)
    return list(set(found_skills))


In [13]:
def ats_score(jd_skills, candidate_skills):
    matched = set(jd_skills).intersection(set(candidate_skills))
    score = (len(matched) / len(jd_skills)) * 100
    return score, matched


In [14]:
selected_job = None
candidate_skills = []
candidate_exp = 0


In [15]:
def chatbot_response(user_input):
    global selected_job, candidate_skills, candidate_exp
    
    intent, conf = predict_intent(user_input)
    
    if conf < 0.6:
        return "Sorry, I didn’t understand that. Please try again."
    
    if intent == "greeting":
        return "Hello! Welcome to HR Recruitment Chatbot."
    
    if intent == "job_inquiry":
        show_jobs()
        return "Please type the job title you want to apply for."
    
    if intent == "apply_job":
        show_jobs()
        return "Select a job by typing job title."
    
    if intent == "jd_request":
        if selected_job is not None:
            return selected_job['job_description'].values[0]
        else:
            return "Please select a job first."
    
    if selected_job is None:
        job = get_job(user_input)
        if not job.empty:
            selected_job = job
            return f"Job Selected: {job['job_title'].values[0]}\nEnter your skills:"
    
    if "skill" in intent:
        candidate_skills = extract_skills(user_input)
        return "Enter your total experience (in years):"
    
    if "experience" in intent:
        candidate_exp = int(re.findall(r'\d+', user_input)[0])
        
        jd_skills = selected_job['skills'].values[0].split(",")
        jd_exp = selected_job['required_experience'].values[0]
        
        score, matched = ats_score(jd_skills, candidate_skills)
        
        if score >= 70 and candidate_exp >= jd_exp:
            return f"✅ Shortlisted\nATS Score: {score:.2f}%\nMatched Skills: {matched}"
        else:
            return f"❌ Rejected\nATS Score: {score:.2f}%\nMatched Skills: {matched}"
    
    if intent == "goodbye":
        return "Thank you for visiting. Goodbye!"
    
    return "How can I assist you?"


In [16]:
print("HR Recruitment Chatbot (type 'exit' to stop)\n")

while True:
    user = input("You: ")
    if user.lower() == "exit":
        print("Bot: Goodbye!")
        break
    print("Bot:", chatbot_response(user))


HR Recruitment Chatbot (type 'exit' to stop)



You:  hi


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 433ms/step
Bot: Hello! Welcome to HR Recruitment Chatbot.


You:  I want to apply for a job


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step

Available Jobs:
1. Data Analyst
2. Data Scientist
3. Machine Learning Engineer
4. HR Executive
5. Software Developer
6. Business Analyst
7. AI Engineer
8. Web Developer
9. Data Engineer
10. Intern Data Analyst
Bot: Select a job by typing job title.


You:  Show job description


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
Bot: Please select a job first.


You:   Data Analyst


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Bot: Sorry, I didn’t understand that. Please try again.


You:  


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Bot: Thank you for visiting. Goodbye!


You:  Exit


Bot: Goodbye!


In [18]:
model.save("Chatbot_Mode_hrl.h5")

