In [1]:
# SkillLink - Prediction & Result
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import load_model

# -----------------------------
# Artifact locations
# -----------------------------
# Base directory that the artifact paths below are resolved against.
output_dir = r"C:\Users\NXTWAVE\Downloads\Job Skill Recommendation"

# Every artifact sits directly under output_dir, so build all paths in one pass.
tfidf_path, mlb_path, model_path = (
    os.path.join(output_dir, artifact_name)
    for artifact_name in (
        'tfidf_vectorizer.pkl',
        'mlb.pkl',
        'skill_model.keras',  # native Keras model
    )
)

# -----------------------------
# Load Models & Artifacts
# -----------------------------
def _read_pickle(path):
    """Return the object deserialized from the pickle file at *path*.

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file,
    so only point this at artifacts from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Objects unpickled from tfidf_path and mlb_path respectively.
tfidf = _read_pickle(tfidf_path)
mlb = _read_pickle(mlb_path)

# Model restored from the file at model_path.
model = load_model(model_path)

# -----------------------------
# Preprocessing Function
# -----------------------------
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources needed by the stop-word list and the lemmatizer.
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)

# Shared helpers used by preprocess_text.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Normalise free text into a space-separated string of lemmatized tokens.

    Missing values (as detected by ``pd.isna``) are treated as an empty
    string. The text is lower-cased, every character that is not an ASCII
    letter, digit or whitespace is removed, and the result is split on
    whitespace. Tokens found in ``stop_words`` are dropped; the rest are
    passed through ``lemmatizer.lemmatize`` and re-joined with single spaces.
    """
    cleaned = "" if pd.isna(text) else text
    cleaned = re.sub(r"[^a-zA-Z0-9\s]", "", cleaned.lower())

    kept = []
    for word in cleaned.split():
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))
    return " ".join(kept)

# -----------------------------
# Content-Based Recommendation Function
# -----------------------------
def recommend_skills(user_skills, job_title="", industry="", top_n=10, threshold=0.5):
    """Recommend skills for a user profile, most probable first.

    The profile fields are merged into one string, cleaned with
    ``preprocess_text``, vectorised with the loaded ``tfidf`` object and
    scored by ``model``. Labels are looked up in ``mlb.classes_``.

    Args:
        user_skills: Iterable of skill strings. A single string is treated
            as one skill rather than as a sequence of characters.
        job_title: Optional job title; appended to the profile text when
            non-empty.
        industry: Optional industry; appended to the profile text when
            non-empty.
        top_n: Maximum number of skills to return.
        threshold: A label is recommended only if its predicted score is
            strictly greater than this value.

    Returns:
        A list of at most ``top_n`` labels whose score exceeds
        ``threshold``, ordered by descending score (ties keep the order of
        ``mlb.classes_``). The list is empty when no label clears the
        threshold.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(user_skills, str):
        user_skills = [user_skills]

    # Combine user info into one string.
    profile_parts = list(user_skills)
    if job_title:
        profile_parts.append(job_title)
    if industry:
        profile_parts.append(industry)
    user_text = " ".join(profile_parts)

    # Preprocess & transform; the model is fed a dense array.
    user_vec = tfidf.transform([preprocess_text(user_text)])

    # One sample was passed in, so only the first row of the prediction is used.
    # Assumes the model's output columns line up with mlb.classes_ (the
    # original inverse_transform call relied on the same alignment).
    scores = np.asarray(model.predict(user_vec.toarray()))[0]

    # Rank by score before truncating, so top_n keeps the most likely labels
    # instead of whichever ones come first in class order.
    ranked = np.argsort(-scores, kind="stable")
    chosen = [idx for idx in ranked if scores[idx] > threshold][:top_n]
    return [mlb.classes_[idx] for idx in chosen]

# -----------------------------
# Example Prediction
# -----------------------------
# Sample profile used to exercise recommend_skills.
user_skills_input = ['python', 'data analysis']
job_title_input = 'Data Scientist'
industry_input = 'IT'

recommended_skills = recommend_skills(
    user_skills_input,
    job_title=job_title_input,
    industry=industry_input,
    top_n=10,
)

# -----------------------------
# Display Result
# -----------------------------
# Print each input alongside its label, followed by the recommendation.
for label, value in (
    ("User Current Skills:", user_skills_input),
    ("Target Job Title:", job_title_input),
    ("Target Industry:", industry_input),
    ("Recommended Skills:", recommended_skills),
):
    print(label, value)







[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\NXTWAVE\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\NXTWAVE\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


User Current Skills: ['python', 'data analysis']
Target Job Title: Data Scientist
Target Industry: IT
Recommended Skills: ["'communication'"]
