In [9]:
import re
import random
import spacy
import numpy as np
import tensorflow as tf
import pickle
import json
from PyPDF2 import PdfReader

# Load spaCy model (used by lemmatize_word to lemmatise chatbot input)
nlp = spacy.load("en_core_web_sm")

# Load the trained chatbot model
model = tf.keras.models.load_model('/kaggle/working/chatbot_model_skills.h5')

# Load words and classes.
# `words` is the vocabulary bow() encodes against; `classes` maps the model's
# arg-max output index to an intent tag (see get_response).
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by your own training run.
# `with` closes each file handle deterministically instead of leaking it.
with open('/kaggle/working/words_skills.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open('/kaggle/working/classes_skills.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)

# Load intents file (provides the canned responses per intent tag)
with open('/kaggle/input/intents/skills2.json') as intents_file:
    intents = json.load(intents_file)

# Skill keyword lists, one per domain. analyze_resume() checks each list
# against the cleaned, lower-cased resume text and reports skills that are
# absent.
# NOTE: clean_text() removes punctuation from the resume, so entries that
# contain punctuation ('c#', 'asp.net') cannot be found in the cleaned text and
# will always be reported as missing.
# NOTE: 'flask' is listed under both Data Science and Web Development.
ds_keyword = ['tensorflow', 'keras', 'pytorch', 'machine learning', 'deep learning', 'flask', 'streamlit']
web_keyword = ['react', 'django', 'node js', 'react js', 'php', 'laravel', 'magento', 'wordpress', 
               'javascript', 'angular js', 'c#', 'asp.net', 'flask']
android_keyword = ['android', 'android development', 'flutter', 'kotlin', 'xml', 'kivy']
ios_keyword = ['ios', 'ios development', 'swift', 'cocoa', 'cocoa touch', 'xcode']
uiux_keyword = ['ux', 'adobe xd', 'figma', 'zeplin', 'balsamiq', 'ui', 'prototyping', 'wireframes', 
                'storyframes', 'adobe photoshop', 'photoshop', 'editing', 'adobe illustrator', 
                'illustrator', 'adobe after effects', 'after effects', 'adobe premier pro', 
                'premier pro', 'adobe indesign', 'indesign', 'wireframe', 'solid', 'grasp', 
                'user research', 'user experience']
# General / soft skills that are not tied to a technical domain
n_any = ['english', 'communication', 'writing', 'microsoft office', 'leadership', 
         'customer management', 'social media']

def clean_text(text):
    """Normalise raw text for keyword matching.

    Every character that is neither a word character nor whitespace is
    replaced by a space. Deleting it instead would fuse neighbouring words
    ("python/java" -> "pythonjava", "machine-learning" -> "machinelearning")
    and hide skills from the matcher. Runs of whitespace, including newlines,
    are then collapsed to single spaces and the result is lower-cased.

    Args:
        text: Raw text, e.g. as extracted from a PDF.

    Returns:
        The cleaned, lower-cased, single-spaced text.
    """
    # Punctuation becomes a word separator rather than vanishing.
    text = re.sub(r'[^\w\s]', ' ', text)
    # split()/join collapses newlines and repeated spaces and trims the ends.
    return ' '.join(text.split()).lower()

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Path of the PDF file, passed straight to PdfReader.

    Returns:
        The page texts joined with newlines.
    """
    reader = PdfReader(pdf_path)
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next. `or ""` is defensive: it tolerates a page whose
    # extraction yields None or an empty result.
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def analyze_skills(resume_text, skill_keywords):
    """Pick one skill from `skill_keywords` that the resume does not mention.

    A skill counts as present only when it occurs as a whole word or phrase.
    A plain substring test would treat 'ui' as present in "building" or 'ios'
    in "curiosity". Words of a multi-word skill may be separated by any amount
    of whitespace. Matching is case-sensitive, so pass lower-cased text.

    Args:
        resume_text: Cleaned resume text to search.
        skill_keywords: Iterable of skill names to look for.

    Returns:
        A randomly chosen missing skill, or None when none are missing.
    """
    missing_skills = []
    for skill in skill_keywords:
        # Lookarounds instead of \b so skills ending in a non-word character
        # (e.g. 'c#') still get a sensible boundary check.
        pattern = r'(?<!\w)' + r'\s+'.join(map(re.escape, skill.split())) + r'(?!\w)'
        if not re.search(pattern, resume_text):
            missing_skills.append(skill)
    return random.choice(missing_skills) if missing_skills else None

def analyze_resume(pdf_path):
    """Suggest one missing skill per domain for the resume at `pdf_path`.

    The PDF text is extracted and cleaned, then each domain's keyword list is
    passed to analyze_skills(). Domains for which nothing is suggested are
    left out of the result.

    Returns:
        Dict mapping domain name to the suggested missing skill.
    """
    cleaned_resume = clean_text(extract_text_from_pdf(pdf_path))

    # Order matters: it fixes both the result order and the order in which
    # analyze_skills() is called.
    domain_keywords = (
        ("Data Science", ds_keyword),
        ("Web Development", web_keyword),
        ("Android Development", android_keyword),
        ("iOS Development", ios_keyword),
        ("UI/UX Design", uiux_keyword),
        ("Other Skills", n_any),
    )

    suggestions = {}
    for domain, keywords in domain_keywords:
        suggestion = analyze_skills(cleaned_resume, keywords)
        if suggestion:
            suggestions[domain] = suggestion
    return suggestions

def lemmatize_word(word):
    """Return the lemma of the first token spaCy produces for `word`."""
    parsed = nlp(word)
    return parsed[0].lemma_

def clean_up_sentence(sentence):
    """Split `sentence` on whitespace and return the lemma of each lower-cased piece."""
    return [lemmatize_word(piece.lower()) for piece in sentence.split()]

def bow(sentence, words, show_details=True):
    """Encode `sentence` as a binary bag-of-words vector over `words`.

    Position i of the result is 1 when words[i] equals one of the lemmatised
    tokens of the sentence, otherwise 0. `show_details` is accepted but not
    used.

    Returns:
        A NumPy array with one entry per element of `words`.
    """
    tokens = clean_up_sentence(sentence)
    return np.array([1 if vocab_word in tokens else 0 for vocab_word in words])

def predict_class(sentence):
    """Run the chatbot model on `sentence` and return its raw prediction.

    The sentence is encoded with bow() against the global vocabulary and fed
    to the model as a batch containing that single row.
    """
    features = bow(sentence, words)
    # Add a leading axis: 1-D feature vector -> (1, n_features) batch.
    single_row_batch = features[np.newaxis, :]
    return model.predict(single_row_batch)

def get_response(prediction, intents_json):
    """Turn a model prediction into a response string.

    The arg-max index of `prediction` selects a tag from the global `classes`;
    a random response of the first intent carrying that tag is returned. When
    no intent has the tag, a fixed fallback message is returned.
    """
    predicted_tag = classes[np.argmax(prediction)]

    matching_intent = next(
        (entry for entry in intents_json['intents'] if entry['tag'] == predicted_tag),
        None,
    )
    if matching_intent is None:
        return "Sorry, I didn't understand that."
    return random.choice(matching_intent['responses'])

def analyze_resume_with_chatbot_responses(pdf_path, output_path):
    """Analyse a resume, attach a chatbot response per missing skill, and save.

    Args:
        pdf_path: Path of the resume PDF.
        output_path: Path of the JSON file to write.

    Returns:
        Dict with the keys "Missing Skills Analysis" and "Chatbot Responses";
        the same structure is written to `output_path` and echoed to stdout.
    """
    missing_skills_summary = analyze_resume(pdf_path)

    # One model query per suggested skill, keyed by the skill's domain.
    chatbot_responses = {}
    for domain, skill in missing_skills_summary.items():
        chatbot_responses[domain] = get_response(predict_class(skill), intents)

    output_data = {
        "Missing Skills Analysis": missing_skills_summary,
        "Chatbot Responses": chatbot_responses,
    }

    with open(output_path, "w") as json_file:
        json.dump(output_data, json_file, indent=4)

    # Console report: missing skills first, then the responses.
    print("Missing Skills :")
    for domain, value in missing_skills_summary.items():
        print(f"{domain}: {value}")

    print("\n")
    for domain, value in chatbot_responses.items():
        print(f"{domain}: {value}")

    print(f"\nMissing skills and chatbot responses saved to: {output_path}")

    return output_data

# Example usage (runs when the cell executes): input resume and output file
pdf_path = "/kaggle/input/resume/resume.pdf"
output_path_combined = "/kaggle/working/missing_skills_and_chatbot_responses.json"

# Analyze the resume, write the JSON report and print it to the console.
# The suggested skills are chosen at random, so results differ between runs.
result = analyze_resume_with_chatbot_responses(pdf_path, output_path_combined)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Missing Skills :
Data Science: pytorch
Web Development: magento
Android Development: kivy
iOS Development: xcode
UI/UX Design: wireframes
Other Skills: leadership


Data Science: <div class='skill-description'><h3>PyTorch</h3><p>PyTorch is a modern machine learning framework known for its dynamic computational graphs.</p><h4>Key Features:</h4><ul><li>Dynamic neural networks</li><li>Python integration</li><li>Extensive libraries</li></ul><h4>Applications:</h4><ul><li>Deep learning research</li><li>Computer vision</li><li>Natural language processing</li></ul><h4>

In [None]:
$ source .venv/scripts/activate

In [15]:
import re
import random
import spacy
import numpy as np
import tensorflow as tf
import pickle
import json
from PyPDF2 import PdfReader

# Load spaCy model (used by lemmatize_word to lemmatise chatbot input)
nlp = spacy.load("en_core_web_sm")

# Load the trained chatbot model
model = tf.keras.models.load_model('/kaggle/working/chatbot_model_skills.h5')

# Load words and classes.
# `words` is the vocabulary bow() encodes against; `classes` maps the model's
# arg-max output index to an intent tag (see get_response).
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by your own training run.
# `with` closes each file handle deterministically instead of leaking it.
with open('/kaggle/working/words_skills.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open('/kaggle/working/classes_skills.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)

# Load intents file (provides the canned responses per intent tag)
with open('/kaggle/input/intents/skills2.json') as intents_file:
    intents = json.load(intents_file)

# Skill keyword lists, one per domain. analyze_resume() checks each list
# against the cleaned, lower-cased resume text and reports skills that are
# absent.
# NOTE: clean_text() removes punctuation from the resume, so entries that
# contain punctuation ('c#', 'asp.net') cannot be found in the cleaned text and
# will always be reported as missing.
# NOTE: 'flask' is listed under both Data Science and Web Development.
ds_keyword = ['tensorflow', 'keras', 'pytorch', 'machine learning', 'deep learning', 'flask', 'streamlit']
web_keyword = ['react', 'django', 'node js', 'react js', 'php', 'laravel', 'magento', 'wordpress', 
               'javascript', 'angular js', 'c#', 'asp.net', 'flask']
android_keyword = ['android', 'android development', 'flutter', 'kotlin', 'xml', 'kivy']
ios_keyword = ['ios', 'ios development', 'swift', 'cocoa', 'cocoa touch', 'xcode']
uiux_keyword = ['ux', 'adobe xd', 'figma', 'zeplin', 'balsamiq', 'ui', 'prototyping', 'wireframes', 
                'storyframes', 'adobe photoshop', 'photoshop', 'editing', 'adobe illustrator', 
                'illustrator', 'adobe after effects', 'after effects', 'adobe premier pro', 
                'premier pro', 'adobe indesign', 'indesign', 'wireframe', 'solid', 'grasp', 
                'user research', 'user experience']
# General / soft skills that are not tied to a technical domain
n_any = ['english', 'communication', 'writing', 'microsoft office', 'leadership', 
         'customer management', 'social media']

def clean_text(text):
    """Normalise raw text for keyword matching.

    Every character that is neither a word character nor whitespace is
    replaced by a space. Deleting it instead would fuse neighbouring words
    ("python/java" -> "pythonjava", "machine-learning" -> "machinelearning")
    and hide skills from the matcher. Runs of whitespace, including newlines,
    are then collapsed to single spaces and the result is lower-cased.

    Args:
        text: Raw text, e.g. as extracted from a PDF.

    Returns:
        The cleaned, lower-cased, single-spaced text.
    """
    # Punctuation becomes a word separator rather than vanishing.
    text = re.sub(r'[^\w\s]', ' ', text)
    # split()/join collapses newlines and repeated spaces and trims the ends.
    return ' '.join(text.split()).lower()

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Path of the PDF file, passed straight to PdfReader.

    Returns:
        The page texts joined with newlines.
    """
    reader = PdfReader(pdf_path)
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next. `or ""` is defensive: it tolerates a page whose
    # extraction yields None or an empty result.
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def analyze_skills(resume_text, skill_keywords):
    """Return the skills from `skill_keywords` that the resume does not mention.

    A skill counts as present only when it occurs as a whole word or phrase.
    A plain substring test would treat 'ui' as present in "building" or 'ios'
    in "curiosity". Words of a multi-word skill may be separated by any amount
    of whitespace. Matching is case-sensitive, so pass lower-cased text.

    Args:
        resume_text: Cleaned resume text to search.
        skill_keywords: Iterable of skill names to look for.

    Returns:
        List of missing skills, in the order they appear in `skill_keywords`.
    """
    missing_skills = []
    for skill in skill_keywords:
        # Lookarounds instead of \b so skills ending in a non-word character
        # (e.g. 'c#') still get a sensible boundary check.
        pattern = r'(?<!\w)' + r'\s+'.join(map(re.escape, skill.split())) + r'(?!\w)'
        if not re.search(pattern, resume_text):
            missing_skills.append(skill)
    return missing_skills

def analyze_resume(pdf_path):
    """Suggest one missing skill per domain for the resume at `pdf_path`.

    The PDF text is extracted and cleaned, then each domain's keyword list is
    passed to analyze_skills(). Every domain appears in the result: its value
    is a randomly chosen missing skill, or None when nothing is missing.

    Returns:
        Dict mapping domain name to a missing skill or None.
    """
    cleaned_resume = clean_text(extract_text_from_pdf(pdf_path))

    domain_keywords = (
        ("Data Science", ds_keyword),
        ("Web Development", web_keyword),
        ("Android Development", android_keyword),
        ("iOS Development", ios_keyword),
        ("UI/UX Design", uiux_keyword),
        ("Other Skills", n_any),
    )

    # First collect the full missing list per domain...
    missing_by_domain = {
        domain: analyze_skills(cleaned_resume, keywords)
        for domain, keywords in domain_keywords
    }

    # ...then reduce each list to a single random pick (None if empty).
    picks = {}
    for domain, missing in missing_by_domain.items():
        picks[domain] = random.choice(missing) if missing else None
    return picks

def lemmatize_word(word):
    """Return the lemma of the first token spaCy produces for `word`."""
    parsed = nlp(word)
    return parsed[0].lemma_

def clean_up_sentence(sentence):
    """Split `sentence` on whitespace and return the lemma of each lower-cased piece."""
    return [lemmatize_word(piece.lower()) for piece in sentence.split()]

def bow(sentence, words, show_details=True):
    """Encode `sentence` as a binary bag-of-words vector over `words`.

    The sentence is split on whitespace and each lower-cased piece is
    lemmatised. Position i of the result is 1 when words[i] equals one of
    those lemmas, otherwise 0. `show_details` is accepted but not used.

    Returns:
        A NumPy array with one entry per element of `words`.
    """
    lemmas = [lemmatize_word(piece.lower()) for piece in sentence.split()]
    return np.array([int(vocab_word in lemmas) for vocab_word in words])

def predict_class(sentence):
    """Run the chatbot model on `sentence` and return its raw prediction.

    The sentence is encoded with bow() against the global vocabulary and fed
    to the model as a batch containing that single row.
    """
    features = bow(sentence, words)
    # Add a leading axis: 1-D feature vector -> (1, n_features) batch.
    single_row_batch = features[np.newaxis, :]
    return model.predict(single_row_batch)

def get_response(prediction, intents_json):
    """Turn a model prediction into a response string.

    The arg-max index of `prediction` selects a tag from the global `classes`;
    a random response of the first intent carrying that tag is returned. When
    no intent has the tag, a fixed fallback message is returned.
    """
    predicted_tag = classes[np.argmax(prediction)]

    matching_intent = next(
        (entry for entry in intents_json['intents'] if entry['tag'] == predicted_tag),
        None,
    )
    if matching_intent is None:
        return "Sorry, I didn't understand that."
    return random.choice(matching_intent['responses'])

def provide_feedback(resume_text, missing_skills_summary):
    """Build one feedback sentence per domain that has a missing skill.

    Args:
        resume_text: Accepted for interface compatibility; not used.
        missing_skills_summary: Dict mapping domain name to a missing skill.
            Domains that are absent or map to a falsy value are skipped.

    Returns:
        List of feedback strings in fixed domain order (Data Science, Web
        Development, Android Development, iOS Development, UI/UX Design,
        Other Skills).
    """
    # (domain, message template) pairs; `{missing}` is filled with the skill.
    templates = (
        ("Data Science",
         "Your Data Science skills are crucial for many AI-driven tasks. Learning more about {missing} will strengthen your profile. You could also explore more about deep learning frameworks like TensorFlow and PyTorch. Keep practicing and consider exploring advanced topics such as reinforcement learning and neural networks!"),
        ("Web Development",
         "Your web development skills are quite strong, but adding {missing} will expand your toolkit. Learning React or other JavaScript frameworks can help you build modern, dynamic websites. Django is a solid framework for backend development, and by mastering it, you can build full-stack applications!"),
        ("Android Development",
         "Your knowledge of Android development shows promise! Expanding your skill set with {missing} will help you stay competitive. Consider mastering Kotlin for Android development, as it is the preferred language. Exploring Android Jetpack and modern frameworks like Flutter will make your development process smoother!"),
        ("iOS Development",
         "iOS development is a fast-growing field, and you have great potential here! Missing skills like {missing} will help you leverage tools like Swift and Xcode to create cutting-edge iOS applications."),
        ("UI/UX Design",
         "Your creativity in UI/UX design is a huge asset! By learning more about {missing}, you can broaden your design horizon. Mastering tools like Figma, Adobe XD, and Sketch will allow you to design stunning user interfaces."),
        ("Other Skills",
         "Along with your technical skills, having strong soft skills like {missing} is important for career success. Enhancing your communication and leadership skills will allow you to work effectively in teams and handle complex projects."),
    )

    feedback = []
    for domain, template in templates:
        missing = missing_skills_summary.get(domain)
        if not missing:
            continue
        feedback.append(template.format(missing=missing))
    return feedback

def beautify_with_html_tags(missing_skills_summary, chatbot_responses, feedback):
    """Wrap the analysis results in HTML fragments.

    Args:
        missing_skills_summary: Dict of domain -> missing skill (or falsy).
        chatbot_responses: Dict of domain -> response text.
        feedback: Iterable of feedback strings.

    Returns:
        Dict with three keys: "Missing Skills Analysis" (domain -> <span>),
        "Chatbot Responses" (domain -> <p>) and "Feedback" (a single <ul>
        string containing one <li> per feedback entry).
    """
    # Missing skills: a falsy skill gets the 'no-skill' placeholder span.
    skills_markup = {}
    for domain, skill in missing_skills_summary.items():
        if skill:
            skills_markup[domain] = f"<span class='missing-skill'>{skill}</span>"
        else:
            skills_markup[domain] = "<span class='no-skill'>None</span>"

    # Chatbot responses: one paragraph per domain.
    responses_markup = {}
    for domain, response in chatbot_responses.items():
        responses_markup[domain] = f"<p class='chatbot-response'>{response}</p>"

    # Feedback: list items concatenated inside a single <ul>.
    list_items = "".join(f"<li class='feedback-item'>{line}</li>" for line in feedback)

    return {
        "Missing Skills Analysis": skills_markup,
        "Chatbot Responses": responses_markup,
        "Feedback": f"<ul class='feedback-list'>{list_items}</ul>",
    }


def analyze_resume_with_html_output(pdf_path, output_path):
    """Analyse a resume and save the HTML-wrapped result as JSON.

    Args:
        pdf_path: Path of the resume PDF.
        output_path: Path of the JSON file to write.

    Returns:
        The dict produced by beautify_with_html_tags(), which is also written
        to `output_path`.
    """
    missing_skills_summary = analyze_resume(pdf_path)

    # The PDF is read a second time here; provide_feedback() receives the raw
    # text as its first argument.
    raw_resume_text = extract_text_from_pdf(pdf_path)
    feedback = provide_feedback(raw_resume_text, missing_skills_summary)

    # Query the chatbot only for domains that actually have a missing skill.
    chatbot_responses = {}
    for domain, skill in missing_skills_summary.items():
        if not skill:
            continue
        chatbot_responses[domain] = get_response(predict_class(skill), intents)

    beautified_output = beautify_with_html_tags(
        missing_skills_summary, chatbot_responses, feedback
    )

    with open(output_path, "w") as json_file:
        json.dump(beautified_output, json_file, indent=4)

    print("Beautified JSON saved with HTML tags!")

    return beautified_output


# Example usage (runs when the cell executes): input resume and output file
pdf_path = "/kaggle/input/resume/resume.pdf"
output_path_html = "/kaggle/working/missing_skills_and_feedback_html.json"

# Analyze the resume and save the HTML-wrapped result as JSON.
# The suggested skills are chosen at random, so results differ between runs.
result_html = analyze_resume_with_html_output(pdf_path, output_path_html)

# Echo the returned dict so it is visible in the cell output
print(result_html)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Beautified JSON saved with HTML tags!
{'Missing Skills Analysis': {'Data Science': "<span class='missing-skill'>deep learning</span>", 'Web Development': "<span class='missing-skill'>asp.net</span>", 'Android Development': "<span class='missing-skill'>android</span>", 'iOS Development': "<span class='missing-skill'>cocoa touch</span>", 'UI/UX Design': "<span class='missing-skill'>illustrator</span>", 'Other Skills': "<span class='missing-skill'>customer management</span>"}, 'Chatbot Responses': {'Data Science': "<p class='chatbot-response'><div class='skill-des

In [6]:
import re
import json
from PyPDF2 import PdfReader
import spacy

# Load spaCy model (extract_keywords uses its noun-chunk parser)
nlp = spacy.load("en_core_web_sm")

# Predefined list of actionable skill categories.
# filter_skills() keeps a job-description keyword only when it equals one of
# these entries exactly.
# NOTE: the job description is passed through clean_text() first, which
# lower-cases it and removes punctuation, so entries containing punctuation or
# upper-case letters (e.g. "c++", "c#", "node.js", "test-driven development",
# "extreme programming (XP)") cannot equal any extracted keyword as written.
# NOTE: "firebase" appears in both "databases" and "other_skills".
skill_categories = {
    "programming_languages": ["python", "java", "javascript", "sql", "c++", "c#", "ruby", "go", "rust", "typescript", "swift", "kotlin", "php", "r", "matlab", "lua", "perl", "objective-c", "bash", "dart", "scala", "julia"],
    "frameworks": ["django", "flask", "react", "angular", "node.js", "spring", "vue.js", "express", "laravel", "next.js", "svelte", "ember.js", "backbone.js", "rails", "fastapi", "nestjs"],
    "tools": ["git", "docker", "kubernetes", "jenkins", "terraform", "ansible", "vagrant", "gitlab", "circleci", "apache", "prometheus", "grafana", "chef", "puppet", "nexus", "artifactory"],
    "databases": ["mysql", "postgresql", "mongodb", "oracle", "sqlite", "redis", "cassandra", "elasticsearch", "firebase", "mariadb", "ibm db2", "influxdb", "couchdb"],
    "methodologies": ["agile", "scrum", "devops", "test-driven development", "lean", "kanban", "waterfall", "continuous integration", "extreme programming (XP)", "feature-driven development (FDD)"],
    "other_skills": ["etl", "pandas", "tailwindcss", "rest api", "graphql", "docker-compose", "jest", "selenium", "firebase", "kafka", "oauth", "jwt", "solr", "apache spark", "tensorflow", "keras", "pytorch", "scikit-learn", "opencv", "numpy", "matplotlib", "seaborn"]
}

def clean_text(text):
    """Normalise raw text for keyword matching.

    Every character that is neither a word character nor whitespace is
    replaced by a space. Deleting it instead would fuse neighbouring words
    ("python/java" -> "pythonjava", "machine-learning" -> "machinelearning")
    and hide skills from the matcher. Runs of whitespace, including newlines,
    are then collapsed to single spaces and the result is lower-cased.

    Args:
        text: Raw text, e.g. as extracted from a PDF or a job description.

    Returns:
        The cleaned, lower-cased, single-spaced text.
    """
    # Punctuation becomes a word separator rather than vanishing.
    text = re.sub(r'[^\w\s]', ' ', text)
    # split()/join collapses newlines and repeated spaces and trims the ends.
    return ' '.join(text.split()).lower()

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Path of the PDF file, passed straight to PdfReader.

    Returns:
        The page texts joined with newlines.
    """
    reader = PdfReader(pdf_path)
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next. `or ""` is defensive: it tolerates a page whose
    # extraction yields None or an empty result.
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def extract_keywords(text):
    """Return the distinct lower-cased noun chunks spaCy finds in `text`.

    Args:
        text: Text to parse with the module-level spaCy pipeline.

    Returns:
        List of unique noun-chunk strings in order of first appearance.
    """
    doc = nlp(text)
    # dict.fromkeys removes duplicates while keeping first-occurrence order.
    # Going through set() instead leaves the order unspecified, so it could
    # differ between runs and make the saved results non-reproducible.
    return list(dict.fromkeys(chunk.text.lower() for chunk in doc.noun_chunks))

def filter_skills(job_keywords):
    """Keep only the keywords that appear in some list of `skill_categories`.

    Order and duplicates of `job_keywords` are preserved; a keyword is kept
    once per occurrence regardless of how many categories contain it.
    """
    return [
        candidate
        for candidate in job_keywords
        if any(candidate in known for known in skill_categories.values())
    ]

def analyze_skills(resume_text, job_keywords):
    """Split `job_keywords` into skills the resume mentions and skills it lacks.

    A skill counts as present only when it occurs as a whole word or phrase.
    A plain substring test would treat 'r' as present in almost any text,
    'go' in "google", or 'java' in "javascript". Words of a multi-word skill
    may be separated by any amount of whitespace. Matching is case-sensitive,
    so pass lower-cased text.

    Args:
        resume_text: Cleaned resume text to search.
        job_keywords: Iterable of skill names required by the job.

    Returns:
        Tuple (matched_skills, unmatched_skills), each a list in input order.
    """
    matched_skills = []
    unmatched_skills = []
    for skill in job_keywords:
        # Lookarounds instead of \b so skills ending in a non-word character
        # (e.g. 'c#') still get a sensible boundary check.
        pattern = r'(?<!\w)' + r'\s+'.join(map(re.escape, skill.split())) + r'(?!\w)'
        if re.search(pattern, resume_text):
            matched_skills.append(skill)
        else:
            unmatched_skills.append(skill)
    return matched_skills, unmatched_skills

def analyze_resume_and_job_description(resume_path, job_desc_path, output_json_path):
    """Compare a resume with a job description and report the skill overlap.

    Args:
        resume_path: Path of the resume PDF.
        job_desc_path: Path of a JSON file whose 'description' key holds the
            job description text.
        output_json_path: Path of the JSON file to write. It receives the
            matched skills, the unmatched skills and the HTML report.

    Returns:
        The HTML report as a string.
    """
    # Resume side: extract, then clean.
    resume_text = clean_text(extract_text_from_pdf(resume_path))

    # Job side: load the JSON and clean its description.
    with open(job_desc_path, 'r') as job_file:
        job_description = json.load(job_file)
    job_text = clean_text(job_description['description'])

    # Reduce the job description to the known skills it mentions.
    job_keywords = filter_skills(extract_keywords(job_text))

    matched_skills, unmatched_skills = analyze_skills(resume_text, job_keywords)

    # Fixed fragments of the report; the list items are slotted in between.
    page_header = """
    <h1>Resume Analysis Results</h1>
    <h2>Matched Skills</h2>
    <ul>
    """
    section_break = """
    </ul>
    <h2>Unmatched Skills</h2>
    <ul>
    """
    page_footer = """
    </ul>
    <br>
    <p>Analysis completed successfully.</p>
    """
    html_output = "".join([
        page_header,
        *(f"<li>{skill}</li>" for skill in matched_skills),
        section_break,
        *(f"<li>{skill}</li>" for skill in unmatched_skills),
        page_footer,
    ])

    results = {
        "Matched Skills": matched_skills,
        "Unmatched Skills": unmatched_skills,
        "HTML Results": html_output,
    }
    with open(output_json_path, 'w') as json_file:
        json.dump(results, json_file, indent=4)

    return html_output

# Example usage (runs when the cell executes): resume PDF, job-description
# JSON (must contain a 'description' key) and the output file
resume_path = "/kaggle/input/resume/resume.pdf"
job_desc_path = "/kaggle/input/intents/job_dec.json"
output_json_path = "/kaggle/working/results.json"

# Analyze resume and job description; also writes results to output_json_path
result_html = analyze_resume_and_job_description(resume_path, job_desc_path, output_json_path)

# Print HTML results for frontend rendering
print(result_html)



    <h1>Resume Analysis Results</h1>
    <h2>Matched Skills</h2>
    <ul>
    <li>pandas</li>
    </ul>
    <h2>Unmatched Skills</h2>
    <ul>
    <li>tailwindcss</li>
    </ul>
    <br>
    <p>Analysis completed successfully.</p>
    


In [None]:
source .venv/Scripts/activate