In [11]:
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import ssl

# NOTE(review): this swaps ssl's default HTTPS context factory for the
# unverified one, which turns off certificate verification for every caller
# of that factory in this process, not just the NLTK downloads below.
# Presumably a workaround for hosts where the download fails certificate
# checks -- confirm it is still needed before shipping.
if hasattr(ssl, "_create_unverified_context"):
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context

# Fetch the tokenizer model and stop-word list used by preprocess_text.
# The override above must already be in place when these run.
for _resource in ("punkt", "stopwords"):
    nltk.download(_resource)

def preprocess_text(text):
    """Reduce free text to a space-joined string of stemmed keywords.

    The text is tokenized with NLTK, tokens that are not purely alphanumeric
    are dropped, the rest are lower-cased, English stop words are removed and
    the survivors are Porter-stemmed.

    Args:
        text: The raw text. Anything that is not a non-blank ``str`` (for
            example ``None``, or the ``NaN`` float that an empty spreadsheet
            cell is read as) is treated as having no content.

    Returns:
        The processed tokens joined by single spaces; ``""`` when nothing
        usable remains.
    """
    # Guard first: word_tokenize raises TypeError on non-string input.
    if not isinstance(text, str) or not text.strip():
        return ""

    stop_words = set(stopwords.words("english"))
    stemmer = PorterStemmer()

    # Lower-case before the stop-word test: the stop-word list is lower-case.
    lowered = (word.lower() for word in word_tokenize(text) if word.isalnum())
    return " ".join(
        stemmer.stem(word) for word in lowered if word not in stop_words
    )

def compare_skills(user_skills, sector_skills):
    """Score how similar two skill descriptions are.

    Both inputs are run through ``preprocess_text``, vectorized together with
    TF-IDF, and compared by cosine similarity.

    Args:
        user_skills: Free-text skills supplied by the user.
        sector_skills: Free-text skills required by a sector.

    Returns:
        The cosine similarity as a plain ``float``. ``0.0`` is returned when
        either side has no usable tokens after preprocessing.
    """
    user_doc = preprocess_text(user_skills)
    sector_doc = preprocess_text(sector_skills)

    # With nothing to compare, the vectorizer would raise ValueError
    # ("empty vocabulary") instead of reporting "no similarity".
    if not user_doc or not sector_doc:
        return 0.0

    # The default token_pattern only keeps tokens of two or more characters,
    # which silently discards one-letter skills such as "C" or "R".
    vectorizer = TfidfVectorizer(token_pattern=r"(?u)\b\w+\b")
    tfidf_matrix = vectorizer.fit_transform([user_doc, sector_doc])
    similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])

    return float(similarity[0][0])

def _split_skills(raw):
    """Map each skill in a comma-separated string to its display label.

    Keys are the whitespace-stripped, case-folded skill names so that
    "Painting" and " painting" compare equal; values keep the first spelling
    seen. Insertion order follows the input. Non-string input gives ``{}``.
    """
    skills = {}
    if not isinstance(raw, str):
        return skills
    for item in raw.split(","):
        label = item.strip()
        if label:
            skills.setdefault(label.casefold(), label)
    return skills


def _best_sector_row(sectors_data, user_skills):
    """Return the row scoring highest against ``user_skills``, or ``None``.

    Rows whose "skills" cell is not a string are skipped. A row only wins
    with a score strictly above 0.0, and ties keep the earliest row.
    """
    best_row = None
    best_score = 0.0
    for _, row in sectors_data.iterrows():
        sector_skills = row["skills"]
        if not isinstance(sector_skills, str):
            continue
        score = compare_skills(user_skills, sector_skills)
        if score > best_score:
            best_row = row
            best_score = score
    return best_row


def main():
    """Run the interactive skill matcher.

    Prompts for the user's skills, reads "skill by sector.xlsx" (columns
    "sector", "skills" and "job description"), picks the sector whose skills
    are most similar, and prints the matching and lacking skills. The job
    description is printed when any required skill is lacking.
    """
    print("Welcome to the Skill Matching Program!")

    user_skills = input("Please enter your skills, separated by commas: ")
    # NOTE(review): the answer is collected but nothing below reads it.
    # The prompt is kept so the interaction is unchanged; decide whether it
    # should filter the sectors or be removed.
    user_sector = input("Please specify the sector you are interested in: ")

    sectors_data = pd.read_excel("skill by sector.xlsx")

    offered = _split_skills(user_skills)
    # Without any entered skill there is nothing to score, so fall through
    # to the "no matching sector" message instead of calling the scorer.
    best_row = _best_sector_row(sectors_data, user_skills) if offered else None

    if best_row is None:
        print("Sorry, no matching sector found in our database.")
        return

    # Use the winning row itself: looking it up again by sector name would
    # return the first row with that name, which may be a different row.
    required = _split_skills(best_row["skills"])
    matching_skills = [
        label for key, label in required.items() if key in offered
    ]
    lacking_skills = [
        label for key, label in required.items() if key not in offered
    ]

    best_match = best_row["sector"]
    print(f"Best Matching Sector: {best_match}")
    print("Matching Skills:", ", ".join(matching_skills))
    print("Lacking Skills:", ", ".join(lacking_skills))

    if lacking_skills:
        print("Job Description:")
        print(best_row["job description"])
    else:
        print("Congratulations! You have the required skills for this sector.")

# Start the interactive matcher only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


[nltk_data] Downloading package punkt to /Users/angzhinuo/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/angzhinuo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Welcome to the Skill Matching Program!
Best Matching Sector: painting teacher
Matching Skills: painting, leadership, communication
Lacking Skills: English, lesson planning, malaysia, speaking, mandarin
Job Description:
Preparing and delivering lessons to students on fundamental topics in arts;Explaining and demonstrating artistic techniques;Planning and preparing lessons in accordance with the prescribed/recommended curriculum;Teaching and supervising students’ class work and discipline;Preparing, assigning and marking exercises, assignments and tests to evaluate student progress;Preparing reports on students’ work and conferring with other teachers and parents;Participating in staff meetings and educational conferences and workshops;Organising or assisting in co-curricular activities;Performing related tasks; and Enhancing knowledge and coordinating work performance.
