In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the questionnaire dataset.
# Assumes the leading columns hold free-text answers and 'Predicted Career'
# holds the target label -- TODO confirm against the CSV header.
df = pd.read_csv('personality_job_dataset.csv')

# Number of leading answer columns that are fed to the vectorizer.
N_QUESTIONS = 15

# Flatten the answer table row by row into one document per individual answer.
# Missing cells become empty strings and every value is coerced to str,
# because TfidfVectorizer rejects NaN and non-string documents.
raw_answers = df.iloc[:, 0:N_QUESTIONS].to_numpy(dtype=object).ravel()
answer_documents = ['' if pd.isna(value) else str(value) for value in raw_answers]

# TF-IDF vectorizer for question responses.
# NOTE: X has one row per *answer*, not per respondent. With k selected answer
# columns, document i of X belongs to DataFrame row i // k, so X has k times
# as many rows as y below.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(answer_documents)

# Label encoding for Career (one encoded label per DataFrame row).
career_encoder = LabelEncoder()
y = career_encoder.fit_transform(df['Predicted Career'])

# Pairwise cosine similarities between all answer documents.
# NOTE(review): this is a dense (n_documents x n_documents) array and is not
# read by the prediction cells visible in this notebook; kept for
# compatibility with any cell that may still use it.
similarity_matrix = cosine_similarity(X)


In [11]:
def predict_career(user_responses):
    """Predict a career from free-text questionnaire answers.

    Uses the notebook-level ``vectorizer``, ``X``, ``y`` and ``career_encoder``.
    Every answer is vectorized and compared with every document in ``X``; the
    career of the dataset row that owns the single most similar document is
    returned.

    Parameters
    ----------
    user_responses : str or iterable of str
        The user's answers. A bare string is treated as one answer.

    Returns
    -------
    numpy.ndarray
        One-element array holding the decoded career label.

    Raises
    ------
    ValueError
        If no answers are given, or if the number of rows in ``X`` is not a
        whole multiple of ``len(y)`` (documents cannot be mapped to labels).
    """
    if isinstance(user_responses, str):
        user_responses = [user_responses]
    user_responses = list(user_responses)
    if not user_responses:
        raise ValueError("user_responses must contain at least one answer")

    n_docs = X.shape[0]
    n_labels = len(y)
    if n_labels == 0 or n_docs == 0 or n_docs % n_labels != 0:
        raise ValueError(
            f"Cannot align {n_docs} documents in X with {n_labels} labels in y"
        )
    # X may hold several documents per dataset row (one per answer column).
    docs_per_row = n_docs // n_labels

    # Vectorize user responses (one row per answer)
    user_vector = vectorizer.transform(user_responses)

    # Compute cosine similarity with the dataset: shape (n_answers, n_docs)
    similarity_scores = cosine_similarity(user_vector, X)

    # argmax works on the flattened matrix, so split the flat position back
    # into (answer index, document index) before using it.
    _, best_doc_idx = np.unravel_index(
        np.argmax(similarity_scores), similarity_scores.shape
    )

    # Document -> dataset row -> encoded label -> career name.
    # (The matched position is NOT itself an encoded label.)
    best_row_idx = int(best_doc_idx) // docs_per_row
    predicted_career = career_encoder.inverse_transform([y[best_row_idx]])

    return predicted_career

# Sample questionnaire: one answer per question, in the order listed below.
# NOTE(review): several question labels (from "How do you prioritize tasks?"
# onward) do not obviously match the answer next to them -- verify the labels
# against the dataset's column order.
user_responses = [
    # How do you prefer social interactions?
    "I enjoy meeting people occasionally.",
    # How do you process information?
    "I mix abstract and detailed thinking.",
    # How do you make decisions?
    "I balance logic and emotions.",
    # How do you organize your life?
    "I balance planning and flexibility.",
    # How do you approach creativity and ideas?
    "I often think about possibilities and future plans.",
    # How do you handle stress?
    "I stay calm and focus on solutions.",
    # Do you prefer routine or change?
    "I like a balance of structure and flexibility.",
    # How do you deal with conflict?
    "I can do both depending on the situation.",
    # How do you prioritize tasks?
    "I enjoy working independently.",
    # How do you handle change?
    "I work best under pressure.",
    # How do you approach problems?
    "I enjoy learning new skills regularly.",
    # How do you respond to feedback?
    "I mix innovation with practicality.",
    # How do you approach learning?
    "I feel defensive but reflect on feedback.",
    # How do you work under pressure?
    "I participate actively but don’t lead.",
    # How do you manage your time?
    "I like balanced environments.",
]

# Run the prediction for the sample answers and show the result.
predicted_career = predict_career(user_responses)
print(predicted_career)


['Consultant']


In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def predict_career(user_responses, vectorizer, X, career_encoder, labels=None):
    """Predict a career and a similarity-based confidence for a set of answers.

    Every answer is vectorized and compared with every document in ``X``. The
    single most similar (answer, document) pair decides the result: the
    document is mapped to its dataset row, the row to its encoded label, and
    the label is decoded with ``career_encoder``.

    Parameters
    ----------
    user_responses : str or iterable of str
        The user's answers. A bare string is treated as one answer.
    vectorizer
        Fitted vectorizer exposing ``transform``.
    X
        Vectorized dataset documents. Its row count must be a whole multiple
        of the number of labels (several documents may belong to one row).
    career_encoder
        Fitted encoder exposing ``inverse_transform``.
    labels : array-like, optional
        Encoded career label per dataset row. Defaults to the notebook-level
        ``y`` so existing four-argument calls keep working.

    Returns
    -------
    tuple
        ``(career, confidence)`` where ``confidence`` is the cosine similarity
        of the best matching pair.

    Raises
    ------
    ValueError
        If no answers are given, or if the documents in ``X`` cannot be
        aligned with ``labels``.
    """
    if labels is None:
        labels = y  # encoded labels produced in the data-loading cell

    if isinstance(user_responses, str):
        user_responses = [user_responses]
    user_responses = list(user_responses)
    if not user_responses:
        raise ValueError("user_responses must contain at least one answer")

    n_docs = X.shape[0]
    n_labels = len(labels)
    if n_labels == 0 or n_docs == 0 or n_docs % n_labels != 0:
        raise ValueError(
            f"Cannot align {n_docs} documents in X with {n_labels} labels"
        )
    docs_per_row = n_docs // n_labels

    # Vectorize user responses (one row per answer)
    user_vector = vectorizer.transform(user_responses)

    # Compute cosine similarity with dataset: shape (n_answers, n_docs)
    similarity_scores = cosine_similarity(user_vector, X)

    # argmax is taken over the flattened matrix; recover the 2-D position so
    # the score lookup and the label lookup both use the right coordinates.
    best_response_idx, best_doc_idx = np.unravel_index(
        np.argmax(similarity_scores), similarity_scores.shape
    )

    # Document -> dataset row -> encoded label -> career name.
    best_row_idx = int(best_doc_idx) // docs_per_row
    predicted_career = career_encoder.inverse_transform([labels[best_row_idx]])

    # Confidence score is the maximum similarity score
    confidence_score = similarity_scores[best_response_idx, best_doc_idx]

    return predicted_career[0], confidence_score

# Second sample questionnaire used to exercise predict_career.
# NOTE(review): several question labels (from "How do you prioritize tasks?"
# onward) do not obviously match the answer next to them -- verify the labels
# against the dataset's column order.
user_responses_1 = [
    # How do you prefer social interactions?
    "I enjoy meeting people occasionally.",
    # How do you process information?
    "I mix abstract and detailed thinking.",
    # How do you make decisions?
    "I consider emotions and values when deciding.",
    # How do you organize your life?
    "I prefer to plan and organize tasks.",
    # How do you approach creativity and ideas?
    "I often think about possibilities and future plans.",
    # How do you handle stress?
    "I stay calm and reflective.",
    # Do you prefer routine or change?
    "I enjoy creative and flexible environments.",
    # How do you deal with conflict?
    "I can do both depending on the situation.",
    # How do you prioritize tasks?
    "I thrive in collaborative teams.",
    # How do you handle change?
    "I prefer completing tasks early.",
    # How do you approach problems?
    "I prefer mastering a specific skill.",
    # How do you respond to feedback?
    "I prefer following proven methods.",
    # How do you approach learning?
    "I accept and act on feedback constructively.",
    # How do you work under pressure?
    "I mix leading and participating.",
    # How do you manage your time?
    "I like structured job environments.",
]

# Predict the career together with its similarity-based confidence.
predicted_career_1, confidence_score_1 = predict_career(
    user_responses_1,
    vectorizer,
    X,
    career_encoder,
)

# Report both values.
print(f"Predicted Career: {predicted_career_1}")
print(f"Confidence Score: {confidence_score_1}")


Predicted Career: Consultant
Confidence Score: 1.0000000000000002
