In [11]:
import pandas as pd
import random

# Define possible responses for each question
responses = {
    "Q1": ["I enjoy meeting new people.", "I prefer quiet, solitary activities.", "I enjoy meeting people occasionally."],
    "Q2": ["I like to focus on facts and details.", "I enjoy thinking about abstract ideas.", "I mix abstract and detailed thinking."],
    "Q3": ["I make decisions based on logic and facts.", "I consider emotions and values when deciding.", "I balance logic and emotions."],
    "Q4": ["I prefer to plan and organize tasks.", "I like to keep things flexible and open-ended.", "I balance planning and flexibility."],
    "Q5": ["I often think about possibilities and future plans.", "I focus on the present moment and practical solutions.", "I think about both future and present."],
    "Q6": ["I stay calm and focus on solutions.", "I feel anxious but adapt.", "I stay calm and reflective."],
    "Q7": ["I thrive in structured and predictable settings.", "I enjoy creative and flexible environments.", "I like a balance of structure and flexibility."],
    "Q8": ["I prefer multitasking.", "I focus on one task at a time.", "I can do both depending on the situation."],
    "Q9": ["I enjoy working independently.", "I thrive in collaborative teams.", "I balance independent and team work."],
    "Q10": ["I work best under pressure.", "I prefer completing tasks early.", "I adapt to deadlines as needed."],
    "Q11": ["I enjoy learning new skills regularly.", "I prefer mastering a specific skill.", "I balance mastering and learning new skills."],
    "Q12": ["I enjoy coming up with innovative solutions.", "I prefer following proven methods.", "I mix innovation with practicality."],
    "Q13": ["I appreciate feedback to improve myself.", "I feel defensive but reflect on feedback.", "I accept and act on feedback constructively."],
    "Q14": ["I lead discussions and brainstorming sessions.", "I participate actively but don’t lead.", "I mix leading and participating."],
    "Q15": ["I like structured job environments.", "I thrive in dynamic and unpredictable environments.", "I like balanced environments."]
}

# Define personality type rules
def determine_personality(answers):
    introvert_count = sum("quiet" in ans or "independent" in ans for ans in answers)
    extrovert_count = sum("enjoy" in ans or "collaborative" in ans for ans in answers)
    if introvert_count > extrovert_count:
        return "Introvert"
    elif extrovert_count > introvert_count:
        return "Extrovert"
    else:
        return "Mix"

# Define career predictions based on personality
def predict_career(personality):
    careers = {
        "Introvert": ["Research Analyst", "Data Scientist", "Content Writer"],
        "Extrovert": ["Project Manager", "Sales Executive", "Marketing Specialist"],
        "Mix": ["Product Designer", "Consultant", "Entrepreneur"]
    }
    return random.choice(careers[personality])

# Generate the dataset
data = []
for _ in range(2000):
    answers = [random.choice(responses[q]) for q in responses]
    personality = determine_personality(answers)
    career = predict_career(personality)
    data.append(answers + [personality, career])

# Create a DataFrame
columns = list(responses.keys()) + ["Personality Type", "Predicted Career"]
df = pd.DataFrame(data, columns=columns)

# Save to CSV
df.to_csv("personality_job_dataset.csv", index=False)
print("Dataset generated successfully!")

Dataset generated successfully!


In [12]:
print(df.columns)



Index(['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11',
       'Q12', 'Q13', 'Q14', 'Q15', 'Personality Type', 'Predicted Career'],
      dtype='object')


In [13]:
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd

# Assuming 'df' is your DataFrame and it contains answers as text
# Prepare your feature columns (text answers)
text_columns = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11',
       'Q12', 'Q13', 'Q14', 'Q15', 'Personality Type', 'Predicted Career']

# Assuming df is your DataFrame and it contains columns like 'Q1', 'Q2', ..., 'Q15'

# Create a new column that combines all answers into one text column
df['All_answers'] = df[text_columns].apply(lambda x: ' '.join(x.astype(str)), axis=1)

# Vectorize the answers using CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['All_answers'])

# Define your target variable (e.g., career prediction column)
y = df['Predicted Career']

# Split data into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the RandomForestClassifier model
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the model and vectorizer to disk
import joblib
joblib.dump(model, 'career_prediction_model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')  # Save the vectorizer as well
print("Model and vectorizer saved successfully!")


Model and vectorizer saved successfully!


In [14]:
import joblib
import pandas as pd

# Load the trained model and vectorizer
model = joblib.load('career_prediction_model.pkl')
vectorizer = joblib.load('vectorizer.pkl')  # Load the vectorizer

# Function to preprocess user input and make prediction
def predict_career(user_answers):
    # Create a DataFrame for the user answers (assuming 15 answers)
    user_data = pd.DataFrame([user_answers], columns=['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11', 'Q12', 'Q13', 'Q14', 'Q15'])
    
    # Combine the answers into one column for vectorization
    user_data['All_answers'] = user_data.apply(lambda x: ' '.join(x), axis=1)

    # Vectorize the answers using the same vectorizer used during training
    X_user = vectorizer.transform(user_data['All_answers'])

    # Predict the career using the trained model
    predicted_career = model.predict(X_user)
    
    return predicted_career[0]

# Example usage: user input answers (replace these with real user input)
user_answers = [
    "I prefer working with others and collaborating.",  # Q1
    "I enjoy solving problems independently.",          # Q2
    "I find myself getting easily stressed in group settings.",  # Q3
    "I enjoy leading teams and making decisions.",      # Q4
    "I prefer structured tasks with clear instructions.",  # Q5
    "I like creative, open-ended projects.",           # Q6
    "I find comfort in routine and stability.",        # Q7
    "I thrive in dynamic environments with changing tasks.",  # Q8
    "I enjoy helping others and providing support.",   # Q9
    "I prefer to listen before speaking.",             # Q10
    "I value personal growth and learning.",           # Q11
    "I enjoy exploring new ideas and concepts.",       # Q12
    "I feel energized after spending time alone.",      # Q13
    "I enjoy making quick decisions in uncertain situations.",  # Q14
    "I like planning and organizing tasks in advance."  # Q15
]

# Get the predicted career
predicted_career = predict_career(user_answers)

# Output the result
print(f"The predicted career for the provided answers is: {predicted_career}")


The predicted career for the provided answers is: Consultant
