In [1]:
from flask import Flask, render_template, request, jsonify
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier

# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# Hard-coded sample dataset used by the recommender.
# Every column holds one entry per sample student profile. All columns except
# "Grade" store comma-separated text, which preprocess_data() later splits
# into lists of labels.
data = pd.DataFrame({
    # School grade of the student.
    # NOTE(review): "Grade" is not referenced by any other code visible in
    # this cell -- confirm whether it is meant to be a feature.
    "Grade": [10, 11, 12, 10, 11, 12, 9, 10, 11, 12, 10, 11, 12, 9, 11, 10, 11, 12, 10, 11, 12, 9, 10, 11, 12],
    # Subjects/fields the student is interested in (binarized as input features).
    "Interests": ["Chemistry, Environmental Science", "History, Political Science", "Literature, Creative Writing", 
                  "Physics, Mathematics", "Biology, Medicine", "Economics, Business Studies", "Art, Photography", 
                  "Computer Science, Robotics", "Music, Performing Arts", "Chemistry, Chemical Engineering", 
                  "Fashion Design, Textile Engineering", "Sports, Physical Education", "Sociology, Psychology", 
                  "Computer Programming, Game Design", "Fine Arts, Digital Media", "Biology, Environmental Science", 
                  "Economics, Mathematics", "History, Law", "Music, Sound Engineering", "Engineering, Robotics", 
                  "Biology, Nutrition", "Mathematics, Physics", "Photography, Film Studies", "Business Studies, Economics", 
                  "Psychology, Social Work"],
   # Personal strengths of the student (binarized as input features).
   "Strengths/Aptitudes": ["Analytical thinking", "Argumentation, Leadership", "Creativity, Communication", 
                            "Problem-solving, Logical thinking", "Empathy, Attention to detail", 
                            "Strategic thinking, Planning", "Creativity, Artistic skills", 
                            "Logical reasoning, Innovation", "Musical talent, Performance", 
                            "Analytical thinking, Precision", "Creativity, Technical skills", 
                            "Physical endurance, Leadership", "Empathy, Communication", 
                            "Problem-solving, Creativity", "Creativity, Technical skills", 
                            "Observational skills, Research", "Analytical thinking, Strategic planning", 
                            "Argumentation, Critical thinking", "Musical talent, Technical skills", 
                            "Innovation, Problem-solving", "Attention to detail, Empathy", 
                            "Problem-solving, Logical thinking", "Creativity, Visual storytelling", 
                            "Leadership, Strategic thinking", "Empathy, Communication"],
   # Careers suggested for the profile (binarized as targets and returned by /predict).
   "Career Possibilities": ["Environmental Scientist, Chemist, Ecologist", "Politician, Diplomat, Lawyer, Public Policy Analyst", 
                             "Author, Editor, Journalist, Screenwriter", "Physicist, Mathematician, Data Scientist", 
                             "Doctor, Surgeon, Medical Researcher", "Economist, Financial Analyst, Business Consultant", 
                             "Photographer, Graphic Designer, Visual Artist", "Robotics Engineer, Software Developer, AI Specialist", 
                             "Musician, Performer, Composer, Music Teacher", "Chemical Engineer, Pharmacist, Research Scientist", 
                             "Fashion Designer, Textile Engineer, Product Designer", "Athlete, Sports Coach, Fitness Trainer", 
                             "Counselor, Social Worker, Psychologist", "Game Developer, Software Engineer, UI/UX Designer", 
                             "Digital Artist, Animator, Graphic Designer", "Marine Biologist, Environmental Scientist, Conservationist", 
                             "Economist, Statistician, Financial Analyst", "Lawyer, Historian, Judge", 
                             "Sound Engineer, Music Producer, Audio Technician", "Mechanical Engineer, Robotics Specialist", 
                             "Nutritionist, Dietitian, Public Health Specialist", "Theoretical Physicist, Mathematician, Data Scientist", 
                             "Cinematographer, Film Director, Photographer", "Business Manager, Financial Planner, Entrepreneur", 
                             "Psychologist, Social Worker, Counselor"],
    # Courses suggested for the profile (binarized as targets and returned by /predict).
    "Recommended Courses/Subjects": ["Chemistry, Environmental Science, Biology", "Political Science, History, Law", 
                                     "Literature, English, Media Studies", "Physics, Mathematics, Computer Science", 
                                     "Biology, Chemistry, Human Anatomy", "Economics, Business Studies, Mathematics", 
                                     "Art, Design, Photography", "Computer Science, Mathematics, Robotics", 
                                     "Music, Performing Arts, Literature", "Chemistry, Mathematics, Physics", 
                                     "Fashion Studies, Textile Design, Art", "Physical Education, Biology, Sports Science", 
                                     "Sociology, Psychology, Human Development", "Computer Science, Mathematics, Digital Art", 
                                     "Fine Arts, Digital Media, Animation", "Biology, Environmental Science, Oceanography", 
                                     "Economics, Mathematics, Statistics", "History, Political Science, Law", 
                                     "Music, Physics, Audio Engineering", "Engineering, Robotics, Computer Science", 
                                     "Biology, Nutrition, Chemistry", "Mathematics, Physics, Computer Science", 
                                     "Photography, Film Studies, Media Studies", "Business Studies, Economics, Marketing", 
                                     "Psychology, Sociology, Human Development"]
})

# Preprocess data
def _split_labels(value):
    """Return the cleaned list of labels held by one cell.

    Accepts a comma-separated string or an already-split collection of
    labels. Anything else (for example a missing value) yields an empty list.
    Surrounding whitespace is stripped and empty labels are dropped.
    """
    if isinstance(value, str):
        parts = value.split(',')
    elif isinstance(value, (list, tuple, set)):
        # Already split: makes preprocess_data() safe to run more than once.
        parts = value
    else:
        return []
    cleaned = (str(part).strip() for part in parts)
    return [label for label in cleaned if label]


def preprocess_data(data):
    """Split the comma-separated text columns into lists of labels.

    The columns "Interests", "Strengths/Aptitudes", "Career Possibilities"
    and "Recommended Courses/Subjects" are converted; every other column is
    left as is.

    Args:
        data: DataFrame containing the four text columns.

    Returns:
        A new DataFrame with the four columns holding lists of stripped,
        non-empty labels. The frame passed in is not modified.
    """
    # Work on a copy so the caller's frame is never changed behind its back.
    result = data.copy()
    for column in ["Interests", "Strengths/Aptitudes", "Career Possibilities", "Recommended Courses/Subjects"]:
        result[column] = result[column].apply(_split_labels)
    return result

# Turn every comma-separated text column into a list of labels.
data = preprocess_data(data)

# One binarizer per multi-label column so each keeps its own label vocabulary.
mlb_interests = MultiLabelBinarizer()
mlb_strengths = MultiLabelBinarizer()
mlb_careers = MultiLabelBinarizer()
mlb_courses = MultiLabelBinarizer()

# Inputs: 0/1 indicator matrices for interests and strengths, placed side by side.
X_interests = mlb_interests.fit_transform(data['Interests'])
X_strengths = mlb_strengths.fit_transform(data['Strengths/Aptitudes'])
X = pd.concat(
    [pd.DataFrame(X_interests), pd.DataFrame(X_strengths)],
    axis=1,
)

# Targets: 0/1 indicator matrices for careers and courses, placed side by side.
y_careers = mlb_careers.fit_transform(data['Career Possibilities'])
y_courses = mlb_courses.fit_transform(data['Recommended Courses/Subjects'])
y = pd.concat(
    [pd.DataFrame(y_careers), pd.DataFrame(y_courses)],
    axis=1,
)

# Fit the classifier on the sample data.
# NOTE(review): `model` is not used by the routes visible in this cell, which
# rely on the row matching below -- confirm whether it is still needed.
model = RandomForestClassifier(random_state=42).fit(X, y)

# Keep each row's encoded interests/strengths next to the raw labels so that
# find_matching_rows() can compare them against the encoded user input.
data['Interests_Encoded'] = list(mlb_interests.transform(data['Interests']))
data['Strengths_Encoded'] = list(mlb_strengths.transform(data['Strengths/Aptitudes']))

def _as_label_list(labels):
    """Normalise user-supplied labels to a list (None -> [], "x" -> ["x"])."""
    if labels is None:
        return []
    if isinstance(labels, str):
        # A bare string would otherwise be iterated character by character.
        return [labels]
    return list(labels)


def find_matching_rows(interests_input, strengths_input, data, mlb_interests, mlb_strengths):
    """Rank the rows of *data* by label overlap with the user's input.

    Args:
        interests_input: Iterable of interest labels (None or a single string
            are also accepted).
        strengths_input: Iterable of strength labels (None or a single string
            are also accepted).
        data: DataFrame with 'Interests_Encoded' and 'Strengths_Encoded'
            columns holding one 0/1 vector per row.
        mlb_interests: Fitted binarizer whose ``transform`` encodes interests.
        mlb_strengths: Fitted binarizer whose ``transform`` encodes strengths.

    Returns:
        A new DataFrame with the rows of *data* plus 'Interest_Match' and
        'Strength_Match' columns (number of shared labels), sorted by
        interest overlap and then strength overlap, best match first.
        *data* itself is left untouched.
    """
    interests_vector = mlb_interests.transform([_as_label_list(interests_input)])[0]
    strengths_vector = mlb_strengths.transform([_as_label_list(strengths_input)])[0]

    # Multiplying two 0/1 vectors element-wise and summing counts the labels
    # they have in common.
    interest_scores = data['Interests_Encoded'].apply(
        lambda encoded: int((encoded * interests_vector).sum())
    )
    strength_scores = data['Strengths_Encoded'].apply(
        lambda encoded: int((encoded * strengths_vector).sum())
    )

    # assign() builds a new frame, so the shared dataset is not mutated per
    # request (which would let concurrent requests overwrite each other's scores).
    scored = data.assign(Interest_Match=interest_scores, Strength_Match=strength_scores)
    return scored.sort_values(by=['Interest_Match', 'Strength_Match'], ascending=False)

def combine_predictions(matching_rows, num_matches=3):
    """Merge the careers and courses of the top-ranked rows.

    Args:
        matching_rows: DataFrame ordered best match first, whose
            'Career Possibilities' and 'Recommended Courses/Subjects'
            cells each hold a list of labels.
        num_matches: How many leading rows to merge (default 3). Values
            below 1 yield empty results.

    Returns:
        A ``(careers, courses)`` tuple of lists without duplicates. Items
        keep the order in which they first appear, so suggestions from the
        best-matching row come first and the output is the same on every run.
    """
    top_rows = matching_rows.head(max(num_matches, 0))

    # dict.fromkeys() removes duplicates while preserving first-seen order;
    # a plain set would discard the ranking and vary between runs.
    careers = dict.fromkeys(
        career for row in top_rows['Career Possibilities'] for career in row
    )
    courses = dict.fromkeys(
        course for row in top_rows['Recommended Courses/Subjects'] for course in row
    )
    return list(careers), list(courses)


@app.route('/')
def index():
    """Render the landing page with every selectable interest and strength."""
    # Flatten the per-row label lists, drop duplicates and sort alphabetically
    # so the template receives a stable list of options.
    interest_options = sorted(
        {label for labels in data['Interests'] for label in labels}
    )
    strength_options = sorted(
        {label for labels in data['Strengths/Aptitudes'] for label in labels}
    )
    return render_template(
        'index.html',
        interests=interest_options,
        strengths=strength_options,
    )

def _is_label_list(value):
    """Return True when *value* is a list made up solely of strings."""
    return isinstance(value, list) and all(isinstance(item, str) for item in value)


@app.route('/predict', methods=['POST'])
def predict():
    """Return career and course suggestions for the posted profile.

    Expects a JSON object with optional 'interests' and 'strengths' keys,
    each a list of label strings (missing or null means "none selected").

    Returns:
        A JSON response with 'career_possibilities' and
        'recommended_courses' lists, or a JSON error with status 400 when
        the body is not an object or the fields are not lists of strings.
    """
    input_data = request.json
    # A valid JSON body may still be null, a list or a scalar; calling .get()
    # on those would raise and surface as a 500 instead of a client error.
    if not isinstance(input_data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    interests = input_data.get('interests', [])
    strengths = input_data.get('strengths', [])
    if interests is None:
        interests = []
    if strengths is None:
        strengths = []
    if not (_is_label_list(interests) and _is_label_list(strengths)):
        return jsonify({'error': "'interests' and 'strengths' must be lists of strings."}), 400

    matching_rows = find_matching_rows(interests, strengths, data, mlb_interests, mlb_strengths)
    predicted_careers, predicted_courses = combine_predictions(matching_rows)

    return jsonify({
        'career_possibilities': predicted_careers,
        'recommended_courses': predicted_courses
    })

# Start Flask's built-in server only when this code runs as the main module
# (not when it is imported); debug mode is explicitly turned off.
if __name__ == '__main__':
    app.run(debug=False)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
[2024-09-05 15:03:53,182] ERROR in app: Exception on / [GET]
Traceback (most recent call last):
  File "c:\Users\ACER\miniconda3\Lib\site-packages\flask\app.py", line 1473, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ACER\miniconda3\Lib\site-packages\flask\app.py", line 882, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ACER\miniconda3\Lib\site-packages\flask\app.py", line 880, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ACER\miniconda3\Lib\site-packages\flask\app.py", line 865, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ACER\AppData\Local\Temp\ip