In [None]:
import os
from contextlib import asynccontextmanager
from typing import Annotated, List

import joblib
import numpy as np
import pandas as pd
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, confloat
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

# ==============================================
# Training and saving models
# ==============================================

def train_and_save_models(
    data_path="cs_tracks_recommendation_system_datasetfinal1752025 (1).csv",
    model_dir="Models",
):
    """Train the candidate classifiers and persist the most accurate one.

    The CSV at ``data_path`` must contain a ``Track`` column (the target);
    every other column is used as a feature. Numeric columns are clipped to
    the range [0, 3], the features are min-max scaled, and four classifiers
    (XGBoost, random forest, SVM, k-NN) are fitted on an 85/15 stratified
    split. The model with the highest test accuracy is written, together
    with the fitted scaler, the label encoder and the feature column order,
    to ``<model_dir>/best_model.pkl``.

    Args:
        data_path: Path of the training CSV. Defaults to the dataset file
            name this script was originally written against.
        model_dir: Directory that receives ``best_model.pkl``. It is created
            if it does not exist.

    Returns:
        None. Results are reported via ``print`` and the saved artifact.
    """
    # Load data
    df = pd.read_csv(data_path)

    # Ensure all numeric values are between 0 and 3
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].clip(0, 3)

    # Encode target
    le = LabelEncoder()
    y = le.fit_transform(df['Track'])

    # Get features by dropping target column
    X = df.drop('Track', axis=1)
    feature_names = X.columns.tolist()

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.15, stratify=y, random_state=42
    )

    # Scale data (without SMOTE); the scaler is fitted on the training split
    # only so no test-set statistics leak into training.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Dictionary to store models
    models = {
        'XGBoost': XGBClassifier(
            max_depth=5,
            learning_rate=0.1,
            n_estimators=200,
            objective='multi:softmax',
            num_class=len(le.classes_),
            eval_metric='merror'
        ),
        'RandomForest': RandomForestClassifier(
            n_estimators=200,
            max_depth=10,
            random_state=42
        ),
        'SVM': SVC(
            C=1.0,
            kernel='rbf',
            probability=True,
            random_state=42
        ),
        'KNN': KNeighborsClassifier(
            n_neighbors=5
        )
    }

    # The decoded test labels are the same for every model, so decode once.
    y_test_decoded = le.inverse_transform(y_test)

    # Train and evaluate each model.
    # NOTE: the best model is chosen on the same split that is reported as
    # "test" accuracy, so the reported figure for the winner is optimistic.
    best_model = None
    best_accuracy = 0

    for name, model in models.items():
        print(f"\nTraining {name}...")
        model.fit(X_train_scaled, y_train)

        # Evaluate
        y_pred = model.predict(X_test_scaled)
        y_pred_decoded = le.inverse_transform(y_pred)

        print(classification_report(y_test_decoded, y_pred_decoded))
        train_acc = model.score(X_train_scaled, y_train)
        # Reuse the predictions computed above instead of predicting again.
        test_acc = accuracy_score(y_test, y_pred)
        print(f"Train Accuracy: {train_acc:.2%}")
        print(f"Test Accuracy: {test_acc:.2%}")

        # Track best model. The first model is always accepted so that a
        # fitted estimator is saved even if every accuracy is 0.
        if best_model is None or test_acc > best_accuracy:
            best_accuracy = test_acc
            best_model = model

    # Save the best model and tools. exist_ok avoids the check-then-create
    # race of a separate os.path.exists() test.
    os.makedirs(model_dir, exist_ok=True)

    joblib.dump({
        'model': best_model,
        'scaler': scaler,
        'label_encoder': le,
        'feature_names': feature_names,
        'best_model_name': type(best_model).__name__
    }, os.path.join(model_dir, "best_model.pkl"))

    print(f"\n✅ Best model saved successfully! Model: {type(best_model).__name__}")

# ==============================================
# FastAPI part
# ==============================================

# Inference artifacts shared by the request handlers. They all start out as
# None and are rebound by the lifespan hook when the application starts.
model = scaler = le = feature_names = model_name = None

# Lifespan management
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the saved model bundle into the module globals at startup.

    Reads ``Models/best_model.pkl`` with joblib and rebinds ``model``,
    ``scaler``, ``le``, ``feature_names`` and ``model_name`` from it. Any
    failure is printed and re-raised, which aborts application startup.
    Nothing is done after the ``yield`` (no shutdown work).
    """
    global model, scaler, le, feature_names, model_name
    try:
        # NOTE: joblib.load unpickles the file; only load trusted artifacts.
        bundle = joblib.load("Models/best_model.pkl")
        model = bundle['model']
        scaler = bundle['scaler']
        le = bundle['label_encoder']
        feature_names = bundle['feature_names']
        model_name = bundle['best_model_name']
        print(f"✅ Models loaded successfully. Best model: {model_name}")
    except Exception as load_error:
        print(f"❌ Failed to load models: {load_error!s}")
        raise
    yield

# The ASGI application. The lifespan hook is what populates the model globals
# before the first request is served.
app = FastAPI(
    lifespan=lifespan,
    title="Technical Tracks Recommendation System (Simple Version)",
    version="1.0",
    description="Recommends top 3 technical tracks based on user skills (without SMOTE or PCA)",
)

# A skill score: a float constrained to the closed interval [0, 3].
# Declared once with Annotated + Field (the form pydantic v2 recommends over
# the discouraged confloat helper) and shared by every field below.
SkillScore = Annotated[float, Field(ge=0, le=3)]


# User input model: one score per skill, each defaulting to 1.5 when omitted.
# The field names are used as DataFrame column names by the /recommend
# handler, so they must stay identical to the feature columns of the
# training data.
class UserInput(BaseModel):
    Programming_Skills: SkillScore = 1.5
    Computer_Architecture: SkillScore = 1.5
    Technical_Communication: SkillScore = 1.5
    Cyber_Security: SkillScore = 1.5
    AI: SkillScore = 1.5
    Biology_Biochemistry_Basic: SkillScore = 1.5
    Networking: SkillScore = 1.5
    Troubleshooting_Skills: SkillScore = 1.5
    Database_Fundamentals: SkillScore = 1.5
    Leadership_Experience: SkillScore = 1.5
    Project_Management: SkillScore = 1.5
    Software_Engineering: SkillScore = 1.5
    Business_knowledge: SkillScore = 1.5
    Communication_Skills: SkillScore = 1.5
    Data_Science: SkillScore = 1.5
    Graphics_Designing: SkillScore = 1.5

    # Example request body attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "Programming_Skills": 1,
                "Computer_Architecture": 1,
                "Technical_Communication": 1,
                "Cyber_Security": 1,
                "AI": 1,
                "Biology_Biochemistry_Basic": 1,
                "Networking": 1,
                "Troubleshooting_Skills": 1,
                "Database_Fundamentals": 1,
                "Leadership_Experience": 1,
                "Project_Management": 1,
                "Software_Engineering": 1,
                "Business_knowledge": 1,
                "Communication_Skills": 1,
                "Data_Science": 1,
                "Graphics_Designing": 1
            }
        }
    }

# Output model: a single recommended track.
class Recommendation(BaseModel):
    # Track label, as decoded by the label encoder in the /recommend handler.
    track: str
    # Probability the model assigned to this track (from predict_proba).
    probability: float

# Response body of POST /recommend.
class RecommendationsResponse(BaseModel):
    # Recommended tracks, ordered from highest to lowest probability.
    top_recommendations: List[Recommendation]
    # Human-readable status text; defaults to the success message.
    message: str = "Recommendations generated successfully"
    # Name of the model that produced the prediction (the handler passes the
    # module-level model_name loaded at startup).
    model_used: str

@app.get("/")
async def root():
    """Return the static welcome message served at the API root."""
    welcome_text = "Welcome to the Technical Tracks Recommendation System (Simple Version)"
    return {"message": welcome_text}

@app.post("/recommend", response_model=RecommendationsResponse)
async def get_recommendations(user_input: UserInput):
    """Return the three most probable tracks for the submitted skill scores.

    The input is arranged into the feature order saved at training time,
    scaled with the saved scaler, and scored with ``model.predict_proba``.

    Args:
        user_input: Validated skill scores.

    Returns:
        A dict matching ``RecommendationsResponse``: up to three
        track/probability pairs sorted by descending probability, a status
        message, and the name of the model used.

    Raises:
        HTTPException: 503 if the model artifacts have not been loaded,
            500 if preprocessing or prediction fails.
    """
    # The artifacts are module globals populated by the lifespan hook; fail
    # with an explicit 503 rather than an opaque error if they are missing.
    if model is None or scaler is None or le is None or feature_names is None:
        raise HTTPException(
            status_code=503,
            detail="Model is not loaded"
        )

    try:
        # Convert input to DataFrame, with columns in the training order
        input_data = user_input.model_dump()
        input_df = pd.DataFrame([input_data])[feature_names]

        # Preprocess (only scaling)
        X_scaled = scaler.transform(input_df)

        # Predict probabilities (single row, so take the first result)
        probs = model.predict_proba(X_scaled)[0]

        # Get top 3 recommendations: argsort is ascending, so take the last
        # three indices and reverse them for descending probability.
        top3_indices = np.argsort(probs)[-3:][::-1]
        top3_tracks = le.inverse_transform(top3_indices)
        top3_probs = probs[top3_indices]

        # Prepare response; float() converts numpy scalars for JSON output
        recommendations = [
            {"track": track, "probability": float(prob)}
            for track, prob in zip(top3_tracks, top3_probs)
        ]

        return {
            "top_recommendations": recommendations,
            "message": "Recommendations generated successfully",
            "model_used": model_name
        }

    except Exception as e:
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing your request: {str(e)}"
        ) from e

@app.get("/tracks")
async def get_available_tracks():
    """List every track label known to the loaded label encoder.

    Returns:
        A dict with ``available_tracks`` (the encoder's classes as a list)
        and ``count`` (how many there are).

    Raises:
        HTTPException: 503 if the label encoder has not been loaded.
    """
    # le is a module global set by the lifespan hook; without this guard an
    # unloaded encoder would surface as an unhandled AttributeError.
    if le is None:
        raise HTTPException(
            status_code=503,
            detail="Model is not loaded"
        )

    tracks = le.classes_.tolist()
    return {
        "available_tracks": tracks,
        "count": len(tracks)
    }

# ==============================================
# Main execution
# ==============================================

# Script entry point: train and save a model, then serve it.
if __name__ == "__main__":
    # Train first so that Models/best_model.pkl exists before the server's
    # lifespan hook tries to load it.
    print("Training the simple model...")
    train_and_save_models()
    
    # Run server.
    # nest_asyncio patches asyncio to allow nested event loops; presumably
    # needed because this runs inside a notebook kernel that already has a
    # running loop. It must be applied before uvicorn.run.
    import nest_asyncio
    nest_asyncio.apply()
    
    # Blocks until the server is stopped.
    uvicorn.run(app, host="127.0.0.1", port=8000)

Training the simple model...

Training XGBoost...
                          precision    recall  f1-score   support

     Backend Development       0.95      0.94      0.95       150
  Blockchain Development       0.96      0.81      0.88       180
      Cloud Architecture       0.69      0.93      0.79       180
       Computer Literacy       0.56      0.87      0.68       210
           Cybersecurity       0.94      0.91      0.93       150
           Data Analysis       0.93      0.91      0.92       150
 Database Administration       0.93      0.94      0.93       150
      DevOps Engineering       0.92      0.73      0.81       180
    Digital Productivity       0.87      0.70      0.78       180
    Frontend Development       0.97      0.98      0.97       150
         IT Fundamentals       0.84      0.65      0.73       180
   IT Project Management       0.96      0.78      0.86       180
        Machine Learning       0.96      0.96      0.96       150
    Network Fundamentals 

INFO:     Started server process [29452]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


✅ Models loaded successfully. Best model: XGBClassifier
INFO:     127.0.0.1:52325 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:52325 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:52326 - "POST /recommend HTTP/1.1" 200 OK
INFO:     127.0.0.1:52333 - "POST /recommend HTTP/1.1" 200 OK
INFO:     127.0.0.1:52337 - "POST /recommend HTTP/1.1" 200 OK
