In [1]:
!pip install pandas sentence-transformers faiss-cpu
!pip install sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m9.6 MB/s[0m  [33m0:00:03[0mm0:00:01[0m:00:01[0mm
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [24]:
# 📂 2. Load Dataset
# ==============================
# (Fold: Data Loading)

import pandas as pd

# Load the freelancer table from a CSV given by relative path.
# Later code reads these columns from it: freelancer_id, name, skills,
# experience_years and rating.
df = pd.read_csv("freelancers.csv")
# NOTE(review): a bare expression is only rendered when it is the last statement
# of a cell; if more code follows in the same cell this preview is not shown.
df.head()
# ==============================
# 🧹 3. Preprocess Freelancer Profiles
# ==============================
# (Fold: Preprocessing)

def build_profile(row):
    """Render one freelancer record as a short natural-language profile.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing the keys
            'name', 'skills', 'experience_years' and 'rating'.

    Returns:
        A single string with three sentences: skills, experience, rating.
    """
    skills_sentence = f"{row['name']} is skilled in {row['skills']}. "
    experience_sentence = f"Experience: {row['experience_years']} years. "
    rating_sentence = f"Rating: {row['rating']}."
    return skills_sentence + experience_sentence + rating_sentence

# Build one profile sentence per freelancer (row-wise apply) and store it in a
# new column; this column is the text that is embedded further down.
df["profile_text"] = df.apply(build_profile, axis=1)
# Preview of id + generated text.
# NOTE(review): not rendered unless it is the last statement of its cell.
df[["freelancer_id", "profile_text"]]
# ==============================
# 🧠 4. Generate Embeddings
# ==============================
# (Fold: Embeddings)

from sentence_transformers import SentenceTransformer

# Load a sentence-transformer model for embeddings
# (the same model object is reused later to embed search queries).
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode freelancer profiles, one vector per row of df, in row order.
# normalize_embeddings=True is requested so that the inner-product index built
# below can be used for cosine similarity.
freelancer_embeddings = model.encode(df["profile_text"].tolist(), normalize_embeddings=True)
# ==============================
# 📊 5. Store Embeddings in FAISS
# ==============================
# (Fold: FAISS Index)

import faiss
import numpy as np

# Dimension of embeddings (second axis of the embedding matrix)
d = freelancer_embeddings.shape[1]

# Create FAISS index (cosine similarity via inner product)
index = faiss.IndexFlatIP(d)

# Add embeddings.
# Vectors are added in df row order, so a position returned by index.search
# is used directly as a positional row lookup (df.iloc) by the search function.
index.add(np.array(freelancer_embeddings))
# ==============================
# 🔍 6. Define Search Function
# ==============================
# (Fold: Matching Function)

def find_best_freelancers(project_desc, top_k=3):
    """Return the freelancers whose profiles best match a project description.

    The description is embedded with the notebook-level ``model`` and looked up
    in the notebook-level FAISS ``index``; hits are mapped back to rows of the
    notebook-level ``df`` by position.

    Args:
        project_desc: Free-text description of the project.
        top_k: Maximum number of matches to return.

    Returns:
        A list of dicts, in the order FAISS returns the hits, each with the
        keys freelancer_id, name, skills, experience_years, rating and
        similarity_score. The list is shorter than ``top_k`` when the index
        holds fewer vectors, and empty when ``top_k`` is below 1 or the index
        is empty.
    """
    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with label -1, and df.iloc[-1] would silently return the LAST
    # freelancer as if it were a genuine match.
    k = min(top_k, index.ntotal)
    if k < 1:
        return []

    # Encode query (normalised, like the indexed profile embeddings)
    query_emb = model.encode([project_desc], normalize_embeddings=True)

    # Search in FAISS
    scores, indices = index.search(np.array(query_emb), k)

    # Collect results
    results = []
    for idx, score in zip(indices[0], scores[0]):
        if idx < 0:
            # Defensive: a negative label means "no result" for this slot.
            continue
        freelancer = df.iloc[idx]
        results.append({
            "freelancer_id": freelancer["freelancer_id"],
            "name": freelancer["name"],
            "skills": freelancer["skills"],
            "experience_years": freelancer["experience_years"],
            "rating": freelancer["rating"],
            "similarity_score": float(score)
        })
    return results
# ==============================
# 🧪 7. Test with Example Project
# ==============================
# (Fold: Testing)

# Example project description used as the search query.
project = "Looking for an expert in Machine learning, Deep learning, Python ."
# Ask for the three closest freelancer profiles.
matches = find_best_freelancers(project, top_k=3)

# Print one result dict per line.
for m in matches:
    print(m)

{'freelancer_id': 'F001', 'name': 'Alice Johnson', 'skills': 'Python, Machine Learning, NLP', 'experience_years': np.int64(5), 'rating': np.float64(4.8), 'similarity_score': 0.5783983469009399}
{'freelancer_id': 'F004', 'name': 'Diana Garcia', 'skills': 'Data Science, Deep Learning, PyTorch', 'experience_years': np.int64(6), 'rating': np.float64(4.9), 'similarity_score': 0.5016211867332458}
{'freelancer_id': 'F006', 'name': 'Fatima Noor', 'skills': 'Fullstack Development, Python, React', 'experience_years': np.int64(4), 'rating': np.float64(4.6), 'similarity_score': 0.4348406195640564}


In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# ===============================
# Proposal Ranking Function
# ===============================

#region Load and Normalize Data
def rank_proposals(csv_file="proposals.csv"):
    """Score proposals from a CSV file and return them ranked best to worst.

    The CSV must contain the columns ``proposal_id``, ``relevance_score``,
    ``rating``, ``success_rate`` and ``bid_price``; ``freelancer_name`` is
    optional and is included in the result when present.

    Each criterion is min-max scaled to [0, 1] (``bid_price`` is inverted so
    that a lower bid scores higher) and combined into a weighted sum stored in
    ``final_score``.

    Args:
        csv_file: Path of the proposals CSV.

    Returns:
        A DataFrame sorted by ``final_score`` descending, holding
        ``proposal_id``, optionally ``freelancer_name``, and ``final_score``.
        A CSV without data rows yields an empty frame with those columns.
    """
    # Step 1: Load proposals
    df = pd.read_csv(csv_file)

    # Only select columns that exist in the DataFrame
    columns_to_return = ['proposal_id', 'final_score']
    if 'freelancer_name' in df.columns:
        columns_to_return.insert(1, 'freelancer_name')

    # MinMaxScaler rejects input with zero rows, so a header-only CSV is
    # answered directly with an empty ranking instead of raising.
    if df.empty:
        df['final_score'] = pd.Series(dtype='float64')
        return df[columns_to_return]

    # Step 2: Normalize positive criteria (higher is better)
    positive_criteria = ['relevance_score', 'rating', 'success_rate']
    scaler = MinMaxScaler()
    df[positive_criteria] = scaler.fit_transform(df[positive_criteria])

    # Step 3: Normalize negative criterion (bid_price → lower is better)
    bid_max = df['bid_price'].max()
    bid_min = df['bid_price'].min()
    if bid_max == bid_min:
        # All bids identical: the range is zero and the division below would
        # turn every score into NaN, so treat every bid as the best one.
        df['bid_price'] = 1.0
    else:
        df['bid_price'] = (bid_max - df['bid_price']) / (bid_max - bid_min)
#endregion

#region Weights and Final Score
    # Step 4: Define weights
    weights = {
        'relevance_score': 0.4,
        'rating': 0.25,
        'success_rate': 0.25,
        'bid_price': 0.1
    }

    # Step 5: Compute final score
    df['final_score'] = (
        df['relevance_score'] * weights['relevance_score'] +
        df['rating'] * weights['rating'] +
        df['success_rate'] * weights['success_rate'] +
        df['bid_price'] * weights['bid_price']
    )
#endregion

#region Sorting
    # Step 6: Sort proposals best → worst
    ranked_df = df.sort_values(by='final_score', ascending=False)
    return ranked_df[columns_to_return]
#endregion

#region Example Usage
# Example run: rank the proposals in proposals.csv and print the ranking.
if __name__ == "__main__":
    ranked = rank_proposals("proposals.csv")
    print(ranked)
#endregion

  proposal_id  final_score
0       PR001     0.866667
2       PR003     0.827273
3       PR004     0.610227
1       PR002     0.361742
4       PR005     0.100000


In [5]:
# ===============================
# BizGenie AI Engineer Microservice
# ===============================

#region Imports
from fastapi import FastAPI
from pydantic import BaseModel
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
#endregion

#region App Setup
# FastAPI application object; the endpoint below is registered on it via @app.post.
app = FastAPI(title="BizGenie Freelancer Matcher")
#endregion

#region Request Schema
# Request body accepted by the /match_freelancers endpoint.
class ProjectRequest(BaseModel):
    # Free-text description of the project. The endpoint defined in this file
    # accepts it but does not read it yet (it only ranks stored proposals).
    project_description: str
#endregion

#region Ranking Logic
def rank_proposals(csv_file: str):
    """Score every proposal in a CSV file and return them sorted best first.

    The CSV must contain the columns ``relevance_score``, ``rating``,
    ``success_rate`` and ``bid_price``. Each is min-max scaled to [0, 1]
    (``bid_price`` inverted, so a lower bid scores higher) and combined into a
    weighted ``final_score`` column.

    Args:
        csv_file: Path of the proposals CSV.

    Returns:
        The loaded DataFrame with the four criteria replaced by their scaled
        values and a ``final_score`` column added, sorted by ``final_score``
        descending. A CSV without data rows yields an empty frame that still
        carries the ``final_score`` column.
    """
    df = pd.read_csv(csv_file)

    # MinMaxScaler rejects input with zero rows. Return the empty frame so the
    # caller's `ranked_df.empty` check can actually be reached.
    if df.empty:
        df['final_score'] = pd.Series(dtype='float64')
        return df

    # Normalize positive criteria
    positive_criteria = ['relevance_score', 'rating', 'success_rate']
    scaler = MinMaxScaler()
    df[positive_criteria] = scaler.fit_transform(df[positive_criteria])

    # Normalize negative (bid_price → lower is better)
    bid_max = df['bid_price'].max()
    bid_min = df['bid_price'].min()
    if bid_max == bid_min:
        df['bid_price'] = 1.0  # If all prices are the same, treat as best
    else:
        df['bid_price'] = (bid_max - df['bid_price']) / (bid_max - bid_min)

    # Weights
    weights = {
        'relevance_score': 0.4,
        'rating': 0.25,
        'success_rate': 0.25,
        'bid_price': 0.1
    }

    # Final score
    df['final_score'] = (
        df['relevance_score'] * weights['relevance_score'] +
        df['rating'] * weights['rating'] +
        df['success_rate'] * weights['success_rate'] +
        df['bid_price'] * weights['bid_price']
    )

    return df.sort_values(by='final_score', ascending=False)
#endregion

#region API Endpoint
@app.post("/match_freelancers")
def match_freelancers(request: ProjectRequest):
    # Endpoint: return the three best-scored proposals as
    # {"matches": [{"freelancer_id": ..., "score": ...}, ...]}.
    # The project description in the request is not used yet; the stored
    # proposals are simply ranked.
    import os

    proposals_path = "proposals.csv"

    # Without the proposals file there is nothing to rank.
    if not os.path.exists(proposals_path):
        return {"error": "proposals.csv file not found. Please upload the file."}

    ranked_df = rank_proposals(proposals_path)

    # No proposals at all → empty match list.
    if ranked_df.empty:
        return {"matches": []}

    # Keep the top three and expose "final_score" as "score", rounded to 2 decimals.
    top_three = ranked_df[['freelancer_id', 'final_score']].head(3)
    results = []
    for _, row in top_three.iterrows():
        results.append({
            "freelancer_id": row["freelancer_id"],
            "score": round(row["final_score"], 2)
        })
    return {"matches": results}
#endregion

# Note:
# In a Jupyter notebook the FastAPI endpoint is not served unless a server (e.g. uvicorn) is started.
# To check the ranking logic from a notebook, call rank_proposals directly and display the result.
# Example:
rank_proposals("proposals.csv").head(3)

Unnamed: 0,proposal_id,freelancer_id,relevance_score,rating,bid_price,success_rate,final_score
0,PR001,F001,1.0,0.833333,0.5,0.833333,0.866667
2,PR003,F004,0.818182,1.0,0.0,1.0,0.827273
3,PR004,F006,0.681818,0.5,0.666667,0.583333,0.610227
