# Install required dependencies

In [None]:
# Notebook shell escape (the leading "!" hands the line to the system shell):
# installs every package listed in requirements.txt.
!pip install -r requirements.txt

# Set API key for Gemini

In [None]:
import os  # needed for os.environ; this cell runs before the notebook's import cell
from getpass import getpass

# Never hard-code a real API key in the notebook: anything committed here is
# visible to everyone with access to the file and must be treated as leaked
# (rotate any key that was ever stored in this cell).
# Reuse a key that is already exported in the environment; otherwise prompt
# for it without echoing the input.
if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Enter your Gemini API key: ")

# Confirm the key is available without printing the secret itself.
print("GEMINI_API_KEY is set:", bool(os.environ.get("GEMINI_API_KEY")))

# Import dependencies

In [None]:
import enum
import pandas as pd
from pydantic import BaseModel, Field, conint
from google import genai
from typing import Literal, Optional

# Define a dictionary to store weights separately

In [None]:
# Relative weight of each scoring criterion, keyed by the field names of the
# Criteria model so a weight can be looked up by criterion name.
# NOTE(review): compute_final_score defines its own weight table and does not
# read this one -- confirm which set of weights is meant to be authoritative.
WEIGHTS = {
    "industry_match": 20,
    "investment_fit": 10,
    "startup_stage_alignment": 15,
    # Was "previous_investment_preferences", which matches no Criteria field.
    "previous_investments": 15,
    "market_trends": 5,
    "geographical_alignment": 5,
    "founder_experience": 15,
}


# Detailed Score Classes 

In [None]:
# ---------- Detailed Score Classes ----------
# Sub-score for the "industry_match" criterion: an integer constrained to
# 0-10 plus a free-text justification. The criterion's weight is not stored
# here; it is applied in compute_final_score.
class IndustryMatch(BaseModel):
    industry_match: conint(ge=0, le=10)  # integer score, 0..10 inclusive
    reason: str  # justification for the score

# Sub-score for the "investment_fit" criterion: an integer constrained to
# 0-10 plus a free-text justification. The criterion's weight is not stored
# here; it is applied in compute_final_score.
class InvestmentFit(BaseModel):
    investment_fit: conint(ge=0, le=10)  # integer score, 0..10 inclusive
    reason: str  # justification for the score

# Sub-score for the "startup_stage_alignment" criterion: an integer
# constrained to 0-10 plus a free-text justification. The criterion's weight
# is not stored here; it is applied in compute_final_score.
class StartupStageAlignment(BaseModel):
    startup_stage_alignment: conint(ge=0, le=10)  # integer score, 0..10 inclusive
    reason: str  # justification for the score

# Sub-score for the "previous_investments" criterion: an integer constrained
# to 0-10 plus a free-text justification. The criterion's weight is not
# stored here; it is applied in compute_final_score.
class PreviousInvestments(BaseModel):
    previous_investments: conint(ge=0, le=10)  # integer score, 0..10 inclusive
    reason: str  # justification for the score

# Sub-score for the "market_trends" criterion: an integer constrained to
# 0-10 plus a free-text justification. The criterion's weight is not stored
# here; it is applied in compute_final_score.
class MarketTrends(BaseModel):
    market_trends: conint(ge=0, le=10)  # integer score, 0..10 inclusive
    reason: str  # justification for the score

# Sub-score for the "geographical_alignment" criterion: an integer
# constrained to 0-10 plus a free-text justification. The criterion's weight
# is not stored here; it is applied in compute_final_score.
class GeographicalAlignment(BaseModel):
    geographical_alignment: conint(ge=0, le=10)  # integer score, 0..10 inclusive
    reason: str  # justification for the score

# Sub-score for the "founder_experience" criterion: an integer constrained
# to 0-10 plus a free-text justification. The criterion's weight is not
# stored here; it is applied in compute_final_score.
class FounderExperience(BaseModel):
    founder_experience: conint(ge=0, le=10)  # integer score, 0..10 inclusive
    reason: str  # justification for the score

# ---------- Aggregated Criteria Model ----------
# Groups the seven per-criterion sub-score models into one object.
# Each field name is also the name of the score field inside its sub-model,
# which is the shape compute_final_score reads (criteria[name][name]).
class Criteria(BaseModel):
    industry_match: IndustryMatch
    investment_fit: InvestmentFit
    startup_stage_alignment: StartupStageAlignment
    previous_investments: PreviousInvestments
    market_trends: MarketTrends
    geographical_alignment: GeographicalAlignment
    founder_experience: FounderExperience

# Define Match Score Model
# Top-level result for one investor/founder pair. get_match_score passes this
# class to Gemini as the response schema.
class MatchScore(BaseModel):
    investor_id: str
    founder_id: str
    # Constrained to 0-10 here; the scoring loop later replaces the parsed
    # value with the result of compute_final_score.
    rating: conint(ge=0, le=10)
    criteria_for_rating: Criteria  # per-criterion sub-scores and reasons
    explanation_for_rating: str
    suggestion_for_founder: str
    suggestion_for_investor: str


# Initialize Gemini API Client

In [None]:

import os  # os is used below but is not part of the notebook's import cell

# Shared Gemini client used by get_match_score. The key is read from the
# GEMINI_API_KEY environment variable; a KeyError here means it was never set.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

# Function to Call Gemini API

In [None]:

def get_match_score(investor_id, investor_data, founder_id, founder_data):
    """Ask Gemini to rate one investor/founder pair and return the raw JSON text.

    Args:
        investor_id: Identifier of the investor. Not sent to the model; used
            only to identify the pair in error messages.
        investor_data: Investor profile, interpolated into the prompt as-is.
        founder_id: Identifier of the founder. Not sent to the model; used
            only to identify the pair in error messages.
        founder_data: Founder profile, interpolated into the prompt as-is.

    Returns:
        The response text with surrounding whitespace stripped. The request
        asks for JSON output using MatchScore as the response schema.

    Raises:
        ValueError: If the response contains no text to parse.
    """
    prompt = f"""
    Given the following Founder and Investor profiles, rate their compatibility on a scale of 1 to 10:
    
    Founder Profile:
    {founder_data}
    
    Investor Profile:
    {investor_data}
    
    Justify the score based on industry match, funding fit, and startup stage alignment.

    Note: If any of the information is missing, rate the criteria zero.
    """
    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=prompt,
        config={
            'response_mime_type': 'application/json',
            'response_schema': MatchScore,
        },
    )
    raw_text = response.text
    # A response without text parts yields None; fail with a message that
    # names the pair instead of an opaque AttributeError on .strip().
    if raw_text is None:
        raise ValueError(
            f"Gemini returned no text for investor {investor_id!r} "
            f"and founder {founder_id!r}"
        )
    return raw_text.strip()

# Function to compute final score

In [None]:

def compute_final_score(criteria, weights=None):
    """Collapse per-criterion scores into one weighted rating out of 100.

    Args:
        criteria: Mapping of criterion name to a mapping that holds the score
            under the same name, e.g.
            ``{"industry_match": {"industry_match": 8, "reason": "..."}}``.
            Criteria missing from this mapping contribute nothing to the
            total but still count towards the maximum.
        weights: Optional mapping of criterion name to weight. When omitted,
            the built-in table below is used.

    Returns:
        The weighted total as a rounded percentage of the maximum achievable
        total (every weighted criterion scoring 10). Returns 0 when the
        weights sum to zero.
    """
    if weights is None:
        weights = {
            "industry_match": 70,
            "investment_fit": 50,
            "startup_stage_alignment": 40,
            "previous_investments": 5,
            "market_trends": 5,
            "geographical_alignment": 5,
            "founder_experience": 5,
        }
    # Each criterion tops out at 10, so the defaults give 180 * 10 = 1800.
    max_possible_score = sum(weight * 10 for weight in weights.values())
    if max_possible_score == 0:
        # No weights (or all-zero weights): avoid dividing by zero.
        return 0
    total_score = sum(
        criteria[name][name] * weight
        for name, weight in weights.items()
        if name in criteria
    )
    return round((total_score / max_possible_score) * 100)


# Read CSV Files

In [None]:

# Load the founder and investor tables from CSV files in the working directory.
founder_df = pd.read_csv(filepath_or_buffer="founder.csv")
investor_df = pd.read_csv(filepath_or_buffer="investor.csv")


# Extract Information

In [None]:

# Index each table by its ID column; every value is the full row as a dict.
founders_info = dict(
    (founder_row['Founder_ID'], founder_row.to_dict())
    for _, founder_row in founder_df.iterrows()
)
investors_info = dict(
    (investor_row['Investor_ID'], investor_row.to_dict())
    for _, investor_row in investor_df.iterrows()
)

# Compute Match Scores

In [None]:

import json
import time

# Score every investor/founder pair and collect the parsed results.
match_scores = []
for investor_id, investor_data in investors_info.items():
    for founder_id, founder_data in founders_info.items():
        match_score = get_match_score(investor_id, investor_data, founder_id, founder_data)
        matches = json.loads(match_score)
        # The IDs in the response are whatever the model wrote; replace them
        # with the IDs of the pair actually being scored so rows can always be
        # joined back to the input tables.
        matches['investor_id'] = str(investor_id)
        matches['founder_id'] = str(founder_id)
        # Replace the model's own rating with the weighted score computed
        # from the per-criterion sub-scores.
        matches['rating'] = compute_final_score(matches['criteria_for_rating'])
        match_scores.append(matches)
        # Pause between requests -- presumably to stay under API rate limits.
        time.sleep(5)
    # Longer pause after finishing each investor.
    time.sleep(15)

# Convert match scores to DataFrame and save as CSV


In [None]:
# Shallow copy of the collected results (list() replaces the identity
# comprehension).
data_dicts = list(match_scores)
# Flatten nested objects into columns whose names join the nested keys with "_".
df = pd.json_normalize(data_dicts, sep="_")
df.to_csv("investor_founder_ratings.csv", index=False)
print("Match scoring completed! Results saved to investor_founder_ratings.csv.")
# display is not defined in this file; presumably supplied by the notebook runtime.
display(df.head())