# Investor-Founder Matching using Gemini API

## **Objective**
This notebook fine-tunes a Gemini model to predict a compatibility score for a founder–investor pair, then uses those scores to match startup founders with investors.

### **Steps**
1. Load structured startup and investor preference data.
2. Fine-tune a Gemini model through the Gemini API using the training data.
3. Compute a compatibility score between founders and investors.
4. Rank and display investors based on their compatibility with founders.


In [3]:
import json

# Load the dataset
file_path = "investor_founder_data.json"

# Read as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(file_path, "r", encoding="utf-8") as file:
    data = json.load(file)

# Display a few samples. Slicing (instead of indexing 0..2) avoids an
# IndexError when the dataset holds fewer than three records.
print("Sample Data:")
for sample in data[:3]:
    print(json.dumps(sample, indent=4))


Sample Data:
{
    "Founder Information": {
        "Industry": "Healthcare",
        "Startup Stage": "Series B",
        "Funding Required": "5M+",
        "Traction": "Early revenue",
        "Business Model": "B2B"
    },
    "Investor Preferences": {
        "Preferred Industry": "E-commerce",
        "Investment Range": "<100K"
    },
    "Match Score": 0.27
}
{
    "Founder Information": {
        "Industry": "Fintech",
        "Startup Stage": "Series B",
        "Funding Required": "1M-5M",
        "Traction": "Early revenue",
        "Business Model": "B2B"
    },
    "Investor Preferences": {
        "Preferred Industry": "AI",
        "Investment Range": "5M+"
    },
    "Match Score": 0.67
}
{
    "Founder Information": {
        "Industry": "Blockchain",
        "Startup Stage": "Growth",
        "Funding Required": "1M-5M",
        "Traction": "No revenue",
        "Business Model": "B2C"
    },
    "Investor Preferences": {
        "Preferred Industry": "E-commerce",
  

In [11]:
import google.generativeai as genai
import time
import json

# Read the API key from the environment instead of embedding it in the notebook.
# A key saved in a notebook is visible to everyone who can read the file or its
# history; any key that was ever stored that way should be revoked and replaced.
import os

api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable to your Gemini API key before running this cell."
    )
genai.configure(api_key=api_key)

# Base model that the tuning job starts from
base_model = "models/gemini-1.5-flash-001-tuning"
print("Using model:", base_model)

# Read the raw founder/investor records from disk
with open("investor_founder_data.json", "r") as file:
    data = json.load(file)

# Field order used when flattening each side of a record into prompt text
INVESTOR_FIELDS = ("Preferred Industry", "Investment Range")
FOUNDER_FIELDS = ("Industry", "Startup Stage", "Funding Required", "Traction", "Business Model")

# Turn every record into a {"text_input", "output"} training example;
# records that lack an expected key are reported and skipped.
processed_data = []
for entry in data:
    try:
        investor_section = entry["Investor Preferences"]
        investor_prefs = ", ".join(str(investor_section[field]) for field in INVESTOR_FIELDS)

        founder_section = entry["Founder Information"]
        founder_info = ", ".join(str(founder_section[field]) for field in FOUNDER_FIELDS)

        # The target is stored as text, so convert the numeric score
        match_score = str(entry["Match Score"])
    except KeyError as e:
        print(f"Skipping an entry due to missing key: {e}")
        continue

    example = {
        "text_input": f"{investor_prefs} - {founder_info}",
        "output": match_score,
    }
    processed_data.append(example)

print(f"Processed training data created! Total samples: {len(processed_data)}")

# Start the tuning job and wait for it; any failure is re-raised as RuntimeError
try:
    tuning_args = {
        "display_name": "Investor_Founder_Match",
        "source_model": base_model,
        "epoch_count": 10,
        "batch_size": 4,
        "learning_rate": 0.001,
        "training_data": processed_data,
        "input_key": "text_input",  # key holding the prompt in each example
        "output_key": "output",  # key holding the expected score text
    }
    operation = genai.create_tuned_model(**tuning_args)
    print("Fine-tuning in progress...")

    # Step through the operation's wait_bar(), pausing 10 seconds per iteration
    for status in operation.wait_bar():
        time.sleep(10)

    result = operation.result()
    print(f"Model fine-tuned successfully! Model ID: {result.name}")
except Exception as exc:
    raise RuntimeError(f"Failed to fine-tune the model: {exc}") from exc


Using model: models/gemini-1.5-flash-001-tuning
Processed training data created! Total samples: 250
Fine-tuning in progress...


  0%|          | 0/625 [00:00<?, ?it/s]

Model fine-tuned successfully! Model ID: tunedModels/investorfoundermatch-y2gzz27f99gl


In [16]:
import google.generativeai as genai

# Function to get match scores using fine-tuned model
def get_match_score(founder, investor, model_name="tunedModels/investorfoundermatch-y2gzz27f99gl"):
    """Ask a tuned Gemini model for a founder/investor compatibility score.

    The prompt is "<investor fields> - <founder fields>", with the fields of
    each side joined by ", ".

    Args:
        founder: Mapping with the keys "Industry", "Startup Stage",
            "Funding Required", "Traction" and "Business Model".
        investor: Mapping with the keys "Preferred Industry" and
            "Investment Range".
        model_name: Identifier of the tuned model to query. Defaults to the
            model ID this notebook was written against; pass another ID to
            score with a different tuning run.

    Returns:
        The model's reply parsed as a float, or None when the request fails
        or the reply cannot be parsed as a number (the error is printed).

    Raises:
        KeyError: If `founder` or `investor` lacks one of the required keys.
    """
    input_text = (
        f"{investor['Preferred Industry']}, {investor['Investment Range']} - "
        f"{founder['Industry']}, {founder['Startup Stage']}, {founder['Funding Required']}, "
        f"{founder['Traction']}, {founder['Business Model']}"
    )
    model = genai.GenerativeModel(model_name=model_name)

    try:
        response = model.generate_content(input_text)
        return float(response.text.strip())
    except Exception as e:
        # Best effort: report the problem and let the caller decide what to do
        # with a missing score.
        print("Error generating match score:", e)
        return None


# Score the first record's founder against that same record's investor
first_record = data[0]
sample_founder = first_record["Founder Information"]
sample_investor = first_record["Investor Preferences"]
sample_score = get_match_score(sample_founder, sample_investor)
print("Sample Compatibility Score:", sample_score)


Sample Compatibility Score: 0.97


In [None]:
import time
def rank_investors(founder_info, investor_list, delay=5, score_fn=None):
    """Score every investor against one founder and return them best-first.

    Args:
        founder_info: Founder description passed unchanged to the scorer.
        investor_list: Iterable of investor descriptions to score.
        delay: Seconds to pause after each scoring call (default 5, used to
            space out API requests). Pass 0 to disable the pause.
        score_fn: Callable `(founder_info, investor) -> float | None`.
            Defaults to `get_match_score`.

    Returns:
        A list of {"Investor": ..., "Score": ...} dicts sorted by score in
        descending order. Investors whose score is None (scoring failed) are
        kept and placed at the end rather than crashing the sort.
    """
    if score_fn is None:
        score_fn = get_match_score

    scored_investors = []
    for investor in investor_list:
        score = score_fn(founder_info, investor)
        if delay > 0:
            time.sleep(delay)
        scored_investors.append({"Investor": investor, "Score": score})

    def _sort_key(item):
        # None cannot be compared with floats; sort on (has_score, score) so
        # that missing scores rank below every real score.
        score = item["Score"]
        has_score = score is not None
        return (has_score, score if has_score else 0.0)

    return sorted(scored_investors, key=_sort_key, reverse=True)

# Rank every investor profile in the dataset against the first record's founder
founder = data[0]["Founder Information"]
investors = []
for entry in data:
    investors.append(entry["Investor Preferences"])

ranked_list = rank_investors(founder, investors)

print("Top 5 Investors for Founder:")
for position, result in enumerate(ranked_list[:5], start=1):
    print(f"{position}. Investor: {result['Investor']}, Score: {result['Score']}")
    


In [None]:
import fitz 
import pdfplumber 
import google.generativeai as genai
import json
import time


In [None]:

# Read the API key from the environment rather than pasting it into the
# notebook; configuring the SDK with a placeholder string would make every
# later request fail.
import os

api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable to your Gemini API key before running this cell."
    )
genai.configure(api_key=api_key)

# Model used for the free-text pitch analysis
gemini_model = "models/gemini-1.5-pro"


In [None]:
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages, each page followed by a newline, or None if
        the file could not be opened or read (the error is printed).
    """
    page_texts = []
    try:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # extract_text() may yield None for a page without extractable
                # text (depends on the pdfplumber version). Treat that as an
                # empty page so one image-only page does not raise a TypeError
                # and discard the text of the whole document.
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        print("Error extracting text:", e)
        return None
    return "".join(text + "\n" for text in page_texts)


In [None]:
pdf_path = "sample_pitch.pdf"
pitch_text = extract_text_from_pdf(pdf_path)

# None and an empty string both mean nothing usable came out of the PDF
if not pitch_text:
    print("No text extracted from PDF!")
else:
    print("Extracted Text (First 500 characters):\n", pitch_text[:500])


In [None]:
def analyze_pitch_with_gemini(text):
    """Run a fixed series of analysis prompts over a pitch text.

    Four prompts (overall score, strengths, weaknesses, suggested
    improvements) are each sent to the model named by the module-level
    `gemini_model`, with the pitch text appended.

    Args:
        text: The pitch text to analyse.

    Returns:
        A dict mapping "Step 1" .. "Step 4" to the stripped model reply, or
        to "Error in processing" for a step whose request failed. If `text`
        is None or blank, no request is made and every step is
        "Error in processing".
    """
    prompts = [
        "Analyze the startup pitch based on Problem, Solution, Market, Business Model, Financials, and Team. Assign a pitch score (0-100).",
        "What are the key strengths of this pitch?",
        "What are the weaknesses or areas that need improvement?",
        "Suggest specific improvements or additional data to strengthen the pitch."
    ]

    # Without any pitch text the model would be asked to analyse the literal
    # string "None" (or nothing), wasting four requests on meaningless output.
    if text is None or not str(text).strip():
        print("No pitch text provided; skipping Gemini analysis.")
        return {f"Step {idx+1}": "Error in processing" for idx in range(len(prompts))}

    model = genai.GenerativeModel(model_name=gemini_model)

    results = {}
    for idx, prompt in enumerate(prompts):
        # Space out consecutive requests to avoid hitting API rate limits.
        # Pausing before each follow-up request (not only after a success)
        # keeps the spacing even when the previous request failed.
        if idx:
            time.sleep(2)
        try:
            response = model.generate_content(f"{prompt}\n\nPitch Text:\n{text}")
            results[f"Step {idx+1}"] = response.text.strip()
        except Exception as e:
            print(f"Error in query {idx+1}:", e)
            results[f"Step {idx+1}"] = "Error in processing"

    return results


In [None]:

# Analyse the extracted pitch and print each step's answer followed by a divider
analysis_results = analyze_pitch_with_gemini(pitch_text)
divider = "-" * 80
for step_name in analysis_results:
    print(f"\n{step_name}:\n{analysis_results[step_name]}\n" + divider)


In [None]:

# Persist the analysis as pretty-printed JSON in the working directory
with open("pitch_analysis_results.json", "w") as file:
    serialized_results = json.dumps(analysis_results, indent=4)
    file.write(serialized_results)
print("Results saved to pitch_analysis_results.json ✅")


In [None]:
from fpdf import FPDF

def generate_pdf_report(results, output_file="Pitch_Analysis_Report.pdf"):
    """Write the analysis results to a simple PDF report.

    The report has a centred title followed by one section per entry in
    `results`: the key as a bold heading and the value as wrapped body text.

    Args:
        results: Mapping of section heading to section text.
        output_file: Path of the PDF file to write.
    """
    def _latin1_safe(text):
        # The built-in Arial font only covers Latin-1. Model output often
        # contains bullets, smart quotes or dashes outside that range, which
        # would raise an encoding error when the PDF is written; replace any
        # such character with "?" instead of failing the whole report.
        return str(text).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Startup Pitch Analysis Report", ln=True, align="C")
    pdf.ln(10)

    # One heading + body pair per result entry
    for key, value in results.items():
        pdf.set_font("Arial", "B", 12)
        pdf.cell(200, 10, _latin1_safe(key), ln=True)
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(0, 10, _latin1_safe(value))
        pdf.ln(5)

    pdf.output(output_file)
    print(f"Report saved as {output_file} ✅")

# Render the collected analysis into the default PDF report file
generate_pdf_report(results=analysis_results)
