In [1]:
import os, json, re
import random, time
import pandas as pd, numpy as np
from tqdm import tqdm
from datetime import datetime, timedelta
from dotenv import load_dotenv

from itertools import islice

# Util built for sharktank project
import trial_sharktank_utils_mod as su
from trial_sharktank_utils_mod import load_facts, PitchOrchestrator, metrics_calculation, PitchEditor, OrchestratorResponse, PitchEquipped

# Agno for llm agents
from agno.agent import Agent
from agno.models.groq import Groq
from agno.storage.agent.sqlite import SqliteAgentStorage
from agno.tools.duckduckgo import DuckDuckGoTools
# from agno.tools.wikipedia import WikipediaTools

# Other LLM utils
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List, Dict, Optional

  from .autonotebook import tqdm as notebook_tqdm



>> Chunking Strategy: FIXED
>> Agent Storage File: tmp/pdf_rag.db
>> Agent Table Name: HBS_fixed_agent
>> ChromaDB Collection Name: HBS_fixed
>> ChromaDB Storage Path: ./chromadb_data
>> PDFs Being Processed:
   - https://raw.githubusercontent.com/sciencenerd880/LLM-DBT/main/data/pdfs/hbs_opportunities.pdf
   - https://raw.githubusercontent.com/sciencenerd880/LLM-DBT/main/data/pdfs/hbs_pitchdeck_sample.pdf



In [None]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Ensure OPENAI_API_KEY exists without clobbering a real key: setdefault only
# writes the empty placeholder when nothing was provided by .env or the shell.
# (The variable name was previously misspelled, so the assignment had no effect.)
os.environ.setdefault("OPENAI_API_KEY", "")

# os.environ only accepts str values, so assigning a missing key (None) would
# raise an opaque TypeError. Fail early with an actionable message instead.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file or export it before running this notebook."
    )
os.environ["GROQ_API_KEY"] = groq_api_key

In [3]:
# Bind the shared objects exposed by the sharktank utility module to short
# notebook-level names used by the cells below.
agent_storage = su.agent_storage
reference_models, edit_reference_models = (
    su.REFERENCE_MODELS,
    su.EDIT_REFERENCE_MODELS,
)

In [4]:
# Load the facts store, then create one empty-string placeholder per scenario
# key for the generated pitch, its metric logs, and its time stamp.
facts_store = load_facts()
pitches = dict.fromkeys(facts_store, "")
metrics = dict.fromkeys(facts_store, "")
time_stamps = dict.fromkeys(facts_store, "")

In [5]:
# Scenario files that still need a pitch generated.
remaining_pitches = [
    "facts_shark_tank_transcript_23_Stakt.txt"
]

# Reload the full facts store and keep only the scenarios listed above.
facts_store = {
    name: payload
    for name, payload in load_facts().items()
    if name in remaining_pitches
}

In [6]:
# Sanity check: list the scenario keys that survived the filter.
selected_cases = facts_store.keys()
print(selected_cases)

dict_keys(['facts_shark_tank_transcript_23_Stakt.txt'])


In [7]:
# Main loop: generate one pitch per scenario and record the pitch, the
# orchestrator's logs, and a completion time stamp for each.
orcheditor = PitchEquipped(
    reference=edit_reference_models,
    iterations=2,
    orchestrator="llama-3.3-70b-versatile",
)
goal = 'create a pitch with the given facts'

RUN_FIRST_N = False      # True: only the first N scenarios; False: run all
N = 1

if RUN_FIRST_N:
    cases_to_run = islice(facts_store.items(), N)
else:
    cases_to_run = facts_store.items()

for case, facts in tqdm(cases_to_run, desc="Generating pitches"):
    # Pass only the fields the orchestrator is given for pitch generation.
    lean_facts = {
        field: facts[field]
        for field in ('facts', 'product_description')
    }

    # A non-empty entry means this scenario was generated before: start clean.
    if pitches[case] != "":
        orcheditor.logs, orcheditor.agents = [], {}

    try:
        pitch = orcheditor.orchestrate_with_edit(goal, lean_facts, verbose=False)
        pitches[case], metrics[case] = pitch, orcheditor.logs
    except Exception as e:
        # Best effort: report the failure and store empty results for the case.
        print(f"{case} error: {e}")
        pitches[case], metrics[case] = "", []
    time_stamps[case] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Reset orchestrator state so the next scenario does not inherit it.
    orcheditor.logs, orcheditor.agents = [], {}

Generating pitches:   0%|          | 0/1 [00:00<?, ?it/s]

[00:50<00:00] 2/2 | Running Pitch Editing Iterations: 100%|[33m██████████[0m 25.14s/it
Generating pitches: 100%|██████████| 1/1 [00:50<00:00, 50.29s/it]


In [8]:
# Display the pitch held in `pitch`, the variable assigned inside the generation loop.
# NOTE(review): if the last scenario raised, `pitch` still holds an earlier result
# (or is undefined on a fresh kernel) — confirm before trusting this output.
print(pitch)

{
    "Pitch": "Introducing Stakt, the revolutionary foldable yoga mat that combines functionality, comfort, and convenience. With its unique foldable design, Stakt offers easy storage and transport, perfect for yogis on-the-go. The mat's double thickness provides extra cushioning and support, ideal for intense workouts and joint issues. Additionally, Stakt transforms into a block for various exercises, adding versatility to your practice. The convenient carrying strap makes transportation effortless. Made from non-toxic, non-porous EVA foam, Stakt prioritizes safety and durability. With your investment, we plan to scale production, expand our marketing efforts, and explore strategic partnerships to bring Stakt to the masses and capitalize on the growing demand for premium yoga mats.",
    "Initial_Offer": {
        "Valuation": "$1 million",
        "Equity_Offered": "10%",
        "Funding_Amount": "$100,000",
        "Key_Terms": "Funds will be allocated to increase production capac

In [9]:
# Run metrics_calculation on every scenario's stored logs, then transpose the
# per-scenario results into three parallel sequences (one per measurement).
calculated_metrics = [metrics_calculation(raw_log) for raw_log in metrics.values()]
input_lengths, output_lengths, latencies = zip(*calculated_metrics)

In [10]:
# Date stamp used in the export file name. Year-month-day order matches the
# format used for the per-scenario time stamps and sorts chronologically
# (the previous '%Y-%d-%m' put the day before the month).
timestamp = datetime.now().strftime('%Y-%m-%d')
framework = "orchestrator"
layer = "orchestrator_1"

# The same label is stored in both model columns, so build it once.
# NOTE(review): there is no "/" between the second "groq" and the editor name —
# confirm this is the intended label format before changing it.
model_label = "groq/" + orcheditor.orchestrator + "/groq" + orcheditor.editor
n_rows = len(pitches)

# One row per scenario; constant columns are repeated to match the row count.
pitches_df = pd.DataFrame(
    data={
        'scenario_name': list(pitches.keys()),
        'framework': n_rows * [framework],
        'layer': n_rows * [layer],
        'model_name': n_rows * [model_label],
        'model_identity': n_rows * [model_label],
        'latency': latencies,
        'input_length': input_lengths,
        'output_length': output_lengths,
        'time_stamp': list(time_stamps.values()),
        'response': list(pitches.values()),
    }
)

In [11]:
# Write the results table to an Excel workbook named after the run date.
output_path = f"{timestamp}-orchestrator-editRAGlong-versatile-fewshot-01.xlsx"
pitches_df.to_excel(output_path)