In [1]:
import os, json, re
import random, time
import pandas as pd, numpy as np
from tqdm import tqdm
from datetime import datetime, timedelta
from dotenv import load_dotenv

from itertools import islice

# Util built for sharktank project
import trial_sharktank_utils_mod as su
from trial_sharktank_utils_mod import load_facts, PitchOrchestrator, metrics_calculation, PitchEditor, OrchestratorResponse, PitchEquipped

# Agno for llm agents
from agno.agent import Agent
from agno.models.groq import Groq
from agno.storage.agent.sqlite import SqliteAgentStorage
from agno.tools.duckduckgo import DuckDuckGoTools
# from agno.tools.wikipedia import WikipediaTools

# Other LLM utils
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List, Dict, Optional

  from .autonotebook import tqdm as notebook_tqdm



>> Chunking Strategy: FIXED
>> Agent Storage File: tmp/pdf_rag.db
>> Agent Table Name: HBS_fixed_agent
>> ChromaDB Collection Name: HBS_fixed
>> ChromaDB Storage Path: ./chromadb_data
>> PDFs Being Processed:
   - https://raw.githubusercontent.com/sciencenerd880/LLM-DBT/main/data/pdfs/hbs_opportunities.pdf
   - https://raw.githubusercontent.com/sciencenerd880/LLM-DBT/main/data/pdfs/hbs_pitchdeck_sample.pdf



Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 36 0 (offset 0)
Ignoring wrong pointing object 46 0 (offset 0)
Ignoring wrong pointing object 48 0 (offset 0)
Ignoring wrong pointing object 50 0 (offset 0)
Ignoring wrong pointing object 61 0 (offset 0)
Ignoring wrong pointing object 71 0 (offset 0)
Ignoring wrong pointing object 81 0 (offset 0)
Ignoring wrong pointing object 83 0 (offset 0)


In [None]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The variable name was previously misspelled ("OEPNAI_API_KEY"), so the intended
# key was never set. setdefault keeps any key already provided by the environment
# or .env and only falls back to the empty placeholder when none exists.
os.environ.setdefault("OPENAI_API_KEY", "")  # insert api key here for Open AI

# os.environ only accepts str values: re-assigning os.getenv(...) when the variable
# is missing raises an opaque "str expected, not NoneType" TypeError. Fail early
# with an actionable message instead; when the key is present nothing needs copying.
if os.getenv("GROQ_API_KEY") is None:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file or export it before running this notebook."
    )

In [3]:
# Bind the shared settings exposed by the sharktank utility module to short
# notebook-level names used by the cells below.
agent_storage = su.agent_storage
reference_models, edit_reference_models = (
    su.REFERENCE_MODELS,
    su.EDIT_REFERENCE_MODELS,
)

In [4]:
# Load the facts for every case, then create one result slot per case.
# Each slot starts as an empty string and is filled in by the main loop.
facts_store = load_facts()
case_names = [name for name, _ in facts_store.items()]
pitches = dict.fromkeys(case_names, "")
metrics = dict.fromkeys(case_names, "")
time_stamps = dict.fromkeys(case_names, "")

In [None]:
# Main loop: generate one pitch per case and record the pitch, the raw
# orchestrator logs and a completion timestamp for each case.
orcheditor = PitchEquipped(reference=edit_reference_models, 
                          iterations=2,
                          orchestrator="deepseek-r1-distill-llama-70b"
                        )
goal = 'create a pitch with the given facts'

RUN_FIRST_N = False      # Set to True to run only the first N cases; False runs all
N = 130

# islice has no length, so work out the number of cases separately and hand it
# to tqdm; otherwise the progress bar cannot show a percentage or an ETA.
if RUN_FIRST_N:
    cases_to_run = islice(facts_store.items(), N)
    n_cases = min(N, len(facts_store))
else:
    cases_to_run = facts_store.items()
    n_cases = len(facts_store)

for case, facts in tqdm(cases_to_run, total=n_cases, desc="Generating pitches"):
    # Debug override: uncomment to force a single case.
    # case = 'facts_shark_tank_transcript_37_TIK PIK.txt'
    if pitches[case] != "":
        # This case already has a pitch from an earlier run of this cell:
        # clear the orchestrator state before regenerating it.
        orcheditor.logs = []
        orcheditor.agents = {}
    try:
        # Built inside the try so that a record missing one of these keys is
        # reported and skipped like any other per-case failure instead of
        # aborting the whole run.
        lean_facts = {
            'facts': facts['facts'],
            'product_description': facts['product_description']
        }
        pitch = orcheditor.orchestrate_with_edit(goal, lean_facts, verbose=False)
        pitches[case] = pitch
        metrics[case] = orcheditor.logs
    except Exception as e:
        # Best-effort run: report the failure and leave an empty result for the case.
        print(f"{case} error: {e}")
        pitches[case] = ""
        metrics[case] = []
    finally:
        time_stamps[case] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Reset for next run. Rebinding to fresh objects (rather than clearing
        # in place) matters: metrics[case] still references this case's log list.
        orcheditor.logs = []
        orcheditor.agents = {}

[00:34<?] 0/2 | Running Pitch Editing Iterations:   0%|[33m          [0m ?it/s
Generating pitches: 1it [00:34, 34.71s/it]

No JSON found.
Parsing Synthesizer Error: local variable 'json_str' referenced before assignment
Response:


**Pitch:**

Introducing GarmaGuard, the innovative solution for on-the-go fabric care. As the first natural garment and fabric cleanser using natural propellants, GarmaGuard is revolutionizing the way we maintain fresh clothing. Our product is designed to eliminate odors, freshen fabrics, control dirt, kill 99% of germs, and is perfectly sized for convenience—making it an essential item for every eco-conscious consumer.

**Financial Performance:**

Since our launch 1.5 years ago, GarmaGuard has achieved impressive sales of $476,000, with a projected $500,000 for the current year. Our profitability stands at $100,000, representing a 20% margin, demonstrating strong financial health and growth potential.

**Customer Loyalty and Market Reception:**

With a loyal customer base of 14,000, a conversion rate of 6.6%, and a return customer rate of 33%, GarmaGuard has proven its effectiv

[00:43<00:00] 2/2 | Running Pitch Editing Iterations: 100%|[33m██████████[0m 21.71s/it
[01:12<00:00] 2/2 | Running Pitch Editing Iterations: 100%|[33m██████████[0m 36.18s/it
Generating pitches: 3it [02:30, 50.17s/it]


In [None]:
# Summarise the raw logs of every case with metrics_calculation, then transpose
# the per-case results into three parallel sequences (each result is unpacked
# into input length, output length and latency, in that order).
calculated_metrics = [metrics_calculation(case_logs) for case_logs in metrics.values()]
input_lengths, output_lengths, latencies = zip(*calculated_metrics)

In [None]:
# Date stamp for the export file name. Year-month-day is unambiguous, sorts
# chronologically, and matches the date part of the per-case time_stamps
# (the previous '%Y-%d-%m' produced year-day-month).
timestamp = datetime.now().strftime('%Y-%m-%d')
framework = "orchestrator"
layer = "orchestrator_1"

n_rows = len(pitches)
# NOTE(review): the second "/groq" has no trailing slash, so the label reads
# "...<orchestrator>/groq<editor>". Left unchanged because downstream analysis
# may match on this exact string; confirm whether "/groq/" was intended.
model_label = "groq/" + orcheditor.orchestrator + "/groq" + orcheditor.editor

# One row per case; the constant columns are repeated so every column has n_rows entries.
pitches_df = pd.DataFrame(
    data={
        'scenario_name': list(pitches.keys()),
        'framework': n_rows * [framework],
        'layer': n_rows * [layer],
        'model_name': n_rows * [model_label],
        'model_identity': n_rows * [model_label],
        'latency': latencies,
        'input_length': input_lengths,
        'output_length': output_lengths,
        'time_stamp': list(time_stamps.values()),
        'response': list(pitches.values()),
    }
)

In [None]:
# Save the results table as a date-stamped Excel workbook in the working directory.
output_path = f"{timestamp}-orchestrator-editRAGfinal-pitches.xlsx"
pitches_df.to_excel(output_path)