In [None]:
!apt-get update && apt-get install ffmpeg -y
!apt install tmux vim -y
!pip3 install moviepy openai python-dotenv pydub pytubefix openai-whisper llama-index llama-index-llms-openai llama-index-llms-ollama llama-index-embeddings-ollama 
!pip3 install flash-attn --no-build-isolation
!curl -fsSL https://ollama.com/install.sh | sh

# !chmod +x /usr/bin/ollama
# !useradd -r -s /bin/false -m -d /usr/share/ollama ollama
# !pip3 install openpyxl sentencepiece protobuf evaluate rouge_score absl-py tensorboardX bitsandbytes peft accelerate python-dotenv dspy-ai graspologic


In [None]:
import os, pickle, gc, subprocess
from dotenv import load_dotenv
from video_transcription import split_audio_into_chunks, video_to_audio, transcribe_audio_chunks, get_transcription_model, download_video
from llama_index.core import Document
# import torch

# Read secrets from the project's .env file.
load_dotenv("/workspace/repos/agentic-ai/.env")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# os.environ only accepts str values: assigning None (key missing) raises
# TypeError and would break this cell even when only the Ollama model is used.
if OPENAI_API_KEY is None:
    print("Warning: OPENAI_API_KEY is not set; the gpt-4o option will not work.")
else:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Input video: a local file, or a YouTube URL (see the commented alternative).
video_path="/workspace/data/video1512218125.mp4"
# video_path="https://www.youtube.com/watch?v=Z07Ewop7rQA"
# Where the extracted audio track is written.
audio_output_path="/workspace/data/video_audio.mp3"
# Directory that receives the audio chunks.
transcribe_output_dir="/workspace/data"
# Pickle file used as the transcription cache.
file_save_path="/workspace/data/transcription.pkl"

In [None]:
# Build `documents` (list of llama-index Documents) and `full_doc` (the whole
# transcript as one string). The transcription is computed only when no cached
# pickle exists at `file_save_path`; otherwise the cache is loaded.
# Both branches now define `documents`/`full_doc`; previously only the cache-hit
# branch did, so the first run left later cells with a NameError.
if not os.path.exists(file_save_path):
    transcribe_model = get_transcription_model(open_source_model=True)

    print("Processing video...")
    if 'youtube' in video_path:
        print("   Downloading video from youtube...")
        download_video(video_url=video_path, audio_output_path=audio_output_path)
    else:
        video_to_audio(video_path=video_path, audio_output_path=audio_output_path)

    print("Splitting audio into chunks...")
    split_audio_into_chunks(audio_output_path=audio_output_path, transcribe_output_dir=transcribe_output_dir, max_chunk_size_mb=24)

    print("Transcribing audio chunks...")
    # Read the chunks from the same directory the splitter wrote them to.
    transcription = transcribe_audio_chunks(model=transcribe_model, chunk_dir=transcribe_output_dir, file_save_path=file_save_path)

    # Drop the transcription model before the LLM is loaded.
    del transcribe_model
    # torch.cuda.empty_cache()
    gc.collect()

    # NOTE(review): assumes transcribe_audio_chunks returns the same data it
    # writes to file_save_path — confirm against video_transcription.
    documents = transcription
    if documents is None and os.path.exists(file_save_path):
        with open(file_save_path, 'rb') as f:
            documents = pickle.load(f)
else:
    # pickle.load can execute arbitrary code; only load caches produced by this notebook.
    with open(file_save_path, 'rb') as f:
        documents = pickle.load(f)

if not documents:
    raise ValueError(f"No transcription chunks available (cache: {file_save_path})")

# The transcription may be stored as plain dicts with a "text" key; normalise to Documents.
if isinstance(documents[0], dict):
    documents = [Document(text=chunk["text"]) for chunk in documents]

full_doc = " ".join(doc.text for doc in documents)
    

In [None]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI as LOpenAI

# Select the LLM used for summarization and register it as the llama-index default.
# model_name, ctx_len = "gpt-4o", 128000
model_name, ctx_len = "llama3.1:latest", 128000

if model_name == "gpt-4o":
    llm = LOpenAI(model=model_name, max_tokens=4000)
else:
    # Make sure the model is available locally. subprocess.run does not raise on
    # a non-zero exit status, so the return code is checked explicitly.
    try:
        sub_out = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True)
    except Exception as e:
        print(f"Error pulling model: Is the Ollama server running?\n{e}")
    else:
        if sub_out.returncode != 0:
            print(f"Error pulling model: Is the Ollama server running?\n{sub_out.stderr}")
    # Ollama's option for the generation cap is `num_predict`; `max_new_tokens`
    # is not an Ollama option and was not applied.
    addtion_kwargs = {"num_predict": 2000}
    # NOTE(review): this system prompt targets SEC Title 17 regulations, while the cells
    # below summarize a corporate video transcript — confirm it is intended here.
    system_prompt = "You are an expert at answering questions about rules and regulations regarding Title 17—Commodity and Securities Exchanges: CHAPTER II—SECURITIES AND EXCHANGE COMMISSION. Please provide a summary of the following text, and cite any sections, rules, acts or laws (e.g. § 230.503, § 240.13a-15, Act (15 U.S.C. 781), Investment Company Act of 1940) from context that support the answer. Be detailed in your response."
    # The server address field is `base_url` (the previous `url=` keyword is not a
    # field of the Ollama class).
    # NOTE(review): `model_type` does not appear to be an Ollama field either — confirm and drop.
    llm = Ollama(model=model_name, base_url="http://127.0.0.1:11434", context_window=ctx_len, model_type="chat", is_function_calling_model=False,
                request_timeout=1000.0, system_prompt=system_prompt, additional_kwargs=addtion_kwargs)
    print(llm.metadata)

Settings.llm = llm

## Tree summarizer

In [None]:
from llama_index.core.response_synthesizers import TreeSummarize
summarizer = TreeSummarize(llm=llm, verbose=True)
# prompt_summary = "You are a professional executive of AlphaTrAI. Your job is to summarize this text in great detail from a video transcription. The summary will be distributed to investors and stakeholders, so give a lot of details and examples from the transcription."
prompt_summary = f"""You are a professional executive at AlphaTrAI. Your job is to summarize the text from a video transcription. The summary will be a memo distributed to investors and stakeholders. Be sure it the memo has the following items:
1. Extract all the names of new hires and their position, and/or new advisors mentioned in the transcription.
2. Create a section to mention the personnel new to AlphaTrAI.
3. Include other highlights and progress made by AlphaTrAI.
4. Ensure the memo and ensure it is factual, optimistic, and any values mention come directly from the text. 

The transcription is as follows:\n{full_doc}"""

response = await summarizer.aget_response(prompt_summary, [doc.text for doc in documents])

In [None]:
# Show the memo produced by the tree summarizer.
print(response)

## LLM direct summarization

In [None]:
# Single-shot summarization: instructions and the full transcript go to the LLM
# in one completion request.
memo_requirements = (
    "1. Extract all the names of new hires and their position, and/or new advisors mentioned in the transcription.",
    "2. Include other highlights and progress made by AlphaTrAI.",
    "3. Ensure the memo is professional, fluid, factual, and optimistic. ",
)
prompt_summary = "\n".join([
    "You are a professional executive at AlphaTrAI. Your job is to summarize the text from a video transcription. The summary will be a memo distributed to investors and stakeholders. Be sure it the memo has the following items:",
    *memo_requirements,
    "",
    "The transcription is as follows:",
    full_doc,
])

response = llm.complete(
    prompt_summary,
    max_tokens=5000,
)

In [None]:
# Show the text of the completion returned by llm.complete.
print(response.text)

In [None]:
**AlphaTrAI Executive Memo**

**To:** Investors and Stakeholders

**From:** [Your Name], Executive at AlphaTrAI

**Subject:** Corporate Update, New Hires, and Key Progressions

As we continue to navigate the ever-evolving landscape of tech-enabled services, I am pleased to share with you our recent updates and key progressions.

**New Hires and Advisors:**

* **Anjan Jo**: An incredibly talented individual who has joined our advisory committee. She brings a wealth of knowledge and experience, and we are grateful for her continued involvement.
* **Rob Marsh**: A seasoned technology expert who has joined the firm as a member of the executive committee. He brings a deep understanding of tech-enabled services and will play a critical role in shaping our business strategy.
* **Matt Higgins**: Our new chief compliance officer, who will be instrumental in ensuring the responsible application of AI across our clients' businesses.

**New Personnel:**

* **Sammy Tarazi**: Our first official CTO, who has brought extraordinary value to the firm with his technical expertise and ability to communicate effectively.
* **John Sweeney**: A highly experienced business professional who has joined us as a key member of our team. He brings a deep understanding of the wealth management industry and will play a critical role in shaping our business strategy.

**Other Highlights:**

* We have made significant progress in consolidating our proprietary asset management business, which has allowed us to focus on delivering consultative services to our clients.
* Our pipeline is strong, with multiple SOWs (Statements of Work) already in process. Notably, we are working closely with the Financial Services Institute to explore opportunities for AI adoption and leadership.
* We have established a robust advisory committee, featuring industry legends such as Lonnie Steffens, Kathy Saunders, and Larry Anderson. This will enable us to leverage their expertise and stay ahead of the curve in terms of market trends.

**Progressions:**

* Our sales team has achieved remarkable success with Rocco's business, generating over $18 million in revenue. We expect this growth to continue as we integrate Anchor Advisory Services into our platform.
* Our tech-enabled services have seen significant traction, with multiple clients expressing interest in leveraging our capabilities. This includes Stacks Capital, which is integrating Kaya (our AI-powered due diligence tool) into their business processes.

**Conclusion:**

We are excited about the progress we've made and the opportunities that lie ahead. We believe that our focus on tech-enabled services will enable us to deliver greater value to our clients and drive growth for the firm.

Please feel free to reach out with any questions or concerns.

In [None]:
**MEMORANDUM**

**TO:** Investors and Stakeholders

**FROM:** AlphaTrAI Executive Team

**SUBJECT:** Corporate Update and Highlights

As we continue to navigate the rapidly evolving landscape of tech-enabled services, I am pleased to provide a comprehensive update on our company's progress and highlight key achievements.

**New to AlphaTrAI?**

Welcome! If you are new to our community, we encourage you to explore our platform and learn more about our mission to revolutionize the way businesses operate. Our team is committed to transparency and communication, and we invite you to join us on this exciting journey.

**Key Personnel**

We would like to take a moment to acknowledge and appreciate the contributions of our talented team members:

* **Bill**: Executive at AlphaTrAI
* **Steve Rocco**: Founder and CEO of Anchor Advisory Services (recently acquired by AlphaTrAI)
* **Sammy Tarazi**: Chief Technology Officer (CTO) at AlphaTrAI
* **John Sweeney**: Member of the Executive Committee at AlphaTrAI
* **Rob Marsh**: Member of the Executive Committee at AlphaTrAI and former CEO of Kensho
* **Matt Higgins**: Compliance Officer at AlphaTrAI

**Highlights and Progress**

We are proud to announce significant milestones in our journey:

* Acquisition of Anchor Advisory Services, expanding our capabilities in sales and distribution services.
* Appointment of key personnel, including Steve Rocco as CEO of Anchor Advisory Services and Sammy Tarazi as CTO of AlphaTrAI.
* Development of Kaya, a groundbreaking AI-powered due diligence platform for private investments.
* Expansion of our advisory committee with industry legends, including Lonnie Steffens and Kathy Saunders.

**Other Notable Achievements**

We have made substantial progress in various areas:

* Consolidation of our proprietary asset management business to focus on tech-enabled services.
* Establishment of a strong pipeline and successful delivery of sales and distribution services through Anchor Advisory Services.
* Development of customized SaaS deliverables, enabling us to differentiate ourselves from traditional software-as-a-service providers.

**Conclusion**

We are thrilled with the progress we have made and look forward to continuing our journey towards innovation and excellence. We invite you to join us on this exciting path, as we strive to revolutionize the way businesses operate through the application of technology and expertise.
...

Best regards,
[Your Name]
AlphaTrAI Executive Team
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...

In [None]:
# Spot check: does the literal string '3200' occur in the raw transcript?
# NOTE(review): presumably verifies a figure quoted in a generated memo — confirm.
'3200' in full_doc

In [None]:
# Dump the complete transcript for manual inspection (output can be very long).
print(full_doc)

In [None]:
**MEMORANDUM**

**TO:** Investors and Stakeholders

**FROM:** [Your Name], Executive at AlphaTrAI

**SUBJECT:** Corporate Update and Exciting Developments

We are pleased to provide a summary of our recent video transcription, highlighting key updates and developments within the company.

**Corporate Update:**
The past 24-36 months have been challenging for tech startups, with over 3,200 closures and a loss of $27 billion in startup capital. Despite this backdrop, we have navigated these difficulties to emerge stronger and more confident in our future prospects. Our pivot to tech-enabled services has been a major success, leveraging our expertise in wealth and asset management to provide consultative services and customized products for clients.

**Key Developments:**

1. **Steve Rocco's Business:** Anchor Advisory Services has joined the AlphaTrAI team, bringing a wealth of experience in sales and distribution. With their expertise, we can now offer more comprehensive solutions to our clients.
2. **Rob Marsh's Addition:** Rob has joined our executive committee, bringing his technology background and PM expertise to further enhance our capabilities.
3. **Matt Higgins' Compliance Role:** Matt has taken on the role of Chief Compliance Officer, ensuring that our customers receive the highest level of support in responsible AI application.
4. **Kaya, the Capital Artificial Intelligence Analyst:** Our team has developed an agent-based solution, leveraging large language models and retrieval augmented generations to automate due diligence for private real estate deals. This innovation has reduced processing times from 10 days to just three days.

**Growth and Expansion:**

We are excited about our growth prospects, with a strong pipeline and plans to expand our advisory committee with industry legends like Lonnie Steffens and Kathy Saunders. Our branding efforts will ensure that we effectively communicate the value of our tech-enabled services to the market.

**Conclusion:**
We believe that these developments demonstrate our commitment to innovation and customer satisfaction. We invite you to join us on this exciting journey, as we continue to push the boundaries of what is possible in the world of AI and asset management. Thank you for your continued support!

In [None]:
!cp '/workspace/repos/agentic-ai/PPM - MCG MADISON RIDGE DST.pdf' /workspace/data
!cp '/workspace/repos/agentic-ai/MASTER - PYTHON - SCORING MODEL - MCG MADISON RIDGE DST - v2.0.xlsx' /workspace/data
!apt update
!apt install tmux vim -y
!pip3 install llama-index llama-parse llama-index-embeddings-huggingface llama-index-llms-ollama
!pip3 install openpyxl sentencepiece protobuf evaluate rouge_score absl-py tensorboardX bitsandbytes peft accelerate python-dotenv graspologic
!pip3 install flash-attn --no-build-isolation
!curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | tar zx -C /usr


!ollama pull llama3.1:latest

In [None]:

import json
import os
from dotenv import load_dotenv
from llama_index.llms.ollama import Ollama

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    Settings,
)

# Allow nested event loops so async llama-index calls work inside the notebook.
import nest_asyncio
nest_asyncio.apply()

# Read API tokens from the project's .env file.
load_dotenv('/workspace/repos/agentic-ai/.env')
access_token = os.getenv('HF_TOKEN')
llama_api_key = os.getenv('LLAMA_API_KEY')

# model_name, num_ctx = "mistral-nemo", 128000
model_name, num_ctx = "llama3.1", 128000

# Ollama's option for the generation cap is `num_predict`; `max_new_tokens`
# is not an Ollama option and was not applied.
addtion_kwargs = {"num_predict": 3000}
system_prompt = "You are an expert in creating marketing materials for financial firms. You take corporate documents and turn them into marketing materials."
# The server address field is `base_url` (the previous `url=` keyword is not a
# field of the Ollama class).
# NOTE(review): `model_type` does not appear to be an Ollama field either — confirm and drop.
llm = Ollama(model=model_name, base_url="http://127.0.0.1:11434", context_window=num_ctx, model_type="chat", is_function_calling_model=False,
             request_timeout=1000.0, system_prompt=system_prompt, additional_kwargs=addtion_kwargs)
# Register as the llama-index default LLM and display its metadata.
Settings.llm = llm
llm.metadata

In [None]:

# Chunking parameters applied by llama-index when splitting documents.
Settings.chunk_size, Settings.chunk_overlap = 200, 50

# Embedding model; the commented names are alternatives that were tried.
embed_model_name = "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
# embed_model_name = "BAAI/bge-small-en-v1.5"
# embed_model_name = "hkunlp/instructor-base"
print("loading embed model...")

# Load the model on the GPU and register it as the llama-index default in one step.
Settings.embed_model = embed_model = HuggingFaceEmbedding(model_name=embed_model_name, device="cuda")


In [None]:
from llamaindex_data_utils import extract_text_from_pdf

# PDFs to parse, plus the options forwarded to the parser helper.
pdf_urls = [
    "/workspace/data/AIP - Capital Raise Proposal.pdf",
    "/workspace/data/Access Pre-IPOs Presentation June 2024.pdf",
]
llamaparse_kwargs = dict(result_type="markdown", split_by_page=False)

# Extract the text of every PDF; the helper is also given a JSON path to save to.
documents = extract_text_from_pdf(
    pdf_urls,
    llama_api_key,
    llamaparse_kwargs=llamaparse_kwargs,
    save_json_path="/workspace/data/pdf_text.json",
)

In [None]:
from rag_utils import create_llama_query_engine_rag

# Build the RAG query engine over the parsed documents.
persist_dir = "/workspace/data/vector_index"
query_engine = create_llama_query_engine_rag(
    llm,
    embed_model,
    persist_dir=persist_dir,
    documents=documents,
    vector_store_kwargs=dict(),
)

In [None]:
# Ask the RAG engine for a marketing brochure; the prompt is assembled from
# adjacent string literals, one sentence per line.
prompt = (
    "You have been given a documents that describe the firm's investment strategy. "
    "Highlight portions of the document that include a unique opportunity to invest in pre-IPO companies and put investors on a level playing field with VCs and big institutional investors. "
    "You need to use this information to create a marketing brochure that will attract investors to the firm's products. "
    "The brochure should be informative, engaging, and persuasive. "
    "You must disclose in fine print that there are no guarantees, and investing is risky so nothing promissory."
)
response = query_engine.query(prompt)
print(response.response)

In [None]:
from llama_index.core import get_response_synthesizer
from llama_index.core import DocumentSummaryIndex
from llama_index.core.node_parser import SentenceSplitter

In [None]:
# Tree-summarize synthesizer with async execution enabled.
# splitter = SentenceSplitter(chunk_size=4000, chunk_overlap=100)
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize", use_async=True)

# Keyword arguments for the index build (a custom splitter can be added via
# transformations=[splitter]).
summary_index_kwargs = dict(
    llm=llm,
    response_synthesizer=response_synthesizer,
    show_progress=True,
)
doc_summary_index = DocumentSummaryIndex.from_documents(documents, **summary_index_kwargs)

In [None]:
prompt_summary = "Use specific details from the given documents to create a professional marketing letter that highlights portions of the context including a unique opportunity to invest in pre-IPO companies and put investors on a level playing field with VCs and big institutional investors, qualifications of the investment team, and why they should invest now. Use details from the documents, but do not make up information. Be professional and persuasive. At the end of the letter, you must disclose in fine print that there are no guarantees, and investing is risky."
response = await summarizer.aget_response(prompt_summary, [doc.text for doc in documents[1:]])

In [None]:
# Show the generated marketing letter.
print(response)

In [None]:
# Print the source passages behind the last response. Only query-engine
# responses carry `source_nodes`; a synthesizer's aget_response returns plain
# text, for which the attribute lookup used to raise AttributeError.
source_nodes = getattr(response, "source_nodes", None)
if source_nodes:
    for node in source_nodes:
        print(node.text)
else:
    print("No source nodes are attached to this response.")