In [None]:
# TODO: notify if ollama server is running with model loaded
import subprocess, os
from llama_index.core import Document
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI as LOpenAI
from dotenv import load_dotenv
load_dotenv('/workspace/repos/agentic-ai/.env')

# Backend selection is driven by the model name: names containing "gpt-4o" go to
# OpenAI, everything else is served by the local Ollama server.
model_name, ctx_len = "llama3.1:8b-instruct-q8_0", 128000
# model_name, ctx_len = "qwen2.5:3b-instruct-q8_0", 128000
# ollama pull hf.co/mradermacher/SaulLM-54B-Instruct-i1-GGUF:Q6_K
# ollama pull hf.co/mradermacher/SaulLM-54B-Instruct-i1-GGUF:Q4_K_M
# ollama pull hf.co/bartowski/Llama-3.1-Nemotron-70B-Instruct-HF-GGUF:Q4_K_M


if "gpt-4o" in model_name:
    openai_key = os.getenv("OPENAI_API_KEY")
    if not openai_key:
        # Writing None into os.environ raises an opaque TypeError; fail with a clear message instead.
        raise RuntimeError("OPENAI_API_KEY is not set; add it to the .env file or the environment.")
    os.environ["OPENAI_API_KEY"] = openai_key

    print(f"Using OpenAI {model_name}...")
    llm = LOpenAI(model=model_name, max_tokens=8000)
else:
    # Best effort: make sure the model is available locally, but never abort the cell here.
    # Both CLI calls sit inside the try so a missing `ollama` binary is reported, not raised.
    try:
        subout = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
        if model_name in subout.stdout:
            print('Model loaded...')
        else:
            print("Pulling Ollama model...")
            sub_out = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True)
            # subprocess.run does not raise on a non-zero exit status, so check it explicitly.
            if sub_out.returncode != 0:
                print(f"Error pulling model: Is the Ollama server running?\n{sub_out.stderr}")
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error pulling model: Is the Ollama server running?\n{e}")

    system_prompt = "You are training an new Portfolio Manager of a hedgefund."
    additional_kwargs = {"num_predict": 1000}
    # `base_url` is the documented keyword for the server address (the cell previously passed `url=`).
    # NOTE(review): `model_type` does not look like a documented Ollama argument either - confirm.
    llm = Ollama(
        model=model_name,
        base_url="http://127.0.0.1:11434",
        context_window=ctx_len,
        model_type="chat",
        is_function_calling_model=True,
        request_timeout=4000.0,
        additional_kwargs=additional_kwargs,
        keep_alive=0,
    )  # , system_prompt=system_prompt)
    print(llm.metadata)

# Settings.llm = llm

In [None]:
import json, os
from llama_parse import LlamaParse
from llama_index.core import Document
from dotenv import load_dotenv
load_dotenv('/workspace/repos/project-mayhem/.env')

import nest_asyncio
nest_asyncio.apply()

llama_api_key = os.getenv("LLAMA_API_KEY")

def extract_text_from_pdf(pdf_urls, llama_api_key, llamaparse_kwargs=None, save_json_path=None):
    """Parse each PDF with LlamaParse and return the collected documents.

    Args:
        pdf_urls: Iterable of PDF locations, each passed to ``parser.load_data``.
        llama_api_key: API key handed to ``LlamaParse``.
        llamaparse_kwargs: Optional extra keyword arguments for the ``LlamaParse``
            constructor. ``None`` (the default) means no extra arguments.
        save_json_path: Optional path to a JSON file. When given, the file is read
            and its ``'text'`` entry is appended as one more ``Document``.

    Returns:
        A list with the items returned by ``parser.load_data`` for every PDF, in
        order, followed by the document built from ``save_json_path`` if provided.
    """
    # A None default avoids sharing one mutable dict between calls.
    parser = LlamaParse(api_key=llama_api_key, **(llamaparse_kwargs or {}))

    documents = []
    for pdf in pdf_urls:
        print('processing pdf:', pdf)
        documents.extend(parser.load_data(pdf))

    if save_json_path:
        # NOTE(review): despite the "save" in the parameter name this path is only read - confirm intent.
        with open(save_json_path, "r") as f:
            result = json.load(f)
        documents.append(Document(text=result['text']))

    return documents

# Comma-separated page numbers covering [beginning_of_chapter, end_of_chapter).
beginning_of_chapter = 21
end_of_chapter = 54 # 644
pages_to_extract = ",".join(
    str(page_number) for page_number in range(beginning_of_chapter, end_of_chapter)
)
# Run the parser on the selected pages of the textbook PDF.
principles_of_finance = "https://assets.openstax.org/oscms-prodcms/media/documents/PrinciplesofFinance-WEB.pdf"
documents = extract_text_from_pdf(
    pdf_urls=[principles_of_finance],
    llama_api_key=llama_api_key,
    llamaparse_kwargs={"split_by_page": False, "target_pages": pages_to_extract},
    save_json_path=None,
)


In [None]:
from llama_index.core.node_parser import SemanticSplitterNodeParser
# IPython-only introspection: the trailing `?` displays the class help. This is not valid plain Python.
SemanticSplitterNodeParser?

In [None]:
import json
# Persist only the text of every document, as a JSON list of strings.
with open("/workspace/data/principles_of_finance.json", "w") as f:
    serialized_texts = json.dumps([document.text for document in documents])
    f.write(serialized_texts)

In [None]:
import json
# Rebuild the document list from the JSON list of strings written by the save cell.
with open("/workspace/data/principles_of_finance.json", "r") as f:
    saved_texts = json.load(f)
    documents = [Document(text=saved_text) for saved_text in saved_texts]

In [None]:
from llama_index.embeddings.ollama import OllamaEmbedding
# OLLAMA_HOST="http://127.0.0.1:11436" ollama start 
# The semantic-splitter cell that follows reads `embed_model`, so bind the
# embedding client under that name; otherwise a fresh Restart & Run All fails
# with a NameError before the later cell that also defines `embed_model`.
embed_model = OllamaEmbedding(
    model_name="llama3.2:1b-instruct-q8_0", # other embed option "bge-m3"
    # model_name="bge-m3", # other embed option "bge-m3"
    base_url="http://127.0.0.1:11436",
    ollama_additional_kwargs={"mirostat": 0, "keep_alive": 0},
)
# Backward-compatible alias for the name this cell originally bound.
uvu_policy_manual_embed_model = embed_model

In [None]:
from llama_index.core.node_parser import SemanticSplitterNodeParser

# Split `documents` into nodes with the semantic splitter, which embeds via `embed_model`.
splitter = SemanticSplitterNodeParser(
    buffer_size=1,
    embed_model=embed_model,
    include_metadata=True,
)
nodes = splitter.get_nodes_from_documents(
    documents,
    show_progress=True,
)
print(f"Number of nodes: {len(nodes)}")

In [None]:
from datasets import load_dataset
import json

# Load the dataset and look at the first record stored under its second key.
data_path = "microsoft/orca-agentinstruct-1M-v1"
dataset = load_dataset(data_path)
orca_keys = [dataset_key for dataset_key in dataset.keys()]

okey = orca_keys[1]
first_record = dataset[okey][0]
teststr = first_record['messages']
# `messages` arrives as a JSON string; decode it into Python objects.
list_of_dicts = json.loads(teststr)

print(okey)
list_of_dicts

In [None]:
from instruction_prompts import all_questions
from parse_instruction_output import all_parsers

In [None]:
# Names of all available instruction prompts.
question_list = [question_name for question_name in all_questions.keys()]

In [None]:
# Try the 'word_definition' instruction prompt on the text of the first node.
word_definition_prompt = all_questions['word_definition'].format(financial_text=nodes[0].text)
response = llm.complete(word_definition_prompt)

In [None]:
# Sample textbook passage kept inline as a plain string; a later cell wraps it in a
# Document. The text is raw extraction output, so page furniture is kept as-is.
doc="""Importance of Macroeconomic Variables in Financial Markets
To make financial forecasts, managers need good information to understand the relationship among several
economic variables. Working from small to large, sales forecasts estimate the likely price and quantity of
goods sold. In doing so, the forecaster will consider local, regional, state, national, and international economic
conditions. Inflation is an important macroeconomic variable that influences prices. Every quarter, financial
information hubs, such as the Wall Street Journal (WSJ), and government agencies and regulatory bodies, such
as the Treasury Department and the Federal Reserve, release estimates about expected and current inflation.
This information informs policy makers how to adjust the money supply to meet target objectives. Financial
forecasters pay close attention to current and expected interest rates, as they have a fundamental impact on
the cost of raising money and determining the required rate of return for investment.
The unemployment rate helps inform financial forecasters about the expected cost of labor and the ability of
employers to hire people if a firm plans to increase the production of goods or services. The stock market is a
forward-looking macroeconomic variable and measures investor expectations about future cash flows and
economic growth. Political economic variables such as changes in regulation or tax policy can also affect
forecasting models.
LINK TO LEARNING
Politics and Stock Markets
Politics and stock market returns make for heated conversations. Who can run the country’s economic
system better, Republicans or Democrats? Presidents often take the credit or blame for overall economic
performance even though their actual influence is less than you might think. See this fun comparison chart
(https://openstax.org/r/macrotrends-net) of political economic performance that measures stock market
returns by each administration going back to President Warren Harding in 1920. Who had the highest
overall increase in the market? What president in the 21st century oversaw an overall decline in the market?
Just to get things going, who had better overall market returns after four years in office, Donald Trump or
Barack Obama?
Each of the variables we have identified—inflation, interest rates, unemployment, economic growth, the stock
market, and government fiscal policy—are macroeconomic factors. They are beyond the scope and influence
of individual firms, but combined, they play a critical role in establishing the market in which firms compete. A
better understanding of the interaction of these macro variables with each other and with individual micro or
firm-specific variables can only strengthen financial forecasting and management decision-making.
CONCEPTS IN PRACTICE
Here, There, and Everywhere: Where Did Your iPhone Come From?
How do international macroeconomic factors affect investment decisions for businesses and individuals?
Foreign investment adds risk and potential return to the decision-making process. Macroeconomic factors
such as different inflation rates, unexpected changes in currency exchange rates, and mismatched
economic growth all add to the uncertainty of making investments abroad. Just as important are
government regulations limiting pollution, exploitation of precious minerals, labor laws, and tariffs. Toss in
a pandemic, and a bottleneck or two, and suddenly international macroeconomic factors can affect almost
every aspect of commerce and international trade.
24 1 • Introduction to Finance
Access for free at openstax.org
For example, how far did your new iPhone travel before it got into your hands? Apple is an American
company headquartered in Cupertino, California, and worth over $2 trillion.8 However, your phone may
have visited as many as six continents before it reached you. Each location touched by the Apple corporate
hand requires an understanding of the financial impact on the product cost and a comparison with
alternative designs, resources, suppliers, manufacturers, and shippers. This is where finance can get really
fun!
Relationship between Microeconomics and Macroeconomics
In the parable, a group of blind people happen upon an elephant for the first time, and they each touch one
part—but one part only—of the elephant. Subsequently, when they each describe what they have discovered,
the descriptions are vastly different. The group's members become upset, accusing one another of inaccurate
descriptions or worse. The parable demonstrates how individuals can make absolute truths from their own
limited and subjective information. Financial decision makers run a similar risk, if they choose to recognize
only their own findings and ignore other microeconomic or macroeconomic information and the interaction of
these factors.
A common view to understanding economics states that macroeconomics is a top-down approach and
microeconomics is a bottom-up approach. Financial decision makers need to see both the forest and the
individual trees to chart a course and move toward a strategic objective. They need both the macro data, so
important for strategic thinking, and the micro data, required for tactical movement. For example, the national
rate of unemployment may not have been much help when Bacon Signs was searching for skilled laborers
who could form neon signs. However, the unemployment rate helped inform the company about the
probability of demand for new businesses and the signs they would need."""

In [None]:
from llama_index.embeddings.ollama import OllamaEmbedding
# OLLAMA_HOST="http://127.0.0.1:11436" ollama start 
# Embedding client for the semantic splitter; it targets the Ollama server on port 11436.
embedding_options = {"mirostat": 0, "keep_alive": 0}
embed_model = OllamaEmbedding(
    base_url="http://127.0.0.1:11436",
    model_name="llama3.2:1b-instruct-q8_0",  # alternative embedding model: "bge-m3"
    ollama_additional_kwargs=embedding_options,
)

In [None]:
# Rebind `documents` to a single in-memory document built from the sample passage.
sample_document = Document(text=doc)
documents = [sample_document]

In [None]:
from llama_index.core.node_parser import SemanticSplitterNodeParser

# Split `documents` into nodes with the semantic splitter, which embeds via `embed_model`.
splitter = SemanticSplitterNodeParser(
    buffer_size=1,
    embed_model=embed_model,
    include_metadata=True,
)
nodes = splitter.get_nodes_from_documents(
    documents,
    show_progress=True,
)
print(f"Number of nodes: {len(nodes)}")

In [None]:


# Output skeletons shared by most tasks: one with an explicit answer line, one without.
_ANSWERED_FORMAT = "Output:\nAnswer:\nReasoning:\n"
_OUTPUT_ONLY_FORMAT = "Output:\nReasoning:\n"

# Maps task name -> (full task description, expected output skeleton).
task_list = {
    "antonym_relation": (
        "Generate a question asking for an antonym of a word in the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "one_sentence_description": (
        "Provide a one-sentence description of the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "interview_question_answering": (
        "Generate an interview question and answer based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "idiom_meaning": (
        "Generate an idiom from the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "explain_behavior": (
        "Generate human behavior in relation to the provided text, and provide an explanation of the behavior as the answer.",
        _ANSWERED_FORMAT,
    ),
    "question_answering_generation_from_facts": (
        "Generate a question and answer pair based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "analogy_completion_given_3_of_4_words": (
        "Generate an analogy from the given text. Leave one word blank. Provide the missing word as the answer.",
        _ANSWERED_FORMAT,
    ),
    "explain_complex_concept_to_someone_without_background": (
        "Generate an explanation of a complex concept from the given text, as if explaining it to someone without any background knowledge.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "fun_math_question": (
        "Generate a fun math question, with answers, based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "perfect_and_exact_numbers": (
        "Generate a question that requires the use of exact numbers, based on the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "story_composition": (
        "Generate a story based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "solving_equation": (
        "Generate an equation to solve, with its answer, based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "synonym_generation": (
        "Generate a question asking for a synonym for a word in the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "grammar_error_correction": (
        "Select a few sentences from the given text and rewrite them to include grammar and spelling mistakes. Provide the original sentences",
        _ANSWERED_FORMAT,
    ),
    "correct_misspelling": (
        "Select a few sentences from the given text and rewrite them to include spelling mistakes. Provide the original sentences",
        _ANSWERED_FORMAT,
    ),
    "math_word_problem_with_reasoning": (
        "Generate a math word problem with reasoning and answer based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "unethical_behavior": (
        "Generate an example of unethical behavior based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "text_to_conversation": (
        "Generate a conversation based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "metaphor_for_a_situation": (
        "Generate a metaphor for a situation based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "better_word_choice": (
        "Generate a question that asks for a better word choice for a word in the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "sentence_with_specified_ending": (
        "Specify an ending to a sentence, and generate a sentence with the specified ending based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "word_definition": (
        "Extract a word from the given text and generate a definition for the word.",
        _ANSWERED_FORMAT,
    ),
    "text_summarization": (
        "Generate a summary of the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "fill_in_the_masked_word": (
        "Generate a sentence with a masked word based on the given text. Provide the masked word as the answer.",
        _ANSWERED_FORMAT,
    ),
    "writing_article_from_outline": (
        "Write an outline for an article based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "socratic_questioning": (
        "Generate a series of Socratic questions based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "paper_title_generation": (
        "Generate a title for a paper based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "start_professional_conversation_from_text": (
        "Generate the start of a professional conversation based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "numerical_question_answering": (
        "Generate a numerical based question and answer based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "ethical_and_behavioral_interview_questions": (
        "Generate ethical and behavioral interview questions based on the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "meaning_to_phrase": (
        "Generate a phrase that conveys the meaning of the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "paraphrase_a_paragraph": (
        "Paraphrase a paragraph based on the given text.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "ner_fill-in-the-blank": (
        "Generate a fill in the blank question with the named entities as answers from the given text.",
        _ANSWERED_FORMAT,
    ),
    "explain_concept_like_i_am_5": (
        "Explain a concept based on the given text as if you are explaining it to a 5-year-old.",
        _OUTPUT_ONLY_FORMAT,
    ),
    "solving_math_problem_with_intermediate_steps": (
        "Generate a math problem with intermediate steps and answer based on the given text.",
        _ANSWERED_FORMAT,
    ),
    "ethical_dilemma_based_on_text": (
        "Generate an ethical dilemma based on the given text. Provide a solution.",
        _ANSWERED_FORMAT,
    ),
    "fill_in_the_blank": (
        "Generate a fill in the blank question based on the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "paraphrasing_classification_for_two_sentences": (
        "Based on the given text, generate two sentences that are either the same paraphrased versions of the original text, or different paraphrased versions of the original text. Randomly select whether the sentences are the same or not. Provide the classification.",
        "Output 1:\nOutput 2:\nAnswer:\nReasoning:\n",
    ),
    "ethicality_judgement": (
        "Generate a question that asks for the ethicality of a situation based on the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "topic_classification": (
        "Generate a question that asks for the topic of the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "fact_verification": (
        "Generate a question that asks for the verification of a fact based on the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
    "most_relevant_passage_from_user_query": (
        "Generate a query from the given text and ask for the most relevant passage based on the query as the answer.",
        _ANSWERED_FORMAT,
    ),
    "support_classification_of_text": (
        "Given the text, generate a claim that either supports the text or does not support the text. Provide the classification.",
        _ANSWERED_FORMAT,
    ),
    "fact_checking_based_on_knowledge": (
        "Generate a question that asks for the fact-checking of a statement based on the given text. Provide the answer.",
        _ANSWERED_FORMAT,
    ),
}


In [None]:
# Template for asking the LLM to write a data-generation prompt for one task.
# It is filled with str.format, so single-brace fields ({task_description},
# {full_description}, {output_format}, {response_guidelines}) are substituted
# here, while the double-brace fields come out of that first format call as
# single-brace placeholders ({context_text}, {special_instructions},
# {num_generate}) for a later substitution.
meta_prompt = """You are a prompt engineering assistant. Your task is to create high-quality prompt templates for language models that will generate synthetic training data for the task: '{task_description}'. The purpose of these prompt templates is to instruct the language model to generate examples that match the specified output format. Your prompt should include detailed instructions and reasoning for the language model to follow. Here is the full description of the task:
Full Task Description:
{full_description}

When designing a prompt template:

1. Include these three user provided string format parameters in the prompt template:
- {{context_text}}: Input text to base generations on
- {{special_instructions}}: Any specific requirements
- {{num_generate}}: Number of examples to generate

2. Include these sections:
Task Overview:
[Explanation derived from task_description and full_description]

Input Context:
{{context_text}}

Special Instructions:
{{special_instructions}}

Generation Instructions:
Generate {{num_generate}} examples that match the specified output format.
For each example:
1. Use the input context to generate the example
2. Provide step-by-step reasoning for each generation
3. Follow the exact format output as follows:
{output_format}

{response_guidelines}
"""

In [None]:
of='Output: [Question with context]\nAnswer: [The answer to the generated question]\nReasoning: [A step-by-step explanation justifying the generation]\n'
response_guidelines="""Response Guidelines:
- Do not use Markdown or HTML formatting
- Do not mention the existence of the text Input Context
- Each generation must include explicit reasoning
- Follow the exact output format specified
- Stay consistent with task objectives
- Focus on realistic, high-quality examples
- Only output the prompt template without any pretext information, meta-commentary or formatting"""

In [None]:
# task= "antonym_relation"
task='paraphrasing_classification_for_two_sentences'
# Fill the meta-prompt for the chosen task and let the LLM write the generation prompt template.
task_full_description, task_output_format = task_list[task]
task_gen_prompt = llm.complete(
    meta_prompt.format(
        task_description=task,
        full_description=task_full_description,
        output_format=task_output_format,
        response_guidelines=response_guidelines,
    )
)

In [None]:
# Show the prompt template the LLM produced.
generated_template_text = task_gen_prompt.text
print(generated_template_text)

In [None]:
def _fill_prompt_template(template, **values):
    """Replace ``{name}`` placeholders in ``template`` with the given keyword values.

    The template here is LLM-generated text. ``str.format`` would raise
    KeyError / ValueError / IndexError on any other brace it contains (JSON
    snippets, ``{}``, an unmatched ``{``), so only the named placeholders are
    substituted and every other character is left untouched. All placeholders
    are replaced in one pass, so braces inside a substituted value are never
    re-interpreted.

    Args:
        template: Text containing zero or more ``{name}`` placeholders.
        **values: Placeholder names mapped to replacement values (converted with ``str``).

    Returns:
        The template with every occurrence of each known placeholder replaced.
    """
    import re  # local import keeps this cell self-contained

    if not values:
        return template
    placeholder = re.compile(r"\{(" + "|".join(re.escape(name) for name in values) + r")\}")
    return placeholder.sub(lambda match: str(values[match.group(1)]), template)


# Generate examples from the first node using the LLM-written template, then
# repeat the response guidelines at the end of the prompt.
response = llm.complete(
    _fill_prompt_template(
        task_gen_prompt.text,
        context_text=nodes[0].text,
        special_instructions="",
        num_generate=3,
    )
    + f"\n\n{response_guidelines}"
)

In [None]:
# Show the generated examples.
generated_examples_text = response.text
print(generated_examples_text)

In [None]:
def parse_output(input_text):
    """Extract labelled fields from LLM output by scanning it line by line.

    A line starting with ``Output:`` or ``Output <something>:`` adds one entry
    to the outputs list (the text after the first colon). ``Answer:`` and
    ``Reasoning:`` lines set the answer and the reasoning. When a label has no
    text on its own line, the next non-blank unlabelled line is used as its
    value. Lines that start with ``Output `` but contain no colon are treated
    as ordinary text instead of raising IndexError.

    Args:
        input_text: Raw text to scan.

    Returns:
        A tuple ``(outputs, answer, reasoning)``: ``outputs`` is a list of
        strings, ``answer`` and ``reasoning`` are strings (empty when absent).
        If several ``Answer:`` / ``Reasoning:`` lines occur, the last one wins.
    """
    outputs = []
    fields = {"answer": "", "reasoning": ""}
    pending = None  # label still waiting for its value on a following line

    for line in input_text.split('\n'):
        label, value = None, ""
        if line.startswith("Output:") or line.startswith("Output "):
            _, colon, tail = line.partition(":")
            if colon:
                label, value = "output", tail.strip()
        elif line.startswith("Answer:"):
            label, value = "answer", line[len("Answer:"):].strip()
        elif line.startswith("Reasoning:"):
            label, value = "reasoning", line[len("Reasoning:"):].strip()

        if label is None:
            # Ordinary text: it only matters when a preceding label had no value.
            text = line.strip()
            if pending is not None and text:
                if pending == "output":
                    outputs[-1] = text
                else:
                    fields[pending] = text
                pending = None
            continue

        if label == "output":
            outputs.append(value)
        else:
            fields[label] = value
        pending = None if value else label

    return outputs, fields["answer"], fields["reasoning"]

In [None]:
# Sample LLM response used to exercise parse_output. Note that the third
# example puts its Output text on the line after the label.
test="""Here are three examples that match the specified output format:


**Example 1**

Output: To make financial forecasts, managers need good information to understand the relationship among several economic variables.
Answer: Different
Reasoning: The two sentences are different paraphrased versions of the original text. While both sentences convey the importance of understanding economic relationships for financial forecasting, they use distinct wording and phrasing.


**Example 2**

Output: Every quarter, financial information hubs release estimates about expected and current inflation.
Answer: Different
Reasoning: The two sentences are different paraphrased versions of the original text. Although both sentences mention quarterly releases of inflation data, they use distinct language and formatting.


**Example 3**

Output: 
Financial forecasters pay close attention to current and expected interest rates because they significantly impact the cost of raising money.
Answer: Different
Reasoning: The two sentences are different paraphrased versions of the original text. While both sentences discuss the importance of interest rates for financial forecasting, they use distinct wording and phrasing to convey this idea.
"""

In [None]:
# Display what the parser returns for the sample response (bare expression, so the cell shows it).
parse_output(test)

In [None]:
import re 
def parse_output(output):
    """Parse every ``Output / Answer / Reasoning`` triple found in ``output``.

    Each triple starts at ``Output:`` (or ``Output <digit>:``), followed by
    ``Answer:`` and ``Reasoning:``. Field text may span several lines. The
    reasoning runs until the next line that begins an ``Output`` label or the
    end of the text, so any text between examples (such as a header line)
    stays attached to the preceding reasoning.

    Args:
        output: Raw text to parse.

    Returns:
        A list of dicts with keys ``'output'``, ``'answer'`` and
        ``'reasoning'`` (whitespace-stripped), one per triple in order of
        appearance; an empty list when nothing matches.
    """
    # The capture groups must be lazy `.*?`: a `.?` group holds at most one
    # character and therefore never matches real text. The end-of-reasoning
    # lookahead is anchored to a new line carrying an Output label, so the word
    # "Output" inside a sentence does not cut the reasoning short.
    pattern = re.compile(
        r"Output(?: \d)?:\s*(.*?)\n?\n?Answer:\s*(.*?)\n?\n?Reasoning:\s*(.*?)(?=\n\s*Output(?: \d)?:|\Z)",
        re.DOTALL,
    )
    return [
        {'output': out_text.strip(), 'answer': answer_text.strip(), 'reasoning': reasoning_text.strip()}
        for out_text, answer_text, reasoning_text in pattern.findall(output)
    ]

In [None]:
import re

def parse_output(output):
    """Return the texts following ``Output 1: `` and, if present, ``Output 2: ``.

    ``Output 2`` is only recognised on the line directly after the end of the
    ``Output 1`` text. Each value may end at a newline or at the end of the
    input, so a single ``Output 1`` line and input without a trailing newline
    are both accepted.

    Args:
        output: Raw text to search.

    Returns:
        ``(output1, output2)``. ``output2`` is ``None`` when there is no
        ``Output 2`` line or its text is empty. ``(None, None)`` when no
        ``Output 1: `` is found.
    """
    # `(?:\n|\Z)` instead of a bare `\n`: the second output is optional and the
    # last line is not required to be newline-terminated.
    pattern = r"Output 1: (.*?)(?:\n|\Z)(?:Output 2: (.*?))?(?:\n|\Z)"
    match = re.search(pattern, output, re.DOTALL)

    if match is None:
        return None, None

    output1, output2 = match.groups()
    return output1, (output2 or None)

# Example usage: parse a two-output sample and print both values.
test = "Output 1: Value1\nOutput 2: Value2\n"
parsed_pair = parse_output(test)
output1, output2 = parsed_pair
print(f"Output 1: {output1}, Output 2: {output2}")