In [1]:
import os
import pandas as pd
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain.agents import AgentType
from langchain import FewShotPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from rouge_score import rouge_scorer
from sentence_transformers import SentenceTransformer

In [2]:
# Make sure both provider keys exist in the environment without overwriting
# values that were already exported before the kernel started. The fallbacks
# are intentionally empty: real secrets must not be committed to the notebook.
os.environ.setdefault("OPENAI_API_KEY", "")

os.environ.setdefault("GOOGLE_API_KEY", "")

In [None]:
# Load the financial table used by the agent; the path is relative to the
# notebook's working directory.
df = pd.read_csv("finans.csv")
# Bare last expression so Jupyter renders the frame as cell output.
df

In [None]:
# Choose the chat model shared by the dataframe agent and the summary chain.
# The commented lines are alternative backends; all use temperature=0.
#llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
#llm = ChatOllama(model="gemma2:9b", temperature=0)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0)

# Print the concrete class to confirm which backend is active.
print(type(llm))

In [None]:
# Prompt for the agent: a rigid Thought / Action / Action Input / Final Answer
# layout followed by the actual question about the dataset.
query = """

For every response, follow this strict format:

1. Thought: Provide your thought process.
2. Action: State the action you will take (e.g., analyze_column).
3. Action Input: Provide the input for the action (e.g., the column name).
4. Final Answer: Provide your conclusion or analysis.

Can you describe the given dataset? What is the dataset about? What are the columns and what do they represent?
Think step by step.
"""

# Pandas dataframe agent over `df`. number_of_head_rows is set to the full row
# count and the run is capped at three iterations.
agent = create_pandas_dataframe_agent(
    llm,
    df,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    # agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    number_of_head_rows=df.shape[0],
    max_iterations=3,
    verbose=True,
    allow_dangerous_code=True,
)

try:
    agent_result = agent.invoke(query)
    response = agent_result["output"]
except Exception as agent_error:
    # Best effort: keep the error text as the response so the following cells
    # still have something to work with.
    response = str(agent_error)


In [None]:
# Show the agent's answer, or the captured exception text if the run failed.
response

In [8]:
# Few-shot examples for the summarisation prompt. Each entry pairs a raw agent
# output ("agent_output") with the summary expected for it ("summary").
examples = [
    {
        "agent_output": """An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Parsing LLM output produced both a final answer and a parse-able action:: 1. **Thought:** The dataset appears to track financial metrics over time. The first column, "Metrics", likely lists the specific financial measures being tracked. The remaining columns represent different years from 2014 to 2023.


2. **Action:** python_repl_ast
3. **Action Input:** print(df.head())
4. **Observation:**  

```
            Metrics       2014       2015       2016       2017       2018  \
0      Net Revenue   71537.14  122985.28  191166.93  210916.53   28892.26   
1  Operating Costs   13802.18   90355.33  132564.46   65384.14  278429.73   
2     Gross Profit   37777.14  122216.88   46822.39  194469.15  148443.40   
3            Taxes  179213.45  219638.18  131592.86  209942.17  185695.02   
4       Net Income  297151.16   83893.72   62105.06   47566.50   61001.34   

        2019       2020       2021       2022       2023  
0  158131.90   55412.42   44390.00  197604.56  125449.27  
1   66884.82   25662.97  165766.93  102527.72   49780.61  
2  191415.56  295809.37  134085.72  227645.00   18907.24  
3  190911.59   78370.23  230701.83  120794.54  199445.19  
4   45859.36  182117.34  202499.03  184534.90  286222.75 
```

Thought: The dataset contains financial metrics for different years. Each row represents a specific metric (e.g., Net Revenue, Operating Costs), and each column represents a year from 2014 to 2023.



Final Answer: The dataset tracks various financial metrics over time. It includes""",

        "summary": """The financial table provided tracks different financial metrics over a span of ten years, from 2014 to 2023. The first column, labeled "Metrics," lists specific financial measures such as Net Revenue, Operating Costs, Gross Profit, Taxes, and Net Income. Each row corresponds to a particular metric, while each subsequent column represents a different year. The table showcases the values of these metrics for each year, offering a comprehensive overview of the company's financial performance over the specified period.""",
    },
    {
        "agent_output": """An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Parsing LLM output produced both a final answer and a parse-able action:: 1. **Thought:** The provided data appears to be a table with numerical values.  We need to examine each row and column to understand its meaning. 

2. **Action:** analyze_columns

3. **Action Input:** All columns

4. **Final Answer:** The dataset seems to track financial or market data over a period of time (likely daily). Here's a breakdown:

* **Column 1 (Number):**  Likely a unique identifier for each day (e.g., record number).
* **Column 2 (Date):** Represents the date for each data point.
* **Columns 3-8:** These columns likely contain numerical values representing different financial indicators. Without further context, it's difficult to say precisely what they represent.  Possible interpretations include:
    * Opening price, closing price, high/low prices of a security or market index.
    * Trading volume, market capitalization, or other financial metrics.

To get a more accurate understanding, we would need additional information about the source of the data and the specific definitions of each column. 
""",

        "summary": """The financial table provided appears to track financial or market data over a period of time, likely on a daily basis. 

The columns in the table are as follows:
1. Column 1 (Number): Likely a unique identifier for each day, such as a record number.
2. Column 2 (Date): Represents the date for each data point.
3. Columns 3-8: These columns likely contain numerical values representing different financial indicators. Without further context, it is challenging to precisely determine what they represent. Possible interpretations include opening price, closing price, high/low prices of a security or market index, trading volume, market capitalization, or other financial metrics.

To gain a more accurate understanding of the data, additional information about the source of the data and the specific definitions of each column would be required.""",
    },

]

# Layout used to render one few-shot example: the agent output, then its summary.
example_template = """
Agent Output: {agent_output}
Summary: {summary}
"""

# Prompt template that fills the layout above from one entry of `examples`.
example_prompt = PromptTemplate(
    input_variables=["agent_output", "summary"],
    template=example_template
)

# Instruction text placed before the rendered few-shot examples.
prefix = """
You will be given an output from an AI agent that is responsible to analyze financial tables.

Can you provide a summary of the given text? Do not include any insights or trends or any other analysis. Just describe the columns and what they represent, and the rows and what they represent. And in general what is this table?

Just focus on what the financial table is about, ignore the actions, observations, errors and other logs that the agent produced.

Tell the findings and descriptions like you are reporting to finance banking CEO in a good storytelling way. Use the original words from the agent output as much as possible.

Examples: 

"""

# Text placed after the examples: the output-format instructions, then the new
# agent output, then an open "Summary:" slot.
suffix = """
{format_instructions}
Agent Output: {agent_output}
Summary:
"""

# Few-shot summarisation prompt: prefix, the rendered examples, then the suffix.
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    # The suffix uses both {format_instructions} and {agent_output}; declare
    # both so the prompt's declared inputs match its placeholders and the
    # template stays valid when template validation is enabled.
    input_variables=["agent_output", "format_instructions"],
    example_separator="\n\n"
)

In [9]:
# Structured result expected from the summarisation chain.
# NOTE(review): documented with `#` comments rather than a class docstring,
# since a docstring may end up in the model schema and therefore in the format
# instructions sent to the LLM — confirm before converting to a docstring.
class Summary(BaseModel):
    # The summary text returned by the model.
    text: str = Field(description="Summary of the given text.")

# Parser that turns the model's reply into a Summary instance.
parser = PydanticOutputParser(pydantic_object=Summary)

In [10]:
# Summarisation pipeline: few-shot prompt -> chat model -> Summary parser.
chain = prompt | llm | parser 

In [11]:
# Summarise the agent output; the parser's own instructions fill the
# {format_instructions} placeholder of the prompt.
summary_inputs = {
    "agent_output": response,
    "format_instructions": parser.get_format_instructions(),
}
second_response = chain.invoke(summary_inputs).text

In [None]:
# Print the generated summary as plain text.
print(second_response)

In [None]:
# ROUGE SCORE
# N-gram overlap (ROUGE-1..4 and ROUGE-L, with stemming) between the agent
# output and the summary generated from it.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rouge3', 'rouge4', 'rougeL'], use_stemmer=True)

# RougeScorer.score takes (target, prediction). The agent output is the
# reference text and the summary is the candidate; passing them the other way
# round swaps the reported precision and recall (the F-measure is unaffected).
scores = scorer.score(target=response, prediction=second_response)

scores

# TODO: DRAW GRAPH
# TODO: AUTO LLM EVALUATE

In [None]:
# Semantic textual similarity between the agent output and its summary.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally — confirm the repository is trusted.
semantic_model = SentenceTransformer(
    "mrm8488/gte-large-ft-webinstruct",
    trust_remote_code=True,
)

# Encode each text on its own, in the same order as before.
embeddings1, embeddings2 = (
    semantic_model.encode(text) for text in (response, second_response)
)

similarities = semantic_model.similarity(embeddings1, embeddings2)
print(similarities)

In [None]:
# Extract the single similarity value as a plain Python number.
similarities[0][0].item()

In [None]:
from FlagEmbedding import BGEM3FlagModel

# Load BGE-M3. Setting use_fp16 to True speeds up computation with a slight
# performance degradation.
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

# Same options for both texts: dense and sparse outputs, no ColBERT vectors.
encode_options = dict(return_dense=True, return_sparse=True, return_colbert_vecs=False)
output_1 = model.encode(response, **encode_options)
output_2 = model.encode(second_response, **encode_options)


In [None]:
from FlagEmbedding import BGEM3FlagModel

# `model`, `output_1` and `output_2` are reused from the previous cell.
# Reloading BAAI/bge-m3 and re-encoding the same two texts here only repeated
# that (expensive) work with identical arguments.

# Convert the lexical-weight ids of both texts into tokens for inspection
lexical_weights_text1 = model.convert_id_to_token(output_1['lexical_weights'])
lexical_weights_text2 = model.convert_id_to_token(output_2['lexical_weights'])

# Print token and their lexical weights
print(f"Lexical weights for text1: {lexical_weights_text1}")
print(f"Lexical weights for text2: {lexical_weights_text2}")
# Keep only the lexical-weight entries whose key is a string
def filter_lexical_weights(lexical_weights):
    """Return a new dict with the items of `lexical_weights` whose key is a str.

    The input mapping is not modified; insertion order of the kept items is
    preserved.
    """
    kept = {}
    for token, weight in lexical_weights.items():
        if isinstance(token, str):
            kept[token] = weight
    return kept

# Drop non-string keys from the lexical weights of both encoded texts
filtered_lexical_weights_1, filtered_lexical_weights_2 = (
    filter_lexical_weights(encoded['lexical_weights'])
    for encoded in (output_1, output_2)
)

# Lexical matching score between the two filtered weight mappings
lexical_scores = model.compute_lexical_matching_score(
    filtered_lexical_weights_1,
    filtered_lexical_weights_2,
)

# Report the score
print(f"Lexical matching score: {lexical_scores}")


In [None]:
# Inspect the raw lexical weights produced for the first text.
output_1['lexical_weights']

In [None]:

# Compute the score via lexical matching.
# `response` and `second_response` were each encoded as a single string, so
# 'lexical_weights' is expected to be one token->weight mapping per text
# (NOTE(review): confirm against the installed FlagEmbedding version).
# Indexing such a mapping with [0] looks up a key named 0 instead of selecting
# the first text, so only unwrap when a list (batch input) is actually present.
weights_1 = output_1['lexical_weights']
weights_2 = output_2['lexical_weights']
if isinstance(weights_1, list):
    weights_1 = weights_1[0]
if isinstance(weights_2, list):
    weights_2 = weights_2[0]

lexical_scores = model.compute_lexical_matching_score(weights_1, weights_2)
print(lexical_scores)

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Mean pooling that honours the attention mask, so masked positions do not
# contribute to the average
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the positions selected by `attention_mask`.

    Args:
        model_output: Indexable whose first element holds the token embeddings.
        attention_mask: Mask tensor; it is unsqueezed on the last axis and
            expanded to the shape of the token embeddings.

    Returns:
        The masked sum over dimension 1 divided by the mask sum over
        dimension 1 (clamped to at least 1e-9 to avoid division by zero).
    """
    hidden_states = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_sum = (hidden_states * mask).sum(dim=1)
    mask_total = mask.sum(dim=1).clamp(min=1e-9)
    return masked_sum / mask_total

# The two texts to embed are `response` and `second_response`.

# Load tokenizer and model from the HuggingFace Hub.
# NOTE(review): `model` is rebound here and no longer refers to the BGE-M3
# model used in the earlier cells.
checkpoint = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

# Tokenize both texts as one padded, truncated batch
encoded_input = tokenizer(
    [response, second_response],
    padding=True,
    truncation=True,
    return_tensors='pt',
)

# Forward pass without gradient tracking
with torch.no_grad():
    model_output = model(**encoded_input)

# Mask-aware mean pooling followed by L2 normalisation of each row
sentence_embeddings = F.normalize(
    mean_pooling(model_output, encoded_input['attention_mask']),
    p=2,
    dim=1,
)

# Cosine similarity between the two embedding vectors
first_vector, second_vector = sentence_embeddings[0], sentence_embeddings[1]
cosine_sim = F.cosine_similarity(first_vector, second_vector, dim=0)

print("Sentence embeddings:")
print(sentence_embeddings)

print("\nCosine similarity between the two sentences:")
print(cosine_sim.item())


In [23]:
# ASKING TO OTHER LLMs
# Second chat model used to judge whether the agent output and its summary
# express the same information.
other_llm = ChatOllama(model="llama3.1:latest", temperature=0)

In [24]:
# Instruction for the judging model: compare two texts and answer Yes or No,
# followed by the parser's format instructions.
prompt_string = """
You will be given two texts. Your task is to compare the two texts and detect if they express the same information or not.

TEXT 1: {text1}
TEXT 2: {text2}

Please answer with "Yes" if the two texts express the same information, or "No" otherwise.

{format_instructions}
"""

# NOTE(review): this rebinds `prompt`, which earlier held the few-shot
# summarisation prompt; re-running the summarisation chain cell after this one
# would pick up this template instead.
prompt = PromptTemplate(
    input_variables=["text1", "text2"],
    template=prompt_string
)

# Structured verdict expected from the judging model.
# NOTE(review): documented with `#` comments rather than a class docstring,
# since a docstring may end up in the model schema and therefore in the format
# instructions sent to the LLM — confirm before converting to a docstring.
class OtherCheck(BaseModel):
    # "Yes" or "No", as requested in the field description.
    check: str = Field(description="Whether the two texts express the same information or not. (Yes or No)")

# NOTE(review): this rebinds `parser`, which earlier parsed Summary objects.
parser = PydanticOutputParser(pydantic_object=OtherCheck)

In [25]:
# Comparison pipeline: comparison prompt -> judging model -> OtherCheck parser.
other_chain = prompt | other_llm | parser

In [26]:
# Ask the judging model whether the agent output and its summary agree.
comparison_inputs = {
    "text1": response,
    "text2": second_response,
    "format_instructions": parser.get_format_instructions(),
}
other_response = other_chain.invoke(comparison_inputs)

In [None]:
# Print the parsed verdict returned by the comparison chain.
print(other_response)