## Similarity Search with CoA

In [2]:
from app.services.vectorise_data import get_embedding

# Embed a sample query string with the project's embedding helper; the second
# argument ('retrieval.query') is passed through to the helper unchanged.
# The captured log output below shows the request being sent to Jina AI.
# NOTE(review): top-level `await` only works inside a notebook/IPython session.
response = await get_embedding("salary payment to developers", 'retrieval.query')

INFO:app.services.vectorise_data:Requesting embedding from Jina AI
DEBUG:app.services.vectorise_data:Successfully received embedding from Jina AI


In [None]:
import voyageai
import os 
from dotenv import load_dotenv
from typing import Literal
load_dotenv()  # make variables from a local .env file visible to os.getenv

# Read the Voyage AI key from the environment.
# NOTE(review): os.getenv returns None when VOYAGE_API_KEY is unset and no
# check is made before the client is built.
voyage_api_key = os.getenv("VOYAGE_API_KEY")

client = voyageai.Client(api_key=voyage_api_key)
# Type alias naming the two input types of interest.
# NOTE(review): this alias is not referenced anywhere else in this cell.
input_type = Literal["document", "query"]

# Request an embedding for one sample sentence from the "voyage-finance-2" model.
# NOTE(review): the sentence reads like a search query but is embedded with
# input_type="document" — confirm which one is intended.
response = client.embed(
    texts="salary payment to developers",
    model="voyage-finance-2",
    input_type="document"
)
# Last expression of the cell, so the notebook displays the embeddings.
response.embeddings

In [None]:
from app.services.xero import vectorise_coa

# Run the project's chart-of-accounts vectorisation helper with the argument "ALL".
# NOTE(review): presumably "ALL" selects every account — confirm in app.services.xero.
await vectorise_coa("ALL")

### Import bank data from csv

In [8]:
import pandasai as pai
import pandas as pd
import ast
from dotenv import load_dotenv
import os 

load_dotenv()

# pandasai is currently disabled; the commented lines are kept for reference.
# pai_api_key = os.getenv("OPENAI_API_KEY")
# if not pai_api_key:
#     raise ValueError("PAI_API_KEY is not set")
# else:
#     print(f"OPENAI_API_KEY is set")

df = pd.read_csv("bank_data_23-25/barclays_072.csv")

# # Sample DataFrame
# df = pai.DataFrame(df)

# pai.api_key.set(pai_api_key)


### extract currency and amount
# Each `transactionAmount` cell holds the string form of a dict with
# 'currency' and 'amount' keys; parse every cell once and reuse the result.
transaction_amounts = df['transactionAmount'].apply(ast.literal_eval)
df['currency'] = transaction_amounts.apply(lambda parsed: parsed['currency'])
df['amount'] = transaction_amounts.apply(lambda parsed: float(parsed['amount']))
# Store amounts as integer minor units. Round BEFORE casting: a float product
# such as 19.99 * 100 == 1998.9999999999998 would otherwise be truncated to
# 1998 by astype(int), silently dropping a penny.
df['amount'] = (df['amount'] * 100).round().astype(int)
df = df.drop('transactionAmount', axis=1)

## setting datetimeindex
df['bookingDate'] = pd.to_datetime(df['bookingDate'])
df = df.set_index('bookingDate')
df = df.drop(['valueDate', 'bookingDateTime', 'valueDateTime', 'internalTransactionId'], axis=1)
df = df.rename(columns={'remittanceInformationUnstructured': 'remittanceInfo'})

# df.chat("What are the total expenses for 2024?")


### LLM Reconciliation Time

- Add new columns "coa_agent", "coa_reason", and "coa_confidence" to hold the LLM's chosen account, its reasoning, and its confidence score


In [None]:
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from dotenv import load_dotenv
import time
from typing import Dict, Tuple
import logging
import json


# Configure logging: INFO level, "timestamp - level - message" format.
# NOTE(review): at INFO level the logger.debug(...) calls in this notebook are
# filtered out (unless logging was already configured elsewhere beforehand).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

load_dotenv()

# Initialize the Groq and OpenAI chat models.
# NOTE(review): only openai_model is called by TransactionClassifier in this
# cell; groq_model is created but not referenced there.
groq_model = ChatGroq(model_name="Llama-3.3-70b-Specdec")
openai_model = ChatOpenAI(model="gpt-4o")

class TransactionClassifier:
    """Classify bank transactions against a chart of accounts using an LLM.

    Requests are sent through the module-level ``openai_model`` and are spaced
    at least ``rate_limit_delay`` seconds apart.
    """

    def __init__(self):
        logger.info("Initializing TransactionClassifier")
        self.system_prompt = """
        You are a helpful financial expert responsible for classifying transactions.
        You will be given some transactions and a list of chart of accounts to reconcile. 

        For each transaction, you must provide:
        1. The most appropriate chart of account
        2. A brief explanation of why this classification was chosen. If confidence score is low, explain why. Take hints from the description of the transaction, look out for entity names, and infer the purpose of the transaction.
        3. A confidence score between 0 and 1 (e.g., 0.95 for high confidence)

        # IMPORTANT: Instructions for your response:
        You must respond with valid JSON in the following format only:
        {
            "account": "string",
            "reasoning": "string",
            "confidence": float
        }
        """
        self.last_request_time = 0
        self.rate_limit_delay = 2  # 2 seconds between requests (30 requests/minute)

    def _rate_limit(self):
        """Sleep just long enough to keep requests ``rate_limit_delay`` seconds apart."""
        elapsed = time.time() - self.last_request_time
        if elapsed < self.rate_limit_delay:
            delay = self.rate_limit_delay - elapsed
            logger.debug(f"Rate limiting: waiting {delay:.2f} seconds")
            time.sleep(delay)
        self.last_request_time = time.time()

    @staticmethod
    def _extract_json(content: str) -> Tuple[Dict, bool]:
        """Parse the JSON payload contained in an LLM reply.

        Args:
            content: Raw text returned by the model.

        Returns:
            ``(parsed, used_fallback)``. ``used_fallback`` is True when the
            reply was not pure JSON and the outermost ``{...}`` span had to be
            cut out of surrounding text (for example a Markdown code fence).

        Raises:
            ValueError: If the reply holds no ``{...}`` span, or that span is
                not valid JSON (``json.JSONDecodeError`` subclasses ValueError).
        """
        try:
            return json.loads(content), False
        except json.JSONDecodeError:
            import re  # local import: `re` is not imported at file level

            match = re.search(r'\{.*\}', content, re.DOTALL)
            if match is None:
                raise ValueError("No JSON structure found in response")
            return json.loads(match.group()), True

    @staticmethod
    def _validate_result(result) -> Tuple[str, str, float]:
        """Check the parsed reply and return ``(account, reasoning, confidence)``.

        The confidence is coerced to ``float`` so callers always receive a
        number, even when the model quotes it as a string such as ``"0.9"``.

        Raises:
            ValueError: If the reply is not a JSON object, lacks a required
                key, or carries a confidence that cannot be read as a number.
        """
        if not isinstance(result, dict):
            raise ValueError(
                f"Expected a JSON object in response, got {type(result).__name__}"
            )

        required_keys = {'account', 'reasoning', 'confidence'}
        missing_keys = required_keys - result.keys()
        if missing_keys:
            raise ValueError(f"Missing required keys in response: {missing_keys}")

        try:
            confidence = float(result['confidence'])
        except (TypeError, ValueError) as exc:
            raise ValueError(
                f"Confidence is not a number: {result['confidence']!r}"
            ) from exc

        return result['account'], result['reasoning'], confidence

    def classify_transaction(self, transaction: Dict, chart_of_accounts: list) -> Tuple[str, str, float]:
        """Classify a single transaction using the LLM.

        Args:
            transaction: Transaction fields; interpolated into the prompt as-is.
            chart_of_accounts: Candidate accounts; interpolated into the prompt as-is.

        Returns:
            ``(account, reasoning, confidence)``. On any failure (request error,
            unparsable or incomplete reply) the tuple is
            ``("ERROR", "Classification failed: <message>", 0.0)`` and no
            exception is raised.
        """
        logger.info(f"Processing transaction: {transaction}")
        self._rate_limit()

        # Format the transaction details for the LLM
        transaction_prompt = f"""
        Please classify the following transaction:
        Transaction: {transaction}
        
        chart of accounts: {chart_of_accounts}
        """

        messages = [
            SystemMessage(content=self.system_prompt),
            HumanMessage(content=transaction_prompt)
        ]

        try:
            logger.debug("Sending request to LLM")
            # Plain prints keep the exchange visible in the notebook output
            print("\n=== Sending request to LLM ===")
            response = openai_model.invoke(messages)
            print("\n=== Raw LLM Response ===")
            print(response.content)
            logger.debug(f"Raw LLM response: {response.content}")

            result, used_fallback = self._extract_json(response.content)
            if used_fallback:
                logger.warning("Direct JSON parsing failed, attempting to extract JSON from response")
                logger.info("Successfully extracted and parsed JSON from response")
            else:
                logger.info("Successfully parsed JSON response")

            account, reasoning, confidence = self._validate_result(result)
            logger.info(f"Classification successful: {account}")
            return account, reasoning, confidence

        except Exception as e:
            # Deliberately broad: one bad transaction must not abort a whole
            # batch, so the failure is logged and reported in-band instead.
            logger.error(f"Classification failed: {str(e)}", exc_info=True)
            return (
                "ERROR",
                f"Classification failed: {str(e)}",
                0.0
            )


def process_transactions(df: pd.DataFrame, chart_of_accounts: list) -> pd.DataFrame:
    """Classify every row of ``df`` and return a copy with the results attached.

    Args:
        df: Transactions, one per row. Must contain an ``amount`` column, which
            is divided by 100 before being shown to the LLM.
        chart_of_accounts: Candidate accounts handed to the classifier unchanged.

    Returns:
        A copy of ``df`` with ``coa_agent``, ``coa_reason`` and
        ``coa_confidence`` columns filled in; the input frame is not modified.
    """
    logger.info(f"Starting to process {len(df)} transactions")
    classifier = TransactionClassifier()

    # Work on a fresh copy so the caller's DataFrame is left untouched
    df = df.copy()

    # Explicitly clear any previous results
    result_columns = ['coa_agent', 'coa_reason', 'coa_confidence']
    for column in result_columns:
        df[column] = None

    # Column positions are loop-invariant; look them up once. Rows are written
    # by position (iloc) rather than by label so that duplicate index values
    # still address exactly one row.
    agent_pos, reason_pos, confidence_pos = (
        df.columns.get_loc(column) for column in result_columns
    )

    for i, (idx, row) in enumerate(df.iterrows()):
        logger.info(f"Processing transaction {idx}")
        # Send only the source fields to the LLM: the still-empty result
        # placeholders would just add noise to the prompt.
        transaction = row.drop(labels=result_columns).to_dict()
        transaction['amount'] = transaction['amount'] / 100
        account, reasoning, confidence = classifier.classify_transaction(transaction, chart_of_accounts)

        df.iloc[i, agent_pos] = account
        df.iloc[i, reason_pos] = reasoning
        df.iloc[i, confidence_pos] = confidence

        # Print summary of classification; str() guards against a reply whose
        # "reasoning" value is not a string.
        if reasoning:
            reason_preview = ' '.join(str(reasoning).split()[:10]) + '...'
        else:
            reason_preview = 'No reason provided'
        logger.info(f"Transaction {idx}:")
        logger.info(f"Account: {account}")
        logger.info(f"Reason Preview: {reason_preview}")
        logger.info("-" * 50)

    # Add verification logging
    processed_count = int(df['coa_agent'].notna().sum())
    logger.info(f"Number of processed transactions: {processed_count}")

    logger.info("Finished processing all transactions")
    return df


""" Fetch and store chart of accounts into parsed_accounts """
import json

# Read the JSON file
with open('coa.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Keep only ACTIVE accounts, reduced to the fields the classifier needs.
# The filter lives inside the comprehension so that a non-active account is
# skipped entirely instead of re-appending the previous account's entry.
parsed_accounts = [
    {
        'code': account.get('Code', ''),
        'name': account.get('Name', ''),
        'type': account.get('Type', ''),
        'description': account.get('Description', ''),  # Note: Not present in sample but included as requested
        'class': account.get('Class', ''),
    }
    for account in data['Accounts']
    if account.get('Status') == 'ACTIVE'
]

# Snapshot every row of the frame as a plain dict, one per transaction
transactions = [record.to_dict() for _, record in df.iterrows()]

# Classify a copy of the frame and keep the annotated result
df_processed = process_transactions(df.copy(), parsed_accounts)

df_processed

In [None]:
from pydantic import BaseModel
import pandas as pd

from app.services.supabase import get_supabase
# Obtain the project's Supabase client (awaited, so this cell needs a running event loop).
supabase = await get_supabase()

# Pydantic model naming the per-transaction fields; the field names match the
# columns of `result_df` built further down in this cell.
# NOTE(review): the model is declared but not instantiated in this cell.
class TransactionToLLM(BaseModel):
    entity_name : str
    amount : float
    remittance_info : str
    ntropy_enrich : bool
    ntropy_entity : str
    ntropy_category : str



# Query both tables (every column of every row)
ntropy_response = await supabase.table('ntropy_transactions').select('*').execute()
gocardless_response = await supabase.table('gocardless_transactions').select('*').execute()

# Convert to DataFrames
ntropy_df = pd.DataFrame(ntropy_response.data)
gocardless_df = pd.DataFrame(gocardless_response.data)

# Merge DataFrames on the specified keys.
# Left join: every GoCardless row is kept; rows without an Ntropy match get
# missing values in the Ntropy columns.
# NOTE(review): if both tables share a column name (e.g. 'amount'), pandas
# suffixes them (_x/_y) and the lookups below would fail — confirm the schemas.
merged_df = pd.merge(
    gocardless_df,
    ntropy_df,
    how='left',
    left_on='id',
    right_on='ntropy_id'
)

# Create a new DataFrame with selected columns.
# 'entity_name' and 'ntropy_entity' are computed by the same expression, so
# they hold identical values. 'amount' is divided by 100.
# NOTE(review): the lambdas index nested keys directly, so an enriched record
# without a counterparty or a general category would raise — confirm the
# payload always carries them.
result_df = pd.DataFrame({
    'entity_name': merged_df['enriched_data'].apply(lambda x: x['entities']['counterparty']['name'] if pd.notnull(x) else None),
    'amount': merged_df['amount']/100,
    'remittance_info': merged_df['remittance_info'],
    'ntropy_enrich': merged_df['enriched_data'].notnull(),
    'ntropy_entity': merged_df['enriched_data'].apply(lambda x: x['entities']['counterparty']['name'] if pd.notnull(x) else None),
    'ntropy_category': merged_df['enriched_data'].apply(lambda x: x['categories']['general'] if pd.notnull(x) else None)
})

# Print or return the result
print(result_df)

In [None]:
" NTROPY "
import requests

# Fail fast when the Ntropy key is missing from the environment.
# NOTE(review): `os` is not imported in this cell; it relies on an earlier cell.
ntropy_api_key = os.getenv("NTROPY_API_KEY")
if not ntropy_api_key:
    raise ValueError("NTROPY_API_KEY is not set")
else:
    print(f"NTROPY_API_KEY is set")

""" CREATE NEW ACCOUNT HOLDER """
url = "https://api.ntropy.com/v3/account_holders"
# `headers` is reused by the later Ntropy cells in this notebook.
headers = {
    "Accept": "application/json",
    "X-API-KEY": ntropy_api_key,
    "Content-Type": "application/json"
}

# Account holder payload; the same id is passed as `account_holder_id` when
# transactions are transformed later in the notebook.
data = {
    "id": "35b927b6-6fda-40aa-93b8-95b47c2b2cad",
    "type": "business",
    "name": "Michael Ali",
    "website": "https://flowon.ai",
    "industry": "ai software"
}

# NOTE(review): no timeout is passed and the HTTP status is not checked; an
# error reply is simply printed as JSON.
response = requests.post(url, headers=headers, json=data)
print(response.json())


In [None]:
""" NTROPY BATCH PROCESS TRANSACTIONS"""
import uuid

url = "https://api.ntropy.com/v3/batches/"


# Batch payload: the operation to apply plus the list of transactions.
# NOTE(review): `transformed_data` is defined in a later cell of this notebook
# and `headers` in an earlier one, so this cell depends on execution order.
data = {
        "operation": "POST /v3/transactions",
        "data": transformed_data
    }

response = requests.post(url, headers=headers, json=data)
print(response.json())


In [None]:
""" NOW CHECK TRANSACTION STATUS """
# Hard-coded id of the batch to look up.
# NOTE(review): presumably copied from the response of the batch submission — confirm.
batch_id = "1a2bc613-111b-49b1-b35c-77e9b1d7a2fc"

url = f"https://api.ntropy.com/v3/batches/{batch_id}/results"


# Fetch the batch results, reusing `headers` from the account-holder cell.
get_batch = requests.get(url, headers=headers)

# Last expression of the cell, so the notebook displays the decoded JSON.
get_batch.json()

In [None]:
# Show only the first entry of the 'results' list from the batch response.
get_batch.json()['results'][0]

In [None]:

# Hard-coded id of the single transaction to fetch.
transaction_id = "1177539c-b570-4588-9953-d76ae4647afb"

url = f"https://api.ntropy.com/v3/transactions/{transaction_id}"

# Reuses `headers` from the account-holder cell.
get_transaction = requests.get(url, headers=headers)

# Last expression of the cell, so the notebook displays the decoded JSON.
get_transaction.json()

In [None]:
import json 

""" DF TO JSON FIELDS """
# Convert DataFrame to JSON
def prepare_df_for_frontend(df):
    """Return the rows of ``df`` as a list of JSON-compatible dicts.

    The index is moved back into a regular column and ``bookingDate`` is
    rendered as a ``YYYY-MM-DDTHH:MM:SS`` string. The frame passed in is not
    modified.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S'

    # Bring the index (bookingDate) back as an ordinary column
    frame = df.reset_index()

    # Swap the datetime column for its formatted string form
    frame = frame.assign(
        bookingDate=frame['bookingDate'].dt.strftime(timestamp_format)
    )

    # Serialise to a JSON string of records, then decode it into Python objects
    return json.loads(frame.to_json(orient='records', date_format='iso'))

# Convert the bank transactions frame into a list of dicts (one per row).
json_data = prepare_df_for_frontend(df)



In [None]:
import uuid 
"""  """
def transform_transaction(transaction, account_holder_id):
    """Build the request payload dict for one transaction.

    A fresh random UUID is used as the id. The amount is divided by 100 and
    made positive; its original sign decides the entry type ("outgoing" for
    negative amounts, "incoming" otherwise). The country is fixed to "GB".
    """
    payload = {"id": str(uuid.uuid4())}
    payload["description"] = transaction["remittanceInfo"]

    # Keep only the part of the timestamp before the first "T"
    date_part, _, _ = transaction["bookingDate"].partition("T")
    payload["date"] = date_part

    signed_amount = transaction["amount"]
    payload["amount"] = abs(signed_amount/100)  # Make amount positive
    if signed_amount < 0:
        payload["entry_type"] = "outgoing"
    else:
        payload["entry_type"] = "incoming"

    payload["currency"] = transaction["currency"]
    payload["account_holder_id"] = account_holder_id
    payload["location"] = {"country": "GB"}
    return payload

# Example usage:
# Same id as the account holder payload created in the Ntropy cell.
account_holder_id = "35b927b6-6fda-40aa-93b8-95b47c2b2cad"
# Transform all transactions using list comprehension.
# `json_data` comes from the prepare_df_for_frontend cell; the resulting
# `transformed_data` is what the batch-submission cell sends.
transformed_data = [
    transform_transaction(transaction, account_holder_id) 
    for transaction in json_data
]

# Last expression of the cell, so the notebook displays the list.
transformed_data

## To evaluate responses of LLM

In [6]:
# Write the classified transactions (index included) to a CSV file for later review.
df_processed.to_csv("transactions_ai_reconciled.csv")