## Simi Search with CoA

In [7]:
import voyageai

vo = voyageai.Client()
# This will automatically use the environment variable VOYAGE_API_KEY.
# Alternatively, you can use vo = voyageai.Client(api_key="<your secret key>")

texts = [
    "The Mediterranean diet emphasizes fish, olive oil, and vegetables, believed to reduce chronic diseases.",
    "Photosynthesis in plants converts light energy into glucose and produces essential oxygen.",
    "20th-century innovations, from radios to smartphones, centered on electronic advancements.",
    "Rivers provide water, irrigation, and habitat for aquatic species, vital for ecosystems.",
    "Apple’s conference call to discuss fourth fiscal quarter results and business updates is scheduled for Thursday, November 2, 2023 at 2:00 p.m. PT / 5:00 p.m. ET.",
    "Shakespeare's works, like 'Hamlet' and 'A Midsummer Night's Dream,' endure in literature."
]

# Embed the documents
result = vo.embed(texts, model="voyage-3", input_type="document")
print(result.embeddings)

[[0.005100732669234276, -0.04479769989848137, 0.023507222533226013, 0.00046671158634126186, 0.034570515155792236, 0.03592182695865631, 0.01668955385684967, 0.04588081315159798, 0.009218454360961914, 0.05997063219547272, -0.02080487087368965, -0.012733262963593006, -0.03990061581134796, -0.058627624064683914, -0.02297099120914936, -0.026423323899507523, -0.027353469282388687, -0.04811771214008331, 0.03685704991221428, 0.005392604507505894, -0.03169400990009308, 0.045127611607313156, -0.03465614840388298, -0.020341765135526657, -0.012931176461279392, -0.057168398052453995, -0.00562303839251399, -0.014229186810553074, -0.025026574730873108, 0.020626181736588478, 0.011710495688021183, -0.016807515174150467, 0.01029419619590044, 0.004776011686772108, 0.01944831758737564, -0.011412533931434155, -0.02737291157245636, -0.0002529613848309964, -0.0034894689451903105, -0.03553517535328865, -0.015875838696956635, 0.0011499038664624095, -0.014021443203091621, 0.01771552860736847, -0.032497890293598

In [None]:
import voyageai
import os 
from dotenv import load_dotenv
from typing import Literal
load_dotenv()

voyage_api_key = os.getenv("VOYAGE_API_KEY")

client = voyageai.Client(api_key=voyage_api_key)
input_type = Literal["document", "query"]

response = client.embed(
    texts="salary payment to developers",
    model="voyage-finance-2",
    input_type="document"
)
response.embeddings

[[-0.0020986171439290047,
  0.0476432703435421,
  0.015492458827793598,
  0.019596422091126442,
  0.02047317661345005,
  0.047099966555833817,
  -0.007135298103094101,
  0.008650965988636017,
  0.014648348093032837,
  -0.03676777333021164,
  -0.024311313405632973,
  0.042998332530260086,
  -0.03851195424795151,
  -0.03447328507900238,
  -0.048995714634656906,
  0.002812146907672286,
  0.011122670955955982,
  0.006584994029253721,
  0.007834837771952152,
  -0.017740309238433838,
  -0.028988897800445557,
  -0.03125540539622307,
  0.015408513136208057,
  -0.047221217304468155,
  -0.0016975481994450092,
  -0.04727018252015114,
  0.023196713998913765,
  -0.023355277255177498,
  0.009970763698220253,
  -0.00036376030766405165,
  -0.01770300231873989,
  -0.08176679164171219,
  -0.03964054584503174,
  0.01396279875189066,
  -0.010800883173942566,
  -0.004537676926702261,
  -0.02581765316426754,
  0.04734480381011963,
  0.06055210158228874,
  -0.019349249079823494,
  -0.02805151604115963,
  0.0

In [None]:
from app.services.xero import vectorise_coa

await vectorise_coa("ALL")

In [1]:
from typing import List, Dict, Any

from app.services.vectorise_data import get_embedding, JinaTask
from app.services.supabase import get_supabase

supabase = await get_supabase()

async def search_chart_of_accounts(
    query_text: str,
    match_threshold: float = 0.5,
    match_count: int = 10,
    user_id: str = None
) -> List[Dict[Any, Any]]:
    """
    Search for chart of accounts entries similar to the provided query text.
    
    Args:
        query_text: The transaction description or query text
        match_threshold: Minimum similarity score (0-1)
        match_count: Maximum number of results to return
        user_id: Optional user ID to filter results
    
    Returns:
        List of matching chart of accounts entries with similarity scores
    """
    # Generate embedding for the query text
    embedding, token_count = await get_embedding(query_text, JinaTask.RETRIEVAL_QUERY)
    
    # Call the RPC function
    response = await supabase.rpc(
        'search_chart_of_accounts',
        {
            'query_text': query_text,
            'query_embedding': embedding,
            'match_threshold': match_threshold,
            'match_count': match_count,
            'search_user_id': user_id
        }
    ).execute()
    
    # Check for errors
    if response.error:
        raise Exception(f"Error calling search_chart_of_accounts: {response.error}")
    
    # Return the results
    return response.data


query = "Office supplies purchase for marketing department"

# Search for matching chart of accounts
results = await search_chart_of_accounts(
    query_text=query,
    match_threshold=0.6,
    match_count=5,
    user_id="user123"  # Optional, pass None to search all users
)

# Display results
print(f"Found {len(results)} matching accounts for query: '{query}'")
for i, match in enumerate(results, 1):
    print(f"{i}. {match['name']} (Account ID: {match['account_id']}, Code: {match['code']})")
    print(f"   Type: {match['account_type']}")
    print(f"   Description: {match['description']}")
    print(f"   Similarity: {match['similarity']:.4f}")
    print()



INFO:app.services.supabase:Initializing Supabase client...
INFO:app.services.supabase:Supabase client initialized successfully
INFO:app.services.vectorise_data:Requesting embedding from Jina AI
DEBUG:app.services.vectorise_data:Successfully received embedding from Jina AI
INFO:httpx:HTTP Request: POST https://lvhbpccylcxeehgxtrwv.supabase.co/rest/v1/rpc/search_chart_of_accounts "HTTP/2 200 OK"


AttributeError: 'SingleAPIResponse[~_ReturnT]' object has no attribute 'error'

In [None]:
APIError: {'code': '42702', 'details': None, 'hint': None, 'message': 'column reference "user_id" is ambiguous'}


### Import bank data from csv

In [None]:
import pandasai as pai
import pandas as pd
import ast
from dotenv import load_dotenv
import os 

load_dotenv()

pai_api_key = os.getenv("OPENAI_API_KEY")
if not pai_api_key:
    raise ValueError("PAI_API_KEY is not set")
else:
    print(f"OPENAI_API_KEY is set")

df = pd.read_csv("bank_data_23-25/barclays_072.csv")

# Sample DataFrame
df = pai.DataFrame(df)

pai.api_key.set(pai_api_key)


### extract currency and amount
df['currency'] = df['transactionAmount'].apply(lambda x: ast.literal_eval(x)['currency'])
df['amount'] = df['transactionAmount'].apply(lambda x: float(ast.literal_eval(x)['amount']))
df['amount'] = (df['amount'] * 100).astype(int)
df = df.drop('transactionAmount', axis=1)

## setting datetimeindex
df['bookingDate'] = pd.to_datetime(df['bookingDate'])
df.set_index('bookingDate', inplace=True)
df = df.drop(['valueDate', 'bookingDateTime', 'valueDateTime', 'internalTransactionId'], axis=1)
df = df.rename(columns={'remittanceInformationUnstructured': 'remittanceInfo'})

# df.chat("What are the total expenses for 2024?")


### XERO TIME!!

In [None]:
BankTransactions = { 
    "BankTransactions": [ { "Type": "SPEND", 
                           "Contact": { "ContactID": "00000220-0000-0000-0000-000000000000" }, 
                            "Lineitems": [ { "Description": "Foobar", "Quantity": 1, "UnitAmount": 20, "AccountCode": "400" } ], 
                            "BankAccount": { "Code": "088" } 
                            } ] 
}     


""" PUT https://api.xero.com/api.xro/2.0/BankTransactions

    EXAMPLE BODY GET REQUEST
{
  "BankTransactions": [
    {
      "Type": "SPEND",
      "Contact": {
        "ContactID": "ea791a0a-081c-4833-a4f1-3cccb323ec4a"  
      },
      "LineItems": [
        {
          "Description": "Foobar",
          "Quantity": 1.0,
          "UnitAmount": 20.0,
          "AccountCode": "433" 
        }
      ],
      "BankAccount": {
        "Code": "600" 
      }
    }
  ]
}
"""

### LLM Reconciliation Time

- Add new columns "coa_agent", "coa_reason", "coa_agent_confidence" for LLM


In [None]:
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from dotenv import load_dotenv
import time
from typing import Dict, Tuple
import logging
import json


# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

load_dotenv()

# Initialize the ChatGroq model
groq_model = ChatGroq(model_name="Llama-3.3-70b-Specdec")
openai_model = ChatOpenAI(model="gpt-4o")


class TransactionClassifier:
    def __init__(self):
        logger.info("Initializing TransactionClassifier")
        self.system_prompt = """
        You are a financial expert responsible for classifying transactions into appropriate chart of accounts.
        For each transaction, you must provide:
        1. The most specific appropriate chart of account
        2. A brief explanation of why this classification was chosen. If confidence score is low, explain why.
        3. A confidence score between 0 and 1 (e.g., 0.95 for high confidence)

        You must respond with valid JSON in the following format only:
        {
            "account": "string",
            "reasoning": "string",
            "confidence": float
        }
        """
        self.last_request_time = 0
        self.rate_limit_delay = 2  # 2 seconds between requests (30 requests/minute)

    def _rate_limit(self):
        """Implement rate limiting"""
        current_time = time.time()
        time_since_last_request = current_time - self.last_request_time
        if time_since_last_request < self.rate_limit_delay:
            delay = self.rate_limit_delay - time_since_last_request
            logger.debug(f"Rate limiting: waiting {delay:.2f} seconds")
            time.sleep(delay)
        self.last_request_time = time.time() 

    def classify_transaction(self, transaction: Dict, chart_of_accounts: list) -> Tuple[str, str, float]:
        """Classify a single transaction using the LLM"""
        logger.info(f"Processing transaction: {transaction}")
        self._rate_limit()

        # Format the transaction details for the LLM
        transaction_prompt = f"""
        Please classify the following transaction:
        Transaction: {transaction}
        
        """

        messages = [
            SystemMessage(content=self.system_prompt),
            HumanMessage(content=transaction_prompt)
        ]

        try:
            logger.debug("Sending request to LLM")
            # Add more visible print statements
            print("\n=== Sending request to LLM ===")
            response = openai_model.invoke(messages)
            print("\n=== Raw LLM Response ===")
            print(response.content)
            logger.debug(f"Raw LLM response: {response.content}")

            # Try to parse the response as JSON
            try:
                # First, try direct JSON parsing
                result = json.loads(response.content)
                logger.info("Successfully parsed JSON response")
            except json.JSONDecodeError:
                # If direct parsing fails, try to extract JSON from the response
                logger.warning("Direct JSON parsing failed, attempting to extract JSON from response")
                # Look for JSON-like structure in the response
                import re
                json_match = re.search(r'\{.*\}', response.content, re.DOTALL)
                if json_match:
                    result = json.loads(json_match.group())
                    logger.info("Successfully extracted and parsed JSON from response")
                else:
                    raise ValueError("No JSON structure found in response")

            # Validate the response structure
            required_keys = {'account', 'reasoning', 'confidence'}
            if not all(key in result for key in required_keys):
                missing_keys = required_keys - result.keys()
                raise ValueError(f"Missing required keys in response: {missing_keys}")

            logger.info(f"Classification successful: {result['account']}")
            return (
                result['account'],
                result['reasoning'],
                result['confidence']
            )

        except Exception as e:
            logger.error(f"Classification failed: {str(e)}", exc_info=True)
            return (
                "ERROR",
                f"Classification failed: {str(e)}",
                0.0
            )


def process_transactions(df: pd.DataFrame, chart_of_accounts: list) -> pd.DataFrame:
    """Process transactions in DataFrame and add classifications directly"""
    logger.info(f"Starting to process {len(df)} transactions")
    classifier = TransactionClassifier()

    # Create a fresh copy of the DataFrame
    df = df.copy()
    
    # Explicitly clear any previous results
    df['coa_agent'] = None
    df['coa_reason'] = None
    df['coa_confidence'] = None

    for i, (idx, row) in enumerate(df.iterrows()):
        print("row", row)
        logger.info(f"Processing transaction {idx}")
        transaction = row.to_dict()
        transaction['amount'] = transaction['amount']/100
        account, reasoning, confidence = classifier.classify_transaction(transaction, chart_of_accounts)
        
        # Update DataFrame using iloc instead of at
        df.iloc[i, df.columns.get_loc('coa_agent')] = account
        df.iloc[i, df.columns.get_loc('coa_reason')] = reasoning
        df.iloc[i, df.columns.get_loc('coa_confidence')] = confidence
        
        # Print summary of classification
        reason_preview = ' '.join(reasoning.split()[:10]) + '...' if reasoning else 'No reason provided'
        logger.info(f"Transaction {idx}:")
        logger.info(f"Account: {account}")
        logger.info(f"Reason Preview: {reason_preview}")
        logger.info("-" * 50)

    
    # Add verification logging
    processed_count = df[df['coa_agent'].notna()].shape[0]
    logger.info(f"Number of processed transactions: {processed_count}")
    
    logger.info("Finished processing all transactions")
    return df


""" Fetch and store chart of accounts into parsed_accounts """
import json

# Read the JSON file
with open('coa.json', 'r') as file:
    data = json.load(file)

# Extract only the required fields from each account
parsed_accounts = []
for account in data['Accounts']:
    parsed_account = {
        'code': account.get('AccountID', ''),
        'name': account.get('Name', ''),
        'status': account.get('Status', ''),
        'type': account.get('Type', ''),
        'taxtype': account.get('TaxType', ''),
        'description': account.get('Description', ''),  # Note: Not present in sample but included as requested
        'class': account.get('Class', ''),
        'reportingcode': account.get('ReportingCode', '')
    }
    parsed_accounts.append(parsed_account)

transactions = []  # Create empty list to store dictionaries

for index, row in df.iterrows():
    transaction = row.to_dict()
    transactions.append(transaction)

df_processed = process_transactions(df.copy(), parsed_accounts)

df_processed

In [None]:
"""
feeding per line of transaction
"""

In [None]:
from ntropy_sdk import SDK
import os 
from dotenv import load_dotenv

load_dotenv()

ntropy_api_key = os.getenv("NTROPY_API_KEY")

sdk = SDK(ntropy_api_key)

data = [{
    "id": "4yp49x3tbj9mD8DB4fM8DDY6Yxbx8YP14g565Xketw3tFmn",
    "description": "AMAZON WEB SERVICES",
    "entry_type": "outgoing",
    "amount": 12042.37,
    "currency": "USD",
    "date": "2021-11-01",
    "account_holder_id": "35b927b6-6fda-40aa-93b8-95b47c2b2cad",  # Add this required field
    "location": {
        "country": "US"
    }
}]

r = sdk.batches.create(
    operation="POST /v3/transactions",
    data=data
)


vars(r)

In [None]:
vars(vars(sdk.batches.results(id=r.id))['results'][0])

In [None]:
vars(sdk.batches.get(id=r.id))

In [None]:
sdk.batches.create(
    operation="POST /v3/transactions",
    data=transactions
)

In [None]:
vars(r)

In [None]:
" NTROPY "
import requests

ntropy_api_key = os.getenv("NTROPY_API_KEY")
if not ntropy_api_key:
    raise ValueError("NTROPY_API_KEY is not set")
else:
    print(f"NTROPY_API_KEY is set")

""" CREATE NEW ACCOUNT HOLDER """
url = "https://api.ntropy.com/v3/account_holders"
headers = {
    "Accept": "application/json",
    "X-API-KEY": ntropy_api_key,
    "Content-Type": "application/json"
}

data = {
    "id": "35b927b6-6fda-40aa-93b8-95b47c2b2cad",
    "type": "business",
    "name": "Michael Ali",
    "website": "https://flowon.ai",
    "industry": "ai software"
}

response = requests.post(url, headers=headers, json=data)
print(response.json())


In [None]:
""" NTROPY BATCH PROCESS TRANSACTIONS"""
import uuid

url = "https://api.ntropy.com/v3/batches/"


data = {
        "operation": "POST /v3/transactions",
        "data": transformed_data
    }

response = requests.post(url, headers=headers, json=data)
print(response.json())


In [None]:
""" NOW CHECK TRANSACTION STATUS """
batch_id = "1a2bc613-111b-49b1-b35c-77e9b1d7a2fc"

url = f"https://api.ntropy.com/v3/batches/{batch_id}/results"


get_batch = requests.get(url, headers=headers)

get_batch.json()

In [None]:
get_batch.json()['results'][0]

In [None]:

transaction_id = "1177539c-b570-4588-9953-d76ae4647afb"

url = f"https://api.ntropy.com/v3/transactions/{transaction_id}"

get_transaction = requests.get(url, headers=headers)

get_transaction.json()

In [None]:
import json 

""" DF TO JSON FIELDS """
# Convert DataFrame to JSON
def prepare_df_for_frontend(df):
    # Reset index to make bookingDate a column
    df = df.reset_index()
    
    # Convert datetime to ISO format string
    df['bookingDate'] = df['bookingDate'].dt.strftime('%Y-%m-%dT%H:%M:%S')
    
    # Convert to JSON records format (this gives us a string)
    json_string = df.to_json(orient='records', date_format='iso')
    
    # Parse the JSON string into Python objects (list of dictionaries)
    json_data = json.loads(json_string)
    
    return json_data

json_data = prepare_df_for_frontend(df)



In [None]:
import uuid 
"""  """
def transform_transaction(transaction, account_holder_id):
    # Transform a single transaction
    return {
        "id": str(uuid.uuid4()),
        "description": transaction["remittanceInfo"],
        "date": transaction["bookingDate"].split("T")[0],
        "amount": abs(transaction["amount"]/100),  # Make amount positive
        "entry_type": "outgoing" if transaction["amount"] < 0 else "incoming",
        "currency": transaction["currency"],
        "account_holder_id": account_holder_id,
        "location": {
            "country": "GB"
        }
    }

# Example usage:
account_holder_id = "35b927b6-6fda-40aa-93b8-95b47c2b2cad"
# Transform all transactions using list comprehension
transformed_data = [
    transform_transaction(transaction, account_holder_id) 
    for transaction in json_data
]

transformed_data

## To evaluate reponses of LLM

In [6]:
df_processed.to_csv("transactions_ai_reconciled.csv")