In [4]:
import pandasai as pai
import pandas as pd
import ast
from dotenv import load_dotenv
import os 

load_dotenv()

pai_api_key = os.getenv("PAI_API_KEY")
if not pai_api_key:
    raise ValueError("PAI_API_KEY is not set")
else:
    print(f"OPENAI_API_KEY is set")

df = pd.read_csv("bank_data_23-25/barclays_072.csv")

# Sample DataFrame
df = pai.DataFrame(df)

pai.api_key.set(pai_api_key)



OPENAI_API_KEY is set


In [None]:
### extract currency and amount
df['currency'] = df['transactionAmount'].apply(lambda x: ast.literal_eval(x)['currency'])
df['amount'] = df['transactionAmount'].apply(lambda x: float(ast.literal_eval(x)['amount']))
df['amount'] = (df['amount'] * 100).astype(int)
df = df.drop('transactionAmount', axis=1)

## setting datetimeindex
df['bookingDate'] = pd.to_datetime(df['bookingDate'])
df.set_index('bookingDate', inplace=True)

# df.chat("What are the total expenses for 2024?")



### XERO TIME!!

In [47]:
BankTransactions = { 
    "BankTransactions": [ { "Type": "SPEND", 
                           "Contact": { "ContactID": "00000000-0000-0000-0000-000000000000" }, 
                            "Lineitems": [ { "Description": "Foobar", "Quantity": 1, "UnitAmount": 20, "AccountCode": "400" } ], 
                            "BankAccount": { "Code": "088" } 
                            } ] 
}     

In [48]:
BankTransactions = { 
    "BankTransactions": [ { "Type": "SPEND", 
                           "Contact": { "ContactID": "00000220-0000-0000-0000-000000000000" }, 
                            "Lineitems": [ { "Description": "Foobar", "Quantity": 1, "UnitAmount": 20, "AccountCode": "400" } ], 
                            "BankAccount": { "Code": "088" } 
                            } ] 
}     


""" PUT https://api.xero.com/api.xro/2.0/BankTransactions
    EXAMPLE BODY GET REQUEST
{
  "BankTransactions": [
    {
      "Type": "SPEND",
      "Contact": {
        "ContactID": "ea791a0a-081c-4833-a4f1-3cccb323ec4a"  
      },
      "LineItems": [
        {
          "Description": "Foobar",
          "Quantity": 1.0,
          "UnitAmount": 20.0,
          "AccountCode": "433" 
        }
      ],
      "BankAccount": {
        "Code": "600" 
      }
    }
  ]
}
"""

{'BankTransactions': [{'Type': 'SPEND',
   'Contact': {'ContactID': '00000000-0000-0000-0000-000000000000'},
   'Lineitems': [{'Description': 'Foobar',
     'Quantity': 1,
     'UnitAmount': 20,
     'AccountCode': '400'}],
   'BankAccount': {'Code': '088'}}]}

### LLM Reconciliation Time

- Add new columns "coa_agent", "coa_reason", "coa_agent_confidence" for LLM


In [53]:
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
import os
from dotenv import load_dotenv

load_dotenv()

# Set your Groq API key
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

# Initialize the ChatGroq model
# Using the Mixtral model, but you can also use "llama2-70b-4096" or others
chat_model = ChatGroq(model_name="Llama-3.3-70b-Specdec")

cal_system_prompt ="""
You are helpful assistant that helps me find available slots for my calendar.

You must only respond with the available slots, no other text.
"""

cal_user_prompt = """
# instructions 
I will provide the start and end dates (window), along with the unavailable slots in between. You must respond with two available 30 minuteslots in between this window, ideally spread out.
You must only respond with the available slots, no other text.

## start and end dates:
{'start': {'dateTime': '2025-02-13', 'timeZone': 'Europe/London'}, 'end': {'dateTime': '2025-02-13', 'timeZone': 'Europe/London'}}

## unavailable slots:
{'start': {'dateTime': '2025-02-13T10:30:00Z', 'timeZone': 'Europe/London'}, 'end': {'dateTime': '2025-02-13T11:00:00Z', 'timeZone': 'Europe/London'}}

## available slots:
"""

# Create messages with system prompt and human message
messages = [
    SystemMessage(content=cal_system_prompt),
    HumanMessage(content=cal_user_prompt)
]

# Send the message and get the response
response = chat_model.invoke(messages)

In [None]:
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
import os
from dotenv import load_dotenv
import time
from typing import Dict, Tuple
import logging
import json

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

load_dotenv()

# Initialize the ChatGroq model
chat_model = ChatGroq(model_name="Llama-3.3-70b-Specdec")

class TransactionClassifier:
    def __init__(self):
        logger.info("Initializing TransactionClassifier")
        self.system_prompt = """
        You are a financial expert responsible for classifying transactions into appropriate chart of accounts.
        For each transaction, you must provide:
        1. The most specific appropriate chart of account code
        2. A brief explanation of why this classification was chosen
        3. A confidence score between 0 and 1 (e.g., 0.95 for high confidence)

        You must respond with valid JSON in the following format only:
        {
            "account_code": "string",
            "reasoning": "string",
            "confidence": float
        }
        """
        self.last_request_time = 0
        self.rate_limit_delay = 2  # 2 seconds between requests (30 requests/minute)

    def _rate_limit(self):
        """Implement rate limiting"""
        current_time = time.time()
        time_since_last_request = current_time - self.last_request_time
        if time_since_last_request < self.rate_limit_delay:
            delay = self.rate_limit_delay - time_since_last_request
            logger.debug(f"Rate limiting: waiting {delay:.2f} seconds")
            time.sleep(delay)
        self.last_request_time = time.time()

    def classify_transaction(self, transaction: Dict) -> Tuple[str, str, float]:
        """Classify a single transaction using the LLM"""
        logger.info(f"Processing transaction: {transaction['description']}")
        self._rate_limit()

        # Format the transaction details for the LLM
        transaction_prompt = f"""
        Please classify the following transaction:
        Description: {transaction['description']}
        Amount: {transaction['amount']}
        Date: {transaction.get('date', 'Not provided')}
        
        Additional context: {transaction.get('additional_context', 'None')}
        """

        messages = [
            SystemMessage(content=self.system_prompt),
            HumanMessage(content=transaction_prompt)
        ]

        try:
            logger.debug("Sending request to LLM")
            response = chat_model.invoke(messages)
            logger.debug(f"Raw LLM response: {response.content}")

            # Try to parse the response as JSON
            try:
                # First, try direct JSON parsing
                result = json.loads(response.content)
                logger.info("Successfully parsed JSON response")
            except json.JSONDecodeError:
                # If direct parsing fails, try to extract JSON from the response
                logger.warning("Direct JSON parsing failed, attempting to extract JSON from response")
                # Look for JSON-like structure in the response
                import re
                json_match = re.search(r'\{.*\}', response.content, re.DOTALL)
                if json_match:
                    result = json.loads(json_match.group())
                    logger.info("Successfully extracted and parsed JSON from response")
                else:
                    raise ValueError("No JSON structure found in response")

            # Validate the response structure
            required_keys = {'account_code', 'reasoning', 'confidence'}
            if not all(key in result for key in required_keys):
                missing_keys = required_keys - result.keys()
                raise ValueError(f"Missing required keys in response: {missing_keys}")

            logger.info(f"Classification successful: {result['account_code']}")
            return (
                result['account_code'],
                result['reasoning'],
                result['confidence']
            )

        except Exception as e:
            logger.error(f"Classification failed: {str(e)}", exc_info=True)
            return (
                "ERROR",
                f"Classification failed: {str(e)}",
                0.0
            )

def process_transactions(transactions: list) -> list:
    """Process a list of transactions and add classifications"""
    logger.info(f"Starting to process {len(transactions)} transactions")
    classifier = TransactionClassifier()
    classified_transactions = []

    for idx, transaction in enumerate(transactions, 1):
        logger.info(f"Processing transaction {idx}/{len(transactions)}")
        account_code, reasoning, confidence = classifier.classify_transaction(transaction)
        
        # Add classification results to the transaction
        classified_transaction = transaction.copy()
        classified_transaction.update({
            'chart_of_account_agent': account_code,
            'chart_of_account_reason': reasoning,
            'chart_of_account_confidence': confidence
        })
        classified_transactions.append(classified_transaction)
        logger.debug(f"Transaction {idx} classification results: {account_code}")

    logger.info("Finished processing all transactions")
    return classified_transactions

# Example usage with error handling
if __name__ == "__main__":
    try:
        transactions = [
            {
                'description': 'Office supplies purchase from Staples',
                'amount': 156.78,
                'date': '2024-03-15',
                'additional_context': 'Monthly office supply restock'
            }
        ]
        
        logger.info("Starting transaction classification process")
        classified_transactions = process_transactions(transactions)
        logger.info("Classification process completed")
        
        # Print results in a readable format
        for transaction in classified_transactions:
            logger.info("Classification Results:")
            logger.info(f"Description: {transaction['description']}")
            logger.info(f"Account Code: {transaction['chart_of_account_agent']}")
            logger.info(f"Reason: {transaction['chart_of_account_reason']}")
            logger.info(f"Confidence: {transaction['chart_of_account_confidence']}")
            
    except Exception as e:
        logger.error("Main process failed", exc_info=True)
# Example usage
transactions = [
    {
        'description': 'Office supplies purchase from Staples',
        'amount': 156.78,
        'date': '2024-03-15',
        'additional_context': 'Monthly office supply restock'
    },
    # ... more transactions ...
]

classified_transactions = process_transactions(transactions)

classified_transactions

In [None]:

""" Fetch and store chart of accounts into parsed_accounts """
import json

# Read the JSON file
with open('coa.json', 'r') as file:
    data = json.load(file)

# Extract only the required fields from each account
parsed_accounts = []
for account in data['Accounts']:
    parsed_account = {
        'code': account.get('AccountID', ''),
        'name': account.get('Name', ''),
        'status': account.get('Status', ''),
        'type': account.get('Type', ''),
        'taxtype': account.get('TaxType', ''),
        'description': account.get('Description', ''),  # Note: Not present in sample but included as requested
        'class': account.get('Class', ''),
        'reportingcode': account.get('ReportingCode', '')
    }
    parsed_accounts.append(parsed_account)

# Optional: Print the results to verify
for account in parsed_accounts:
    print(account)

# # Optional: Save the parsed data to a new JSON file
# with open('parsed_accounts.json', 'w') as file:
#     json.dump(parsed_accounts, file, indent=2)