In [1]:
# The OpenAI SDK was updated on Nov 8, 2023 with new guidance for migration
# See: https://github.com/openai/openai-python/discussions/742

## Updated
import os
from openai import AzureOpenAI
from dotenv import load_dotenv
load_dotenv()

# Azure OpenAI client. The key and endpoint are looked up in the process
# environment; a missing variable raises KeyError.
client = AzureOpenAI(
    api_key=os.environ['AZURE_OPENAI_API_KEY'],  # also the SDK default, so it may be omitted
    azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'],
    api_version="2024-06-01",
)

# Deployment name passed as `model` when requesting chat completions.
deployment = os.environ['AZURE_OPENAI_DEPLOYMENT']

In [38]:
def get_completion(system_prompt, user_prompt):
    """Send a system/user prompt pair to the chat deployment and return the reply text.

    Uses the module-level ``client`` and ``deployment``. The token counts
    reported in ``response.usage`` (total, prompt, completion) are printed
    before the content of the first choice is returned.
    """
    response = client.chat.completions.create(
        model=deployment,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.5,  # degree of randomness of the model's output
        max_tokens=2048,
    )

    # Token usage as reported by the service for this request.
    usage = response.usage
    print(f"Total Tokens: {usage.total_tokens}")
    print(f"Prompt Tokens: {usage.prompt_tokens}")
    print(f"Completion Tokens: {usage.completion_tokens}")

    return response.choices[0].message.content


# Prompting with Data

## Example 1
Variance analysis using a small, made-up forecast and actuals dataset.

In [39]:
import pandas as pd
# Made-up forecast (budget): unit cost and quantity for five records.
forecast_df = pd.DataFrame(
    {
        'Record_ID': [1, 2, 3, 4, 5],
        'Unit_Cost': [10, 20, 30, 40, 50],
        'Number of Units': [10, 80, 8, 5, 8],
    }
)
forecast_df['Total Cost'] = forecast_df['Unit_Cost'].mul(forecast_df['Number of Units'])

# Made-up actuals for the same five records.
actuals_df = pd.DataFrame(
    {
        'Record_ID': [1, 2, 3, 4, 5],
        'Unit_Cost': [8, 22, 25, 42, 48],
        'Number of Units': [8, 70, 9, 7, 9],
    }
)
actuals_df['Total Cost'] = actuals_df['Unit_Cost'].mul(actuals_df['Number of Units'])

In [40]:
# Preview the forecast frame, including the derived 'Total Cost' column.
forecast_df.head()

Unnamed: 0,Record_ID,Unit_Cost,Number of Units,Total Cost
0,1,10,10,100
1,2,20,80,1600
2,3,30,8,240
3,4,40,5,200
4,5,50,8,400


In [41]:
# Preview the actuals frame, including the derived 'Total Cost' column.
actuals_df.head()

Unnamed: 0,Record_ID,Unit_Cost,Number of Units,Total Cost
0,1,8,8,64
1,2,22,70,1540
2,3,25,9,225
3,4,42,7,294
4,5,48,9,432


In [42]:
def get_compressed_json(df):
    """Return ``df`` serialized as a compact JSON string of row records.

    The rows are converted to a list of dicts, flattened with
    ``pd.json_normalize``, and written with ``to_json(orient='records')``.
    """
    row_records = df.to_dict(orient='records')
    return pd.json_normalize(row_records).to_json(orient='records')


def get_prompts(forecast_df, actuals_df, compress=False):
    """Build the system and user prompts for a forecast-vs-actuals variance analysis.

    Args:
        forecast_df: DataFrame holding the forecast (budget) values.
        actuals_df: DataFrame holding the reported actual values.
        compress: When True, each frame is serialized with
            ``get_compressed_json`` (a JSON string of row records). When
            False, the result of ``DataFrame.to_dict()`` is embedded instead.

    Returns:
        A ``(system_prompt, user_prompt)`` tuple of strings.
    """
    # Choose how the two frames are rendered inside the user prompt.
    if compress:
        forecast_data = get_compressed_json(forecast_df)
        actuals_data = get_compressed_json(actuals_df)
    else:
        forecast_data = forecast_df.to_dict()
        actuals_data = actuals_df.to_dict()

    # 1. System prompt: the assistant's role (no placeholders, so a plain string).
    system_prompt = """
    You are a financial analysis assistant.
    You help by comparing actual and forecast data for financial data, in detail and summarizing results. 
    """

    # 2. User prompt: task description with both datasets interpolated.
    user_prompt = f"""
    There are a set of expenses that are compiled under the travel and entertainment General Ledger account.
    These expenses are composed of unit costs and number of units.

    There is a forecast (also known as budget) of each item with the following values:
    {forecast_data}

    Similarly, actual results were reported with the following values:
    {actuals_data}

    Provide a brief analysis of the variance between actuals and forecast, highlighting the main causes of the variance.
    Call out specific percentages or dollar values.
    """
    return system_prompt, user_prompt

In [43]:
# Baseline run: with compress=False, get_prompts embeds the DataFrame.to_dict()
# output of both frames in the user prompt. get_completion prints the token
# counts before returning the reply, for comparison with the compress=True run.
system_prompt, user_prompt = get_prompts(forecast_df, actuals_df, compress=False)
response = get_completion(system_prompt,user_prompt)
print(response)

Total Tokens: 1191
Prompt Tokens: 420
Completion Tokens: 771
### Variance Analysis: Travel and Entertainment Expenses

#### Summary of Forecast vs. Actual Data

| Record_ID | Forecast Unit Cost | Actual Unit Cost | Forecast Units | Actual Units | Forecast Total Cost | Actual Total Cost |
|-----------|---------------------|------------------|----------------|--------------|---------------------|-------------------|
| 1         | $10                 | $8               | 10             | 8            | $100                | $64               |
| 2         | $20                 | $22              | 80             | 70           | $1600               | $1540             |
| 3         | $30                 | $25              | 8              | 9            | $240                | $225              |
| 4         | $40                 | $42              | 5              | 7            | $200                | $294              |
| 5         | $50                 | $48              | 8          

In [44]:
# Compressed run: with compress=True, get_prompts serializes both frames with
# get_compressed_json (a JSON string of row records) before building the prompt.
# Compare the printed token counts with the compress=False run.
system_prompt, user_prompt = get_prompts(forecast_df, actuals_df, compress=True)
response = get_completion(system_prompt,user_prompt)
print(response)

Total Tokens: 1092
Prompt Tokens: 368
Completion Tokens: 724
### Variance Analysis: Travel and Entertainment Expenses

#### Summary of Forecast vs. Actual Data

| Record ID | Unit Cost (Forecast) | Number of Units (Forecast) | Total Cost (Forecast) | Unit Cost (Actual) | Number of Units (Actual) | Total Cost (Actual) | Variance ($) | Variance (%) |
|-----------|----------------------|----------------------------|-----------------------|---------------------|-------------------------|---------------------|--------------|--------------|
| 1         | $10                  | 10                         | $100                  | $8                  | 8                       | $64                 | -$36        | -36.0%       |
| 2         | $20                  | 80                         | $1600                 | $22                 | 70                      | $1540               | -$60        | -3.8%        |
| 3         | $30                  | 8                          | $240           

## Example 2
Uses the Microsoft "Business expense budget" template, available here: https://create.microsoft.com/en-us/template/business-expense-budget-d14f83e2-f49d-4080-9e95-0bd4849fa63f


In [45]:
# Forecast and actuals are two sheets of the same workbook.
forecast_workbook_path = actuals_workbook_path = 'files/Business expense budget.xlsx'
forecast_sheet_name, actuals_sheet_name = (
    'Forecast Expenses Database',
    'Actuals Expenses Database',
)

# Load each sheet into its own frame (rebinds forecast_df / actuals_df).
forecast_df = pd.read_excel(forecast_workbook_path, sheet_name=forecast_sheet_name)
actuals_df = pd.read_excel(actuals_workbook_path, sheet_name=actuals_sheet_name)

# Print a preview of each frame, forecast first.
print(forecast_df.head(), actuals_df.head(), sep='\n')

         Category Sucategory Time Period  Expense
0  Employee costs      Wages         Jan    85000
1  Employee costs      Wages         Feb    85000
2  Employee costs      Wages         Mar    85000
3  Employee costs      Wages         Apr    87500
4  Employee costs      Wages         May    87500
         Category Sucategory Time Period       Expense
0  Employee costs      Wages         Jan  84824.384249
1  Employee costs      Wages         Feb  84938.026249
2  Employee costs      Wages         Mar  85035.009233
3  Employee costs      Wages         Apr  87463.619793
4  Employee costs      Wages         May  87263.353172


In [46]:
# Baseline run on the workbook data: compress=False embeds the
# DataFrame.to_dict() output of both frames in the user prompt.
# get_completion prints the token counts before returning the reply.
system_prompt, user_prompt = get_prompts(forecast_df, actuals_df,compress=False)
response = get_completion(system_prompt,user_prompt)
print(response)

Total Tokens: 12419
Prompt Tokens: 11916
Completion Tokens: 503
### Variance Analysis: Actual vs. Forecast Expenses

**Overview:**
The analysis compares the forecasted expenses against the actual expenses for various categories under the Travel and Entertainment General Ledger account. 

**1. Total Expenses:**
- **Forecasted Total:** $1,540,000
- **Actual Total:** $1,539,000
- **Variance:** -$1,000
- **Percentage Variance:** -0.065%

**2. Category Breakdown:**
- **Employee Costs:**
  - **Forecasted:** $1,067,000
  - **Actual:** $1,066,000
  - **Variance:** -$1,000
  - **Percentage Variance:** -0.094%

- **Office Costs:**
  - **Forecasted:** $480,000
  - **Actual:** $479,000
  - **Variance:** -$1,000
  - **Percentage Variance:** -0.208%

- **Training/Travel:**
  - **Forecasted:** $200,000
  - **Actual:** $195,000
  - **Variance:** -$5,000
  - **Percentage Variance:** -2.5%

**3. Key Variance Drivers:**
- **Employee Costs:** The slight variance in employee costs can be attributed to mino

In [47]:
# Compressed run on the workbook data: compress=True serializes both frames
# with get_compressed_json before building the prompt. Compare the printed
# token counts with the compress=False run.
system_prompt, user_prompt = get_prompts(forecast_df, actuals_df,compress=True)
response = get_completion(system_prompt,user_prompt)
print(response)

Total Tokens: 9893
Prompt Tokens: 9350
Completion Tokens: 543
### Variance Analysis: Actual vs. Forecast Expenses

#### Summary of Total Variances
- **Total Forecasted Expenses**: $1,542,130
- **Total Actual Expenses**: $1,518,541
- **Total Variance**: **$23,589** (1.53% under budget)

### Detailed Variance Breakdown by Category

1. **Employee Costs**
   - **Forecast**: $1,355,090
   - **Actual**: $1,354,371
   - **Variance**: **$719** (0.05% under budget)
   - **Main Causes**: Minor variances across wages and benefits, with actual wages slightly lower than forecasted by approximately $56, and benefits lower by around $663.

2. **Office Costs**
   - **Forecast**: $139,040
   - **Actual**: $140,649
   - **Variance**: **-$1,609** (1.16% over budget)
   - **Main Causes**: Overages in office lease costs ($682 more than forecast) and security expenses ($926 more than forecast) contributed to this increase. 

3. **Training/Travel**
   - **Forecast**: $48,000
   - **Actual**: $23,519
   - **V