In [5]:
import pytest

import json
import jsonschema
from jsonschema import validate

# Define the structure of the JSON schema
schema = {
    "type": "object",
    "patternProperties": {
        ".*_metrics": {
            "type": "object",
            "properties": {
                "description": {"type": "string"},
                "metric_description": {
                    "type": "object",
                    "additionalProperties": {"type": "string"}
                },
                "class_weight": {"type": "integer"},
                "metric_weights": {
                    "type": "object",
                    "additionalProperties": {"type": "number"}
                },
                "metrics": {
                    "type": "object",
                    "additionalProperties": {
                        "type": "object",
                        "properties": {
                            "lower_is_better": {"type": "boolean"},
                            "thresholds": {
                                "type": "array",
                                "items": {
                                    "type": "array",
                                    "items": {"type": "number"}
                                }
                            }
                        },
                        "required": ["lower_is_better", "thresholds"]
                    }
                }
            },
            "required": ["description", "metric_description", "class_weight", "metric_weights", "metrics"]
        }
    },
    "additionalProperties": False
}


def test_json_schema(json_data):
    validate(instance=json_data, schema=schema)


In [9]:
json_data = json.load(open("public/metrics_large.json"))

In [10]:
def test_json_schema(json_data):
    validate(instance=json_data, schema=schema)

In [12]:
schema

{'type': 'object',
 'patternProperties': {'.*_metrics': {'type': 'object',
   'properties': {'description': {'type': 'string'},
    'metric_description': {'type': 'object',
     'additionalProperties': {'type': 'string'}},
    'class_weight': {'type': 'integer'},
    'metric_weights': {'type': 'object',
     'additionalProperties': {'type': 'number'}},
    'metrics': {'type': 'object',
     'additionalProperties': {'type': 'object',
      'properties': {'lower_is_better': {'type': 'boolean'},
       'thresholds': {'type': 'array',
        'items': {'type': 'array', 'items': {'type': 'number'}}}},
      'required': ['lower_is_better', 'thresholds']}}},
   'required': ['description',
    'metric_description',
    'class_weight',
    'metric_weights',
    'metrics']}},
 'additionalProperties': False}

In [19]:
def parse_credit_assessment_schema(schema):
    result = []
    for category, details in schema.items():
        result.append(f"{category.replace('_', ' ').title()}:\n  Description: {details['description']}\n  Class Weight: {details['class_weight']}%")
        for metric, metric_desc in details['metric_description'].items():
            result.append(f"  Metric: {metric.replace('_', ' ').title()}\n    Description: {metric_desc}\n    Weight: {details['metric_weights'][metric] * 100}%")
            thresholds = "\n".join(
                [f"      {thr[0]} to {thr[1]}: {'Lower is better' if details['metrics'][metric]['lower_is_better'] else 'Higher is better'}"
                 for thr in details['metrics'][metric]['thresholds']]
            )
            result.append(f"    Thresholds:\n{thresholds}")
    return "\n".join(result)

# Sample input
schema = {
    "profitability_metrics": {
        "description": "Measures a company's ability to generate sustainable earnings and absorb losses.",
        "metric_description": {
            "net_income_to_assets": "Shows how efficiently a company generates profit from its assets by comparing net income to total assets."
        },
        "class_weight": 35,
        "metric_weights": {
            "net_income_to_assets": 0.55
        },
        "metrics": {
            "net_income_to_assets": {
                "lower_is_better": True,
                "thresholds": [
                    [40, float('inf')],
                    [35, 40],
                    [30, 35],
                    [25, 30],
                    [20, 25],
                    [15, 20],
                    [10, 15],
                    [5, 10],
                    [-float('inf'), 5]
                ]
            }
        }
    },
    # Add other categories here...
}

# Output text
print(parse_credit_assessment_schema(schema))


Profitability Metrics:
  Description: Measures a company's ability to generate sustainable earnings and absorb losses.
  Class Weight: 35%
  Metric: Net Income To Assets
    Description: Shows how efficiently a company generates profit from its assets by comparing net income to total assets.
    Weight: 55.00000000000001%
    Thresholds:
      40 to inf: Lower is better
      35 to 40: Lower is better
      30 to 35: Lower is better
      25 to 30: Lower is better
      20 to 25: Lower is better
      15 to 20: Lower is better
      10 to 15: Lower is better
      5 to 10: Lower is better
      -inf to 5: Lower is better


In [22]:
schema = json.load(open("public/metrics_large_finance_companies.json"))

In [30]:

def parse_credit_assessment_schema(schema):
    MAPPED_RATINGS = ["Aaa", "Aa", "A", "Baa", "Ba", "B", "Caa", "Ca", "C"]

    result = []
    for category, details in schema.items():
        result.append(f"\n{category.replace('_', ' ').upper()}:\n  Description: {details['description']}\n  Class Weight: {details['class_weight']}%")
        for metric, metric_desc in details['metric_description'].items():
            result.append(f"  Metric: {metric.replace('_', ' ').title()}\n    Description: {metric_desc}\n    Weight: {details['metric_weights'][metric] * 100:.2f}%")
            
            thresholds = details['metrics'][metric]['thresholds']
            order = "Increasing" if not details['metrics'][metric]['lower_is_better'] else "Decreasing"
            thresholds_text = ", ".join([f"{thr[0]} to {thr[1]}" for thr in thresholds])
            
            # Reverse ratings if order is Increasing (lower_is_better=False)
            ratings = MAPPED_RATINGS[::-1] if details['metrics'][metric]['lower_is_better'] else MAPPED_RATINGS
            
            result.append(f"    Rating: {', '.join(ratings)}")
            result.append(f"    Thresholds: {thresholds_text}")
            result.append(f"    Order Preference: {order} order preferred")
    return "\n".join(result)

In [49]:
print(parse_credit_assessment_schema(schema))


PROFITABILITY METRICS:
  Description: Measures a company's ability to generate sustainable earnings and absorb losses.
  Class Weight: 35%
  Metric: Net Income To Assets
    Description: Shows how efficiently a company generates profit from its assets by comparing net income to total assets.
    Weight: 55.00%
    Rating: Aaa, Aa, A, Baa, Ba, B, Caa, Ca, C
    Thresholds: 40 to inf, 35 to 40, 30 to 35, 25 to 30, 20 to 25, 15 to 20, 10 to 15, 5 to 10, -inf to 5
    Order Preference: Increasing order preferred

CAPITAL ADEQUACY AND LEVERAGE METRICS:
  Description: Assesses the company's capacity to absorb volatility, manage debt, and maintain financial flexibility.
  Class Weight: 45%
  Metric: Equity To Tangible Assets
    Description: Indicates how much of a company's tangible assets are financed by its own equity, reflecting financial strength and asset-backed stability.
    Weight: 100.00%
    Rating: C, Ca, Caa, B, Ba, Baa, A, Aa, Aaa
    Thresholds: -inf to 0.25, 0.25 to 0.5, 0.5 

In [82]:

TEMPLATE = """

GIVEN THE FOLLOWING PROMPT I NEED YOU TO IMPROVE IT TO GIVE A SENIOR LEVEL
CREDIT RISK ANALYSIS THE PROMPT SHOULD CONSIDER ALL FACTORS AND VARIABLES 
PROVIDED AND GIVE A HIGH QUANITY ANALYSIS AS PER THE GUIDLINES
You are a helpful senior AI Research. Guide the user through the solution step by step.


I WANT THE ANALYSIS TO BE LONG AND CREATIVE IT SHOULD CONSIDER ALL RELVENT ASPECT GIVEN 
THE DATA PROVIDED 


Role: Senior Credit Analyst

Task: Conduct a comprehensive credit risk assessment and produce a detailed
credit analysis report for a company based on provided financial data. Consider
the MODEL_PROFILE and how the metrics and descriptions relate to each other
think through the parameters provide an analysis. The analysis should be complete
and comprehensive.

Input Data:
- DATA1: Time series of financial ratios and metrics
- DATA2: Mean values of these metrics across the time series

MODEL_PROFILE:
PROFITABILITY METRICS:
  Description: Measures a company's ability to generate sustainable earnings and absorb losses.
  Class Weight: 35%
  Metric: Net Income To Assets
    Description: Shows how efficiently a company generates profit from its assets by comparing net income to total assets.
    Weight: 55.00%
    Rating: Aaa, Aa, A, Baa, Ba, B, Caa, Ca, C
    Thresholds: 40 to inf, 35 to 40, 30 to 35, 25 to 30, 20 to 25, 15 to 20, 10 to 15, 5 to 10, -inf to 5
    Order Preference: Increasing order preferred

CAPITAL ADEQUACY AND LEVERAGE METRICS:
  Description: Assesses the company's capacity to absorb volatility, manage debt, and maintain financial flexibility.
  Class Weight: 45%
  Metric: Equity To Tangible Assets
    Description: Indicates how much of a company's tangible assets are financed by its own equity, reflecting financial strength and asset-backed stability.
    Weight: 100.00%
    Rating: C, Ca, Caa, B, Ba, Baa, A, Aa, Aaa
    Thresholds: -inf to 0.25, 0.25 to 0.5, 0.5 to 0.75, 0.75 to 1, 1 to 1.5, 1.5 to 2, 2 to 3, 3 to 5, 5 to inf
    Order Preference: Decreasing order preferred

ASSET QUALITY METRICS:
  Description: Evaluates the risk and performance of assets, focusing on impairments and losses.
  Class Weight: 20%
  Metric: Problem Loans To Gross Loans
    Description: Indicates the proportion of loans that may not be repaid, signaling the companyâ€™s loan quality.
    Weight: 50.00%
    Rating: Aaa, Aa, A, Baa, Ba, B, Caa, Ca, C
    Thresholds: 5 to inf, 4 to 5, 3 to 4, 2.5 to 3, 2 to 2.5, 1.5 to 2, 1 to 1.5, 0.5 to 1, -inf to 0.5
    Order Preference: Increasing order preferred
  Metric: Net Charge Offs To Gross Loans
    Description: Measures loan losses after recoveries, highlighting the companyâ€™s effectiveness in managing bad debts.
    Weight: 50.00%
    Rating: Aaa, Aa, A, Baa, Ba, B, Caa, Ca, C
    Thresholds: 5 to inf, 4 to 5, 3 to 4, 2.5 to 3, 2 to 2.5, 1.5 to 2, 1 to 1.5, 0.5 to 1, -inf to 0.5
    Order Preference: Increasing order preferred

2. Synthesize Overall Analysis:
   - Develop a cohesive analysis with a specific theme (e.g., financial stability, growth potential, risk exposure)
   - Create a poetic theme title that evokes emotion and relates to credit analysis (use markdown h3 format, e.g., '### [Your Title]')
   - Provide a comprehensive assessment of the company's creditworthiness, integrating insights from all analyzed areas
   - Support all conclusions with specific metrics and time periods from the provided data
   - It should be one or two paragraphs

3. Highlight Key Strengths:
   - Identify and explain factors that enhance the company's creditworthiness
   - Cite relevant metrics and trends to support each strength

4. Identify Potential Risks:
   - Outline factors that could negatively impact the company's financial health or ability to meet obligations
   - Assess the likelihood and potential impact of each risk

5. Conclude with Credit Outlook:
   - Provide a forward-looking statement on the company's credit outlook, considering all analyzed factors

6. Formatting Guidelines:
   - Use markdown to separate sections and improve readability
   - Ensure all assertions are supported by quantitative evidence from the provided data

Output Format (JSON):
"overall_analysis": "string",
"key_strengths": list["string"],
"potential_risks": list["string"],
"response": true

Input Data:
DATA1: {DATA1}
DATA2: {DATA2}

"""

In [83]:
from openai import OpenAI
import google.generativeai as genai
import os
import json

In [84]:
GOOGLE_API_KEY = "AIzaSyBvCf8bEVqoxLNL7hQcmYDT3153NC1Ea8E"
OPENAI_API_KEY = "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"

In [85]:
def get_openai_response(content: str) -> dict:
   client = OpenAI(api_key=OPENAI_API_KEY)

   completion = client.chat.completions.create(
      model="o1-preview",
      messages=[
         {"role": "user", "content": content},
      ],
   )

   return completion.choices[0].message.content #eval(completion.choices[0].message.content)

In [86]:
response2 = get_openai_response(TEMPLATE)

In [87]:
print(response2)

Certainly! Here's an improved version of your prompt that is designed to elicit a senior-level, comprehensive, and creative credit risk analysis. This version ensures that all factors and variables are considered, and the analysis is detailed and aligned with your guidelines.

---

**Role:** Senior Credit Analyst

**Task:** Conduct an in-depth credit risk assessment and produce a detailed credit analysis report for a company based on the provided financial data. Your analysis should be complete, comprehensive, and creative, considering all relevant aspects given the data provided. Focus on how the metrics and descriptions in the **MODEL_PROFILE** relate to each other, and think critically through the parameters to provide a high-quality analysis.

**Instructions:**

1. **Analyze the Data:**
   - **Examine DATA1 and DATA2 thoroughly:**
     - **DATA1:** Time series of financial ratios and metrics.
     - **DATA2:** Mean values of these metrics across the time series.
   - For each metri

In [81]:
print(response1)

**Role:** Senior Credit Analyst

**Task:** As a Senior Credit Analyst, your mission is to conduct a comprehensive credit risk assessment and produce a detailed credit analysis report for the company based on the provided financial data. Your analysis should meticulously consider the **MODEL_PROFILE**, understanding how the metrics and descriptions interrelate. Provide an in-depth, high-quality analysis that is complete and comprehensive, suitable for senior management review.

**Input Data:**
- **DATA1:** Time series of financial ratios and metrics
- **DATA2:** Mean values of these metrics across the time series

---

### **MODEL_PROFILE**

**PROFITABILITY METRICS:**
- **Description:** Measures a company's ability to generate sustainable earnings and absorb losses.
- **Class Weight:** 35%
- **Metric:** **Net Income To Assets**
  - **Description:** Shows how efficiently a company generates profit from its assets by comparing net income to total assets.
  - **Weight:** 55.00%
  - **Ratin

In [77]:
print(response)

Certainly! Below is an enhanced version of your original prompt, tailored to facilitate a **Senior-Level Credit Risk Analysis**. This improved prompt ensures comprehensive consideration of all factors and variables, promotes high-quality analysis, and adheres to the provided guidelines.

---

### **Enhanced Prompt for Senior-Level Credit Risk Analysis**

**System Role:**
You are an expert Senior Credit Analyst with extensive experience in financial risk assessment. Your role is to conduct an in-depth credit risk analysis, integrating complex financial data and sophisticated analytical techniques to evaluate the creditworthiness of a company.

**Task Overview:**
Perform a thorough credit risk assessment and generate a detailed credit analysis report for a company using the provided financial data. Your analysis should encompass all relevant factors and variables, aligning with the specified `MODEL_PROFILE`. Ensure that metrics and their interrelationships are meticulously evaluated to d