In [1]:
# NOTE: the wildcard import stays first so that the explicit imports below
# win any name clash with whatever `financialtools.utils` exports.
from financialtools.utils import *

import json
import os
from pprint import pprint
from typing import Literal, List, Dict, Optional

import pandas as pd
import polars as pl
from dotenv import load_dotenv, find_dotenv
from langchain.output_parsers import OutputFixingParser
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from openai import OpenAI
from openai.lib._pydantic import to_strict_json_schema
from pydantic import BaseModel, Field

from financialtools.pydantic_models import StockRegimeAssessment
from financialtools.prompts import system_prompt_noredflags_StockRegimeAssessment
from financialtools.utils import dataframe_to_json
from financialtools.wrappers import read_financial_results

In [2]:
# Load variables from a local .env file into the process environment, then
# create the OpenAI client used by every API call in this notebook.
# NOTE(review): presumably the .env file supplies the OpenAI API key — confirm.
load_dotenv()
client = OpenAI()

In [3]:
# Convert the StockRegimeAssessment pydantic model into a strict JSON schema;
# it is sent as the `response_format` schema of every batch request.
Structured_Response = to_strict_json_schema(StockRegimeAssessment)
# (uncomment the next line to inspect the generated schema)
# Structured_Response

In [4]:
# Read the metrics workbook and collect the distinct tickers; one batch
# request is built per ticker.
df = pd.read_excel('financial_data/metrics.xlsx')
tickers = df['ticker'].unique()


In [5]:
# year = 2024

# Build one Batch API request per ticker. Each request carries the ticker's
# metrics, composite scores and evaluation metrics as JSON in the user message
# and asks for a response matching the `Structured_Response` schema.
tasks = []
for ticker in tickers:

    metrics, eval_metrics, composite_scores, red_flags = read_financial_results(
        ticker=ticker,
        # time=year,
        input_dir='financial_data',
        sheet_name='sheet1')

    # Serialise every table to JSON so it can be embedded in the prompt text.
    metrics, eval_metrics, composite_scores, red_flags = [
        dataframe_to_json(frame)
        for frame in [metrics, eval_metrics, composite_scores, red_flags]
    ]

    # The red-flags table is loaded but intentionally left out of the prompt:
    # this run uses the "noredflags" system prompt. To include it, append
    # "RedFlags:\n{red_flags}" to the message and switch the system prompt.
    fina_data = f"The stock under analysis has the following ticker: {ticker}\nMetrics:\n{metrics}\nScores:\n{composite_scores}\nEvaluation Metrics:\n{eval_metrics}\n"

    task = {
        # custom_id is echoed back in the output file and identifies the ticker.
        "custom_id": f"task-{ticker}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            # This is what you would have in your Chat Completions API call
            "model": "gpt-4.1-nano",
            "temperature": 0,
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                  "name": "structured_response",
                  "schema": Structured_Response,
                  "strict": True
                }
            },
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt_noredflags_StockRegimeAssessment
                },
                {
                    "role": "user",
                    "content": fina_data
                }
            ],
        }
    }

    tasks.append(task)


# Creating the file

file_name = "batch_files/tasks.jsonl"

# Make sure the target folder exists so a fresh checkout can run this cell.
os.makedirs(os.path.dirname(file_name), exist_ok=True)

with open(file_name, 'w', encoding='utf-8') as file:
    for obj in tasks:
        file.write(json.dumps(obj) + '\n')


# Uploading the file
# The handle is opened in a `with` block so it is closed once the upload
# returns instead of being leaked for the lifetime of the kernel.
with open(file_name, "rb") as upload_handle:
    batch_file = client.files.create(
      file=upload_handle,
      purpose="batch"
    )

In [6]:
# Submit the uploaded request file as a batch job against the chat-completions
# endpoint, then show the job id as the cell output.
batch_job = client.batches.create(
    completion_window="24h",
    endpoint="/v1/chat/completions",
    input_file_id=batch_file.id,
)

batch_job.id

'batch_68b02d57f5988190b3339429ef17ea0c'

In [7]:
import os

# Each batch job gets its own folder, named after the job id, under batch_files/.
batch_files_folder = 'batch_files/'
batch_files_folder_task = f"{batch_files_folder}{batch_job.id}"
os.makedirs(batch_files_folder_task, exist_ok=True)


In [8]:
# Destination of the request file inside the per-batch folder.
new_name = '/'.join([batch_files_folder_task, 'input.jsonl'])


In [10]:
# Move the uploaded request file into the per-batch folder.
# NOTE(review): not re-runnable — after the first run `file_name` no longer
# exists, so executing this cell again raises FileNotFoundError.
os.rename(file_name, new_name)

In [11]:
# Re-fetch the batch object so that fields such as `output_file_id` reflect
# the job's current server-side state.
batch_job = client.batches.retrieve(batch_job.id)
# To work with an earlier batch instead, retrieve it by its literal id:
# batch_job = client.batches.retrieve('batch_68af597ee8a481909b708b5c1736500f')

In [12]:
import os
# Override to point at the folder of an earlier batch:
# batch_files_folder_task = 'batch_files/batch_68af597ee8a481909b708b5c1736500f'
result_file_name = batch_files_folder_task+str('/output.jsonl')

# The batch object has no output file until the job has produced one;
# fail with an actionable message instead of passing None to the files API.
result_file_id = batch_job.output_file_id
if result_file_id is None:
    raise RuntimeError(
        f"Batch {batch_job.id} has no output file yet (status: {batch_job.status}); "
        "re-run the retrieve cell once the batch has completed."
    )

# Download the raw JSONL bytes and store them next to the batch's input file.
result = client.files.content(result_file_id).content

with open(result_file_name, 'wb') as file:
    file.write(result)

In [13]:
import json
import pandas as pd
from io import StringIO
from pprint import pprint
import rich

In [14]:
def read_jsonl_file(result_file_name):
    """Load a JSON Lines file into a list of parsed objects.

    Args:
        result_file_name: Path of the ``.jsonl`` file to read.

    Returns:
        list: One parsed JSON value per non-empty line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-empty line is not valid JSON.
    """
    results = []
    # The file is written from raw API bytes, so decode it explicitly as UTF-8
    # rather than relying on the platform's default encoding.
    with open(result_file_name, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record and would otherwise make json.loads raise.
            if not line:
                continue
            results.append(json.loads(line))
    return results


def jsonl_to_df(results):
    """Flatten batch output records into a DataFrame of assessments.

    For each record, the assistant message content found at
    ``item['response']['body']['choices'][0]['message']['content']`` is parsed
    as JSON and the assessment fields are copied into one row. Fields missing
    from the parsed object become ``None``.

    Records that cannot be parsed (missing keys, a ``None`` response, an empty
    ``choices`` list, ``None`` or invalid JSON content, or a payload that is
    not a JSON object) are reported with ``print`` and skipped, so one bad
    record does not abort the whole conversion.

    Args:
        results: Iterable of response dictionaries, e.g. the output of
            ``read_jsonl_file``.

    Returns:
        pandas.DataFrame: One row per successfully parsed record. The columns
        are always present and in a fixed order, even when no record parses.
    """
    fields = (
        'ticker',
        'regime',
        'regime_rationale',
        'metrics_movement',
        'non_aligned_findings',
        'evaluation',
        'evaluation_rationale',
    )
    data_list = []

    for item in results:
        try:
            json_output = item['response']['body']['choices'][0]['message']['content']
            parsed = json.loads(json_output)
            data_list.append({field: parsed.get(field) for field in fields})
        except (KeyError, IndexError, TypeError, AttributeError, json.JSONDecodeError) as e:
            # TypeError: a None response/content; IndexError: empty `choices`;
            # AttributeError: the parsed payload is not a JSON object.
            print(f"Skipping item due to error: {e}")

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(data_list, columns=list(fields))






In [15]:
# Preview the parsed batch results; the DataFrame is rendered as the cell output.
jsonl_to_df(read_jsonl_file(result_file_name))

Unnamed: 0,ticker,regime,regime_rationale,metrics_movement,non_aligned_findings,evaluation,evaluation_rationale
0,ISP.MI,bear,The company's fundamental scores have declined...,"Net Profit Margin increased steadily, DebtToEq...",Despite low valuation ratios indicating potent...,undervaluated,The stock appears undervalued based on low P/E...
1,ABC.MI,bear,The company's financial metrics in 2020 show n...,"GrossMargin, OperatingMargin, NetProfitMargin,...",The sharp increase in CurrentRatio in 2022 con...,overvaluated,Despite the improvement in the composite score...
2,KME.MI,bear,The company's fundamental scores have declined...,"GrossMargin increased slightly, but OperatingM...","Valuation metrics suggest undervaluation, whic...",undervaluated,Despite weak fundamentals and negative profita...
3,BPE.MI,bear,The company's composite score has declined fro...,"NetProfitMargin increased from 2021 to 2022, t...","Despite stable valuation ratios, the negative ...",undervaluated,The stock's valuation ratios (P/E and P/B) are...
4,BDB.MI,bear,The company's fundamentals show inconsistent a...,NetProfitMargin increased from 11.54% in 2021 ...,"Despite improved profitability in 2023, negati...",undervaluated,The stock's valuation metrics such as P/E and ...
...,...,...,...,...,...,...,...
270,STMMI.MI,bear,The company's composite score sharply declined...,"GrossMargin increased steadily, DebtToEquity d...",The valuation metrics suggest overvaluation in...,overvaluated,"The sharp decline in free cash flow, negative ..."
271,MFEB.MI,bear,The company's composite score has declined sli...,"GrossMargin remained stable around 44-47%, but...",,overvaluated,The stock's valuation multiples have increased...
272,IGD.MI,bear,The company's composite score has declined fro...,"GrossMargin increased steadily, DebtToEquity r...","Despite stable gross margins, profitability an...",undervaluated,Valuation ratios such as P/E and P/B are low o...
273,AMP.MI,bear,The company's fundamentals show a slight decli...,"GrossMargin remained stable around 0.56, Opera...","Despite stable gross margins, profitability an...",overvaluated,Valuation ratios such as P/E and P/B are near ...


In [16]:
# Parse the downloaded batch output into a DataFrame.
# Note: this rebinds `df`, which previously held the metrics workbook.
df = jsonl_to_df(read_jsonl_file(result_file_name))
# Alternative: load the saved output of an earlier batch instead.
# df = jsonl_to_df(read_jsonl_file('batch_files/batch_68af3e45f568819089ffbed2d668ca1c/output.jsonl'))
# Write the results to an Excel workbook, omitting the index column.
df.to_excel("AI_results_4y_norf.xlsx", index=False)


