In [1]:
import pandas as pd
import numpy as np
import json
from tools import calculate_similarity, calculate_performance, calculate_performance_V2, pdf_to_text
import os

In [2]:
# Build the per-article JSON template for the '6061 Al' sheet: one record per
# spreadsheet row, with an empty "extractedText" slot to be filled in later.
#
# The guard and the write use the SAME path. Previously the guard checked
# 'articlesJsonTemplate.json' while the cell wrote 'articlesJsonTemplateAl.json',
# so an existing template was silently overwritten on re-run.
json_file = 'articlesJsonTemplateAl.json'
if os.path.exists(json_file):
    # Deliberate stop: refuse to clobber an existing template.
    raise FileExistsError("File already found")

df = pd.read_excel('documents/articles.xlsx', sheet_name='6061 Al')

data = []
for _, row in df.iterrows():
    article = {
        # PDF name is derived from the sheet's 'Index' column.
        "filename": f"Article_{row['Index']}.pdf",
        # Missing cells (NaN) become empty strings so the JSON stays valid.
        "title": "" if pd.isna(row['Title']) else row['Title'],
        "link": "" if pd.isna(row['Link']) else row['Link'],
        "extractedText": ""
    }

    data.append(article)

with open(json_file, 'w') as f:
    json.dump(data, f, indent=4)

In [None]:
# Run the extraction prompt against every article PDF for one material and
# checkpoint the results to OUTPUT_FILE after each processed article.
MODEL = "o3-mini"
EFFORT = 'high'
MATERIAL = 'Steel'
INPUT_FILE = f"prompts/promptV2_{MATERIAL}.txt"
OUTPUT_FILE = f'outputs/{MODEL}/promptv2_{EFFORT}_{MATERIAL}_3-23.json'

# NOTE(review): `client` is not created in any cell visible here; it must
# already exist in the kernel (presumably an OpenAI client - confirm) or this
# cell fails with NameError on a fresh "Restart & Run All".


def _strip_json_fence(text):
    """Remove a surrounding ```json ... ``` Markdown fence from `text`.

    Whitespace around the fence is tolerated, so a reply ending in a newline
    after the closing fence is still unwrapped. Text that is not fenced is
    returned unchanged.
    """
    stripped = text.strip()
    if stripped.startswith("```json") and stripped.endswith("```"):
        # 7 == len("```json"), 3 == len("```")
        return stripped[7:-3].strip()
    return text


with open(f'articlesJsonTemplate{MATERIAL}.json') as f:
    articles = json.load(f)

# The prompt is identical for every article, so read it once up front.
with open(INPUT_FILE, "r") as file:
    prompt_text = file.read()

# Make sure the checkpoint directory exists before any API call is made, so a
# missing folder cannot throw away a completed (and billed) request.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

for article in articles:
    print(article['filename'])
    input_pdf = f"documents/{MATERIAL}/{article['filename']}"

    if not os.path.exists(input_pdf):
        print('Skipping Because Article not found')
        print(input_pdf)
        continue

    print(f'{article["filename"]} Ready for Analysis')
    # pdf_to_text comes from the local `tools` module; presumably it returns
    # the PDF's plain text - confirm against tools.py.
    article_text = pdf_to_text(input_pdf)
    messages = [
        {"role": "user", "content": prompt_text},
        {"role": "user", "content": article_text},
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            reasoning_effort=EFFORT
        )
    except Exception as e:
        # Treat errors whose message mentions the context/token limit as
        # "article too large" and move on; anything else is re-raised.
        # NOTE(review): this substring match is broad and may also catch
        # unrelated errors that mention "token" - verify.
        error_text = str(e).lower()
        if "context" in error_text or "token" in error_text:
            print("Too large of Input")
            continue
        raise

    extractedVals = completion.choices[0].message.content
    print(extractedVals)

    if extractedVals is None:
        # The API may return no text content; previously this crashed on
        # `.startswith`. Leave the template entry untouched and continue.
        print("Skipping Because Model returned no content")
        continue

    extractedVals = _strip_json_fence(extractedVals)

    try:
        article['extractedText'] = json.loads(extractedVals)
    except json.JSONDecodeError as e:
        # Keep the raw reply so nothing is lost when it is not valid JSON.
        print(f"Saving as TXT - Error decoding JSON: {e}")
        article['extractedText'] = extractedVals
    except Exception:
        print("Another Error Occured")
        raise

    # Checkpoint the full list after every article so a later failure does
    # not discard results that were already obtained.
    with open(OUTPUT_FILE, 'w') as f:
        json.dump(articles, f, indent=4)