# Test Task

## Loading envirement variables

In [24]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()



True

## Loading and exploring the data file

In [25]:
import pandas as pd

# Load all sheets
excel_file = "Ground Truth Transitions_13 problems_RL.xlsx"
sheets = pd.read_excel(excel_file, sheet_name=None)

# Combine data from all sheets
data = []

for sheet_name, df in sheets.items():
    if 'Problem' in df.columns and 'Condition' in df.columns and 'ChangeOfState' in df.columns:
        for _, row in df.iterrows():
            data.append({
                "problem_id": sheet_name,
                "description": row['Problem'],
                "condition": str(row['Condition']).strip(),
                "state": str(row['ChangeOfState']).strip().lower()
            })

# Create combined DataFrame
df_all = pd.DataFrame(data)




In [26]:
print(df_all)


    problem_id                                        description condition  \
0     Problem1  # **Question:** A gas in a cylinder is compres...      True   
1     Problem1                                                NaN      True   
2     Problem1  # The gas is ideal, with R = 287 J/(kg K) and ...      True   
3     Problem1                                                NaN     False   
4     Problem1                #**Answer:** $p = 118778.571428571$     False   
..         ...                                                ...       ...   
124  Problem13                                                NaN     False   
125  Problem13                                                NaN      True   
126  Problem13                                                NaN     False   
127  Problem13                                                NaN     False   
128  Problem13                                                NaN      True   

              state  
0       equilibrium  
1      


## Cleaning dataset to remove unnessary information

In [27]:
import re

# Function to clean a single problem description
def clean_problem_text(text):
    if not isinstance(text, str):
        return ""
    
    # Remove anything after "# **Answer:**" or "#**Answer:**"
    text = re.split(r'#\s*\*\*Answer:\*\*', text)[0]
    
    # Optionally: Remove trailing spaces and redundant newlines
    text = text.strip()
    text = re.sub(r'\n+', '\n', text)
    
    return text

# Apply cleaning function to the 'description' column
df_all['description'] = df_all['description'].apply(clean_problem_text)


In [28]:
df_all[:7]

Unnamed: 0,problem_id,description,condition,state
0,Problem1,# **Question:** A gas in a cylinder is compres...,True,equilibrium
1,Problem1,,True,adiabatic
2,Problem1,"# The gas is ideal, with R = 287 J/(kg K) and ...",True,reversible
3,Problem1,,False,is_isothermal
4,Problem1,,False,is_isochoric
5,Problem1,,True,is_polytropic
6,Problem1,,False,is_isobaric


## Classifications using LLM

In [29]:
import time
import json
import pandas as pd

from ollama import chat
from ollama import ChatResponse

# Initialize prompt template
prompt_template = """
You are tasked with identifying the thermodynamic changes of state occurring in the following problem description.
ONLY choose from the following list of all possible changes:
- equilibrium
- adiabatic
- reversible
- is_isothermal
- is_isochoric
- is_polytropic
- is_isobaric
- is_isenthalpic
- is_isentropic
Problem Description: 
"""

responses = []

# First group by 'problem_id'
grouped = df_all.groupby('problem_id')

# Loop through each group
for problem_id, group in grouped:
    # Concatenate all descriptions for this problem_id
    all_descriptions = group['description'].dropna().tolist()
    combined_description = "\n".join(all_descriptions)

    # Build full prompt
    full_prompt = prompt_template + combined_description

    full_prompt += """\nRespond STRICTLY ONLY with a Python list of applicable changes.
                Example format: ["adiabatic", "reversible", "is_isochoric", "is_isenthalpic"].
                Do NOT add any explanation, intro, or text. Only the Python list including all possible changes.
                """

    try:
        #=======================================================================
        # This is the Ollama API call.
        response: ChatResponse = chat(model='llama3.1:8b', messages=[
            {
                'role': 'user',
                'content': full_prompt,
                'temperature': 0.0,
            },
        ])
        print(response['message']['content'])

        llm_output = response.message.content

        #=======================================================================


        # #=======================================================================
        # # This is the Groq API call. We can use it instead of the Ollama API to test
        # # various LLM models.
        # # Make sure to set the environment variable GROQ_API_KEY with your API key
        # # if you want to use this.

        # import os

        # from groq import Groq

        # client = Groq(
        #     api_key=os.environ.get("GROQ_API_KEY"),
        # )

        # chat_completion = client.chat.completions.create(
        #     messages=[
        #         {
        #             'role': 'user',
        #             'content': full_prompt,
        #         }
        #     ],
        #     model="llama-3.3-70b-versatile",
        # )

        # print(chat_completion.choices[0].message.content)

        # llm_output = chat_completion.choices[0].message.content

        # #=======================================================================



        # Extracting list of states from the LLM output
        # Find the first and last brackets in the output
        # This is a bit of a hack, but it works for the expected output format
        # We assume the output is a well-formed JSON list
        # If the output is not well-formed, we will catch the JSONDecodeError
        # and print a warning message
        # and append an empty list to the responses


        first_bracket = llm_output.find('[')
        last_bracket = llm_output.rfind(']')
        
        predicted_states = []
        if first_bracket != -1 and last_bracket != -1:
            list_text = llm_output[first_bracket:last_bracket+1]
            cleaned_list_text = list_text.replace("'", '"').strip()
            try:
                predicted_states = json.loads(cleaned_list_text)
                print(f"Predicted states: {predicted_states}")
            except json.JSONDecodeError:
                print(f"Warning! Could not parse JSON for problem {problem_id}. Raw text: {list_text}")

        # Now for each row in this group, record the same prediction
        # because problem description is the same for all rows of a particular problem_id
        for _, row in group.iterrows():
            responses.append({
                "problem_id": problem_id,
                "description": row['description'],
                "condition": row['condition'],
                "original_state": row['state'],
                "llm_prediction": predicted_states
            })
        print(f"Done with problem {problem_id}")

    except Exception as e:
        print(f"Error! processing problem {problem_id}: {e}")
        # If there is an error, append empty prediction for all rows in the group
        for _, row in group.iterrows():
            responses.append({
                "problem_id": problem_id,
                "description": row['description'],
                "condition": row['condition'],
                "original_state": row['state'],
                "llm_prediction": []
            })

    time.sleep(0.1)  # optional sleep to reduce load on the API

# Save results
df_llm_predictions = pd.DataFrame(responses)


["adiabatic", "reversible", "is_isothermal", "is_isobaric"]
Predicted states: ['adiabatic', 'reversible', 'is_isothermal', 'is_isobaric']
Done with problem Problem1
["adiabatic", "reversible", "is_isothermal", "is_isochoric", "is_polytropic", "is_isobaric", "is_isenthalpic"]
Predicted states: ['adiabatic', 'reversible', 'is_isothermal', 'is_isochoric', 'is_polytropic', 'is_isobaric', 'is_isenthalpic']
Done with problem Problem10
['equilibrium', 'adiabatic', 'reversible', 'is_isothermal', 'is_polytropic']
Predicted states: ['equilibrium', 'adiabatic', 'reversible', 'is_isothermal', 'is_polytropic']
Done with problem Problem11
["adiabatic", "is_isothermal", "is_isobaric", "is_polytropic", "is_isentropic"]
Predicted states: ['adiabatic', 'is_isothermal', 'is_isobaric', 'is_polytropic', 'is_isentropic']
Done with problem Problem12
['adiabatic', 'is_polytropic', 'is_isobaric']
Predicted states: ['adiabatic', 'is_polytropic', 'is_isobaric']
Done with problem Problem13
['reversible', 'is_isen

In [30]:
df_llm_predictions[:10]

Unnamed: 0,problem_id,description,condition,original_state,llm_prediction
0,Problem1,# **Question:** A gas in a cylinder is compres...,True,equilibrium,"[adiabatic, reversible, is_isothermal, is_isob..."
1,Problem1,,True,adiabatic,"[adiabatic, reversible, is_isothermal, is_isob..."
2,Problem1,"# The gas is ideal, with R = 287 J/(kg K) and ...",True,reversible,"[adiabatic, reversible, is_isothermal, is_isob..."
3,Problem1,,False,is_isothermal,"[adiabatic, reversible, is_isothermal, is_isob..."
4,Problem1,,False,is_isochoric,"[adiabatic, reversible, is_isothermal, is_isob..."
5,Problem1,,True,is_polytropic,"[adiabatic, reversible, is_isothermal, is_isob..."
6,Problem1,,False,is_isobaric,"[adiabatic, reversible, is_isothermal, is_isob..."
7,Problem1,,False,is_isenthalpic,"[adiabatic, reversible, is_isothermal, is_isob..."
8,Problem1,,True,is_isentropic,"[adiabatic, reversible, is_isothermal, is_isob..."
9,Problem1,,,,"[adiabatic, reversible, is_isothermal, is_isob..."


## Matching LLM results to ground truth values

In [31]:
def check_match(row):
    if pd.isna(row['original_state']):
        return False  # If no original_state, consider it incorrect
   
    if pd.isna(row['condition']):
        return False  # If no condition value, consider it incorrect
    
    return row['original_state'] in row['llm_prediction'] and row['condition'] == "True"

# Creating 'is_correct' column in the DataFrame
df_llm_predictions['is_correct'] = df_llm_predictions.apply(check_match, axis=1)

# Calculating Per-problem_id accuracy of each problem sepratly.
problem_accuracy = df_llm_predictions.groupby('problem_id')['is_correct'].mean()

# Showing per-problem_id accuracy
print("--> Accuracy per Problem ID:")
print(problem_accuracy.apply(lambda x: f"{x:.2%}"))

# Overall accuracy as average of per-problem accuracies
overall_accuracy = problem_accuracy.mean()
print(f"\n--> Overall Accuracy (average of per-problem accuracies): {overall_accuracy:.2%}")

#Save final DataFrame in CSV format
df_llm_predictions.to_csv('llm_results.csv', index=False)
print("Results saved to 'llm_results.csv'.")




--> Accuracy per Problem ID:
problem_id
Problem1      9.52%
Problem10    22.22%
Problem11     0.00%
Problem12    11.11%
Problem13    22.22%
Problem2     22.22%
Problem3     44.44%
Problem4      0.00%
Problem5     44.44%
Problem6      0.00%
Problem7     33.33%
Problem8     11.11%
Problem9     22.22%
Name: is_correct, dtype: object

--> Overall Accuracy (average of per-problem accuracies): 18.68%
Results saved to 'llm_results.csv'.


In [32]:
df_llm_predictions

Unnamed: 0,problem_id,description,condition,original_state,llm_prediction,is_correct
0,Problem1,# **Question:** A gas in a cylinder is compres...,True,equilibrium,"[adiabatic, reversible, is_isothermal, is_isob...",False
1,Problem1,,True,adiabatic,"[adiabatic, reversible, is_isothermal, is_isob...",True
2,Problem1,"# The gas is ideal, with R = 287 J/(kg K) and ...",True,reversible,"[adiabatic, reversible, is_isothermal, is_isob...",True
3,Problem1,,False,is_isothermal,"[adiabatic, reversible, is_isothermal, is_isob...",False
4,Problem1,,False,is_isochoric,"[adiabatic, reversible, is_isothermal, is_isob...",False
...,...,...,...,...,...,...
124,Problem9,,False,is_isochoric,"[adiabatic, is_isothermal, is_isobaric, is_pol...",False
125,Problem9,,True,is_polytropic,"[adiabatic, is_isothermal, is_isobaric, is_pol...",True
126,Problem9,,False,is_isobaric,"[adiabatic, is_isothermal, is_isobaric, is_pol...",False
127,Problem9,,False,is_isenthalpic,"[adiabatic, is_isothermal, is_isobaric, is_pol...",False
