In [3]:
import pandas as pd

In [5]:
def check_strength(training_data, desired_strength):
    """Report whether any entry reaches the desired strength.

    Args:
        training_data: Iterable of mappings, each providing a 'strength' entry.
        desired_strength: Threshold the individual strengths are compared to.

    Returns:
        True if at least one entry has 'strength' >= desired_strength,
        False otherwise (including for an empty iterable).
    """
    for entry in training_data:
        if entry['strength'] >= desired_strength:
            return True
    return False

def select_and_format_data(data, desired_strength):
    """Randomly pick four formulations that all stay below a target strength.

    The previous implementation re-sampled four rows in an endless loop until
    none of them reached ``desired_strength``; with fewer than four such rows
    it never terminated. Filtering the eligible rows first and sampling once
    yields the same distribution (uniform over the eligible rows) without the
    retry loop.

    Args:
        data: DataFrame with the columns "Powderkg", "WC", "Materials" and
            "fc_28dGroundTruth". The part of "Materials" before the first
            comma must contain a "-" (the text after it is used as the
            materials descriptor) and the last comma-separated part is taken
            as the curing method.
        desired_strength: Strength threshold; only rows whose
            "fc_28dGroundTruth" is not >= this value are eligible.

    Returns:
        A tuple ``(training_strings, tested_formulations)`` of two lists with
        four strings each: the formulation including its resulting strength,
        and the bare formulation.

    Raises:
        ValueError: If fewer than four rows are below ``desired_strength``.
    """
    # Negating ">=" (instead of using "<") keeps rows with a NaN strength
    # eligible, exactly as the original element-wise check treated them.
    eligible = data[~(data["fc_28dGroundTruth"] >= desired_strength)]
    if len(eligible) < 4:
        raise ValueError(
            f"Need at least 4 data points with strength below {desired_strength}, "
            f"found {len(eligible)}."
        )

    # No random_state on purpose: every call draws a fresh random selection.
    selected_data = eligible.sample(n=4)

    training_data_formatted = []
    tested_formulations = []
    for _, row in selected_data.iterrows():
        # Ensure these column names match your DataFrame exactly
        powder = row["Powderkg"]
        wc = row["WC"]
        materials = row["Materials"]
        strength = row["fc_28dGroundTruth"]

        # Text after the first "-" in the first comma-separated part.
        fa_ggbfs = materials.split(",")[0].split("-")[1]
        # Last comma-separated part, with the literature citation stripped.
        curing_method = materials.split(",")[-1].strip()
        curing_method = curing_method.replace(" (Rao et al. 2018)", "").replace(" (Rao et al.)", "")

        tested_str = f"Powderkg = {powder}, wc = {wc}, materials = {fa_ggbfs}, curing = {curing_method}"
        training_data_formatted.append(f"{tested_str} resulted in a strength of {strength} MPa.")
        tested_formulations.append(tested_str)

    return training_data_formatted, tested_formulations
        
def find_matching_result(df, suggestion):
    """Look up the recorded strength for a suggested formulation.

    Args:
        df: DataFrame with a "Formulation" column (strings) and a "Strength"
            column.
        suggestion: Mapping with the keys "powderkg", "wc", "materials" and
            "curing", or a falsy value when there is no suggestion.

    Returns:
        ``(strength, formulation_text)`` of the first row whose "Formulation"
        equals the suggestion (compared case-insensitively), or
        ``(None, None)`` if there is no suggestion or no matching row.
    """
    if not suggestion:
        return None, None

    # Bare formulation text handed back to the caller on success.
    TrainingDat = f'Powderkg = {suggestion["powderkg"]}, wc = {suggestion["wc"]}, materials = {suggestion["materials"]}, curing = {suggestion["curing"]}'
    # Same text with the prefix used by the "Formulation" column.
    suggestion_str = f'The formulation is Powderkg = {suggestion["powderkg"]}, wc = {suggestion["wc"]}, materials = {suggestion["materials"]}, curing = {suggestion["curing"]}'

    wanted = suggestion_str.lower()
    is_hit = df["Formulation"].str.lower() == wanted
    hits = df[is_hit]

    if hits.empty:
        # Print the suggestion string for debugging
        print("No match found for suggestion string: ", suggestion_str)
        return None, None

    first_hit = hits.iloc[0]
    return first_hit["Strength"], TrainingDat


import re
import json

def parse_solution(response):
    """Extract a formulation dictionary from a model response.

    Two input formats are understood:

    * A JSON object. Every key that contains one of the known names
      ('powderkg', 'wc', 'materials', 'curing'; case and spaces ignored) is
      copied under that name with its value converted to ``str``. Keys that
      are absent are simply left out of the result.
    * Plain text of the form ``"powderkg = X, wc = Y, materials = Z, ..."``.
      'powderkg', 'wc' and 'materials' must all be present; the curing method
      is derived from the keywords "ambient" / "heat" anywhere in the text.

    Args:
        response: Response text of the model.

    Returns:
        The extracted dictionary, or None if nothing usable was found
        (including None or empty input).
    """
    known_keys = ('powderkg', 'wc', 'materials', 'curing')

    def normalize_key(key):
        # Map variations such as "Powder kg" onto the standard key name.
        squashed = str(key).lower().replace(" ", "")
        for known_key in known_keys:
            if known_key in squashed:
                return known_key
        return None

    # None or empty text cannot contain a formulation.
    if not response:
        return None

    # Try to parse the response as JSON
    try:
        json_data = json.loads(response)
    except json.JSONDecodeError:
        json_data = None

    # Only a JSON *object* has key/value pairs to read. Other valid JSON
    # (numbers, lists, quoted strings, ...) used to crash on ``.items()`` and
    # is now handled by the plain-text parser below.
    if isinstance(json_data, dict):
        solution = {}
        for key, value in json_data.items():
            normalized_key = normalize_key(key)
            if normalized_key:
                solution[normalized_key] = str(value)
        if solution:
            return solution
        return None

    # Plain-text format: every "key = value" pair must be present.
    solution = {}
    for key in ('powderkg', 'wc', 'materials'):
        match = re.search(fr"{key} = (.*?)(,|$)", response, re.IGNORECASE)
        if not match:
            return None
        solution[key] = match.group(1).strip()

    # 'curing' is detected by keyword rather than by "key = value".
    lowered = response.lower()
    if "ambient" in lowered:
        solution["curing"] = "Ambient curing"
    elif "heat" in lowered:
        solution["curing"] = "Heat curing"
    else:
        return None
    return solution

    
def format_response_to_model(lab_result):
    """
    Build the feedback message that reports a lab result back to the model.

    Args:
        lab_result: Mapping (or row) indexable by 'fc_28d_Lab_validation'.

    Returns:
        A sentence stating the achieved compressive strength in MPa.
    """
    achieved = lab_result['fc_28d_Lab_validation']
    message = f"We've achieved a compressive strength of {achieved} MPa. Let's try to do better!"
    return message

def parse_materials(materials_str):
    """Return the fly-ash share encoded in a materials description.

    Looks for an integer ratio of the form "<FA>/<GGBFS> FA/GGBFS" and
    returns FA / (FA + GGBFS).

    Args:
        materials_str: Materials description text.

    Returns:
        The fly-ash fraction as a float, or None when the value is not a
        string (e.g. a NaN cell of a DataFrame column), contains no such
        ratio, or both parts are zero.
    """
    # Missing cells arrive as NaN floats; re.search would raise on them.
    if not isinstance(materials_str, str):
        return None

    match = re.search(r'(\d+)/(\d+) FA/GGBFS', materials_str)
    if match is None:
        return None

    fly_ash, ggbfs = (int(part) for part in match.groups())
    total = fly_ash + ggbfs
    # "0/0" carries no ratio information; avoid a ZeroDivisionError.
    if total == 0:
        return None
    return fly_ash / total
    
def parse_curing(materials_str):
    """Classify the curing method named in a materials description.

    Args:
        materials_str: Materials description text.

    Returns:
        "ambient" if the text contains "Ambient curing", "oven" if it
        contains "Heat curing", otherwise None. Non-string values (e.g. a NaN
        cell of a DataFrame column) also yield None instead of raising.
    """
    # Missing cells arrive as NaN floats, which do not support "in".
    if not isinstance(materials_str, str):
        return None
    if "Ambient curing" in materials_str:
        return "ambient"
    if "Heat curing" in materials_str:
        return "oven"
    return None

def load_data(csv_path):
    """Read the formulation CSV and add the columns derived from 'Materials'.

    Args:
        csv_path: Path of the CSV file; it must provide a 'Materials' column.

    Returns:
        The loaded DataFrame with two extra columns: 'FA_GGBFS_ratio'
        (result of parse_materials) and 'curing' (result of parse_curing).
    """
    frame = pd.read_csv(csv_path)
    derived_columns = (
        ('FA_GGBFS_ratio', parse_materials),
        ('curing', parse_curing),
    )
    for column_name, parser in derived_columns:
        frame[column_name] = frame['Materials'].apply(parser)
    return frame

def extract_formulations_from_training_data(training_data):
    """Collect the formulation part of each training string.

    Args:
        training_data: Iterable of strings such as
            "Powderkg = 400, wc = 0.5, materials = 0.7/0.3, curing = Heat ...".

    Returns:
        A list with the matched text of every string that contains a
        formulation; strings without a match are skipped. Note that the
        pattern captures only the first word after "curing =".
    """
    pattern = re.compile(r'Powderkg\s*=\s*(\d+),\s*wc\s*=\s*(\d+\.\d+),\s*materials\s*=\s*(\d+\.\d+/\d+\.\d+),\s*curing\s*=\s*(\w+)', re.IGNORECASE)
    training_formulations = []
    for entry in training_data:
        found = pattern.search(entry)
        if found:
            training_formulations.append(found.group(0))
    return training_formulations

def handle_openai_error(exception):
    """Report an OpenAI client error and pause before a retry where sensible.

    The exception classes are taken from the top-level ``openai`` namespace,
    matching the ``openai.chat.completions.create`` interface used by
    ``call_openai_api``; the older ``openai.error`` module is not available
    alongside that interface.

    Args:
        exception: The exception raised by the OpenAI client.

    Raises:
        The given exception again if it is none of the handled types.
    """
    if isinstance(exception, openai.RateLimitError):
        # NOTE(review): no ``wait_seconds`` attribute could be confirmed on
        # RateLimitError, so fall back to a fixed pause when it is missing.
        wait_seconds = getattr(exception, "wait_seconds", 5)
        print(f"Rate limit error. Will retry after {wait_seconds} seconds.")
        time.sleep(wait_seconds)
    elif isinstance(exception, openai.BadRequestError):
        print(f"Invalid request: {str(exception)}")
    elif isinstance(exception, openai.AuthenticationError):
        print(f"Authentication error: {str(exception)}")
    elif isinstance(exception, openai.InternalServerError):
        # Server-side (5xx) failures, which includes "service unavailable".
        print(f"Service unavailable error. Retrying after a delay...")
        time.sleep(5)  # Sleep for 5 seconds before retrying
    elif isinstance(exception, openai.APITimeoutError):
        # Checked before APIError: the timeout error derives from it, so the
        # generic branch would otherwise shadow this one.
        print(f"Timeout error: {str(exception)}. Retrying after a longer delay...")
        time.sleep(10)  # Sleep for 10 seconds before retrying
    elif isinstance(exception, openai.APIError):
        print(f"API error: {str(exception)}. Retrying after a delay...")
        time.sleep(5)  # Sleep for 5 seconds before retrying
    else:
        raise exception
        
# NOTE: the API request parameters (model, temperature, max_tokens, n) are set inside call_openai_api below.

def call_openai_api(messages, temp, max_retries=5, delay=5):
    """Request a chat completion, retrying after client errors.

    Args:
        messages: Chat messages passed on to the completion endpoint.
        temp: Sampling temperature.
        max_retries: Maximum number of attempts.
        delay: Seconds to wait between two attempts (in addition to any pause
            made by ``handle_openai_error``).

    Returns:
        The response object of the first successful request. None is returned
        only if ``max_retries`` is not positive, because then no request is
        made.

    Raises:
        openai.OpenAIError: If the last attempt fails as well, or earlier if
            ``handle_openai_error`` re-raises an error it does not handle.
    """
    for i in range(max_retries):
        try:
            response = openai.chat.completions.create(
                # The model identifier has to be a string; unquoted it was
                # evaluated as arithmetic on undefined names (NameError).
                model="casperhansen/mixtral-instruct-awq",
                temperature=temp,
                messages=messages,
                max_tokens=500,
                n=1
            )
            return response
        except openai.OpenAIError as e:
            # Base class in the top-level namespace, consistent with the
            # ``openai.chat.completions`` interface used above.
            handle_openai_error(e)
            if i < max_retries - 1:  # i is zero indexed
                time.sleep(delay)  # wait before trying again
                continue
            else:
                raise

def format_discovery_data_for_training(csv_path='../data/DiscoveryData_Sample.csv'):
    """Build the formulation/strength lookup table from the discovery data.

    Each row of the CSV is turned into a formulation sentence of the form
    "The formulation is Powderkg = ..., wc = ..., materials = ..., curing = ..."
    together with its "fc_28dGroundTruth" value.

    Args:
        csv_path: CSV file to read via ``load_data``. Defaults to the sample
            discovery data set that was previously hard-coded.

    Returns:
        A DataFrame with the columns "Formulation" and "Strength", one row
        per row of the input data.
    """
    df = load_data(csv_path)

    # Collect plain records and build the DataFrame once at the end;
    # concatenating a one-row frame per iteration copies the growing frame
    # every time and needed a placeholder for the initially empty frame.
    records = []
    for _, row in df.iterrows():
        powder = row["Powderkg"]
        wc = row["WC"]
        materials = row["Materials"]

        # Fly Ash/GGBFS descriptor: text after the first "-" of the first
        # comma-separated part.
        fa_ggbfs = materials.split(",")[0].split("-")[1]

        # Curing method: last comma-separated part without the citation.
        curing_method = materials.split(",")[-1].strip()
        curing_method = curing_method.replace(" (Rao et al. 2018)", "")
        curing_method = curing_method.replace(" (Rao et al.)", "")

        # Compressive strength
        strength = row["fc_28dGroundTruth"]

        # Formulation string in the same format as the model's output
        formulation = f'The formulation is Powderkg = {powder}, wc = {wc}, materials = {fa_ggbfs}, curing = {curing_method}'
        records.append((formulation, strength))

    return pd.DataFrame(records, columns=["Formulation", "Strength"])


# Build the formulation/strength table from the sample discovery data.
# The return value is not assigned; in a notebook cell it is only displayed.
format_discovery_data_for_training()

                                         Formulation  Strength
0  The formulation is Powderkg = 360, wc = 0.45, ...     55.37
                                         Formulation  Strength
0  The formulation is Powderkg = 360, wc = 0.45, ...     55.37
1  The formulation is Powderkg = 360, wc = 0.5, m...     59.79
                                         Formulation  Strength
0  The formulation is Powderkg = 360, wc = 0.45, ...     55.37
1  The formulation is Powderkg = 360, wc = 0.5, m...     59.79
2  The formulation is Powderkg = 360, wc = 0.55, ...     51.61
                                         Formulation  Strength
0  The formulation is Powderkg = 360, wc = 0.45, ...     55.37
1  The formulation is Powderkg = 360, wc = 0.5, m...     59.79
2  The formulation is Powderkg = 360, wc = 0.55, ...     51.61
3  The formulation is Powderkg = 360, wc = 0.6, m...     46.68
                                         Formulation  Strength
0  The formulation is Powderkg = 360, wc = 0.45, ...   

Unnamed: 0,Formulation,Strength
0,"The formulation is Powderkg = 360, wc = 0.45, ...",55.37
1,"The formulation is Powderkg = 360, wc = 0.5, m...",59.79
2,"The formulation is Powderkg = 360, wc = 0.55, ...",51.61
3,"The formulation is Powderkg = 360, wc = 0.6, m...",46.68
4,"The formulation is Powderkg = 370, wc = 0.45, ...",55.62
...,...,...
235,"The formulation is Powderkg = 440, wc = 0.6, m...",28.65
236,"The formulation is Powderkg = 450, wc = 0.45, ...",38.63
237,"The formulation is Powderkg = 450, wc = 0.5, m...",44.49
238,"The formulation is Powderkg = 450, wc = 0.55, ...",35.20
