In [53]:
import importlib
import json
import sqlite3
import urllib.error
import urllib.request

import pandas as pd
from rapidfuzz import process, fuzz

from ai_integration.chatgpt_kpi_assistant import ai_fallback_column_mapping
from ai_integration.chatgpt_kpi_assistant import ai_suggest_column_category
# Show every column when a DataFrame is displayed instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

In [54]:
class DataHandler:
    """Load a tabular data set and rename its columns to the standard names of a mapping file."""

    # A fuzzy-match score must be strictly greater than this value to be accepted.
    MATCH_THRESHOLD = 75

    def __init__(self, source_type, source, mapping_path):
        """
        :param source_type: Type of data source (excel, csv, db, api, json).
        :param source: File path, database connection, or API endpoint.
        :param mapping_path: Path to JSON column mapping file.

        """
        self.source_type = source_type
        self.source = source
        self.mapping_path = mapping_path
        self.column_mappings = self._load_column_mapping()
        self.data = None

    def load_data(self):
        """
        Load data based on the selected source type and apply column mapping.

        :return: The loaded DataFrame with mapped column names. On failure a message
                 string is returned instead ("Unsupported source type: ..." or
                 "Error loading data: ..."), so callers should check the result type.
        """
        try:
            loaders = {
                "excel": lambda: pd.read_excel(self.source),
                "csv": lambda: pd.read_csv(self.source),
                "db": self._load_from_db,
                "api": self._load_from_api,
                "json": self._load_from_json,
            }
            loader = loaders.get(self.source_type)
            if loader is None:
                return f"Unsupported source type: {self.source_type}"

            self.data = loader()

            # Apply column mapping after loading data
            self._apply_column_mapping()

            return self.data
        except Exception as e:
            return f"Error loading data: {str(e)}"

    def _load_from_db(self):
        """
        Load data from an SQLite database (can be extended for other DBs).

        :return: DataFrame holding every row of the ``kpi_data`` table.
        """
        conn = sqlite3.connect(self.source)  # Example for SQLite
        try:
            query = "SELECT * FROM kpi_data"  # Modify as needed
            return pd.read_sql(query, conn)
        finally:
            # Always release the connection, even when the query fails.
            conn.close()

    def _load_from_api(self):
        """
        Fetch data from an API endpoint.

        :return: DataFrame built from the JSON payload of the response.
        :raises RuntimeError: If the endpoint does not answer with HTTP 200. Raising
                (rather than returning a message) lets load_data() report the real cause.
        """
        try:
            with urllib.request.urlopen(self.source, timeout=30) as response:
                status = response.status
                payload = response.read()
        except urllib.error.HTTPError as exc:
            raise RuntimeError(f"API request failed with status: {exc.code}") from exc

        if status != 200:
            raise RuntimeError(f"API request failed with status: {status}")
        return pd.DataFrame(json.loads(payload))  # Assuming API returns JSON

    def _load_from_json(self):
        """Load data from a JSON file (read as UTF-8, like the mapping file)."""
        with open(self.source, "r", encoding="utf-8") as f:
            return pd.DataFrame(json.load(f))

    def _load_column_mapping(self):
        """
        Loads column mapping definitions from a JSON file.

        :return: The parsed mapping, or an empty dict when the file cannot be read or parsed.
        """
        try:
            with open(self.mapping_path, "r", encoding="utf-8") as file:
                return json.load(file)
        except (OSError, ValueError, TypeError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            print(f"Error loading column mapping: {e}")
            return {}

    def _find_best_match(self, detected_column):
        """
        Finds the best match for a detected column using RapidFuzz.

        :param detected_column: Column name found in the loaded data.
        :return: (standard_name, score) for the best candidate whose score exceeds
                 MATCH_THRESHOLD, otherwise (None, None).
        """
        query = str(detected_column)  # column labels are not guaranteed to be strings
        best_match = None
        highest_score = 0
        for standard_name, details in self.column_mappings.items():
            # A mapping entry without an "alternative" list is matched on its name only.
            alternatives = details.get("alternative", []) if isinstance(details, dict) else []
            possible_matches = [standard_name] + list(alternatives)
            _, score, _ = process.extractOne(query, possible_matches, scorer=fuzz.ratio)

            if score > highest_score:
                highest_score = score
                best_match = standard_name

        if highest_score > self.MATCH_THRESHOLD:
            return best_match, highest_score
        return None, None

    def _apply_column_mapping(self):
        """Maps detected columns to standardized names using RapidFuzz and AI fallback."""
        detected_columns = list(self.data.columns)
        mapped_columns = {}
        unmatched_columns = []

        # Step 1: Try RapidFuzz first
        for detected_col in detected_columns:
            best_match, highest_score = self._find_best_match(detected_col)
            print(f"Input: {detected_col} -> Standard: {best_match} with a score of {highest_score}")

            if best_match:
                mapped_columns[detected_col] = best_match
            else:
                unmatched_columns.append(detected_col)

        # Step 2: Use ChatGPT for all unmatched columns in one request
        if unmatched_columns:
            ai_suggestions = ai_fallback_column_mapping(unmatched_columns, list(self.column_mappings.keys()))

            # Update mappings with AI suggestions
            for i, col in enumerate(unmatched_columns):
                mapped_columns[col] = ai_suggestions[i] if ai_suggestions[i] else col  # Keep original if AI fails

        # Step 3: Rename columns exactly once. Applying the same mapping a second time
        # would re-map any standard name that is also a detected name (A->B, B->C => C, C).
        self.data = self.data.rename(columns=mapped_columns)


After adding the ChatGPT fallback: load the data and map its columns

In [55]:
# Load the stimulation workbook and standardize its column names.
mapping_path = "mappings/well_stimulation_mapping.json"
file_path = "Estimulaciones_edit.xlsx"
data_handler = DataHandler(source_type="excel", source=file_path, mapping_path=mapping_path)
df = data_handler.load_data()

# load_data() reports failures by returning a message string rather than raising;
# stop here so later cells never receive a str where a DataFrame is expected.
if isinstance(df, str):
    raise RuntimeError(df)

Input: Región -> Standard: Region with a score of 100.0
Input: Activo  -> Standard: Lease with a score of 92.3076923076923
Input: Campo -> Standard: Field with a score of 100.0
Input: Pozo -> Standard: Well ID with a score of 100.0
Input: Latitud Conductor -> Standard: Latitude with a score of 100.0
Input: Longitud Conductor -> Standard: Longitude with a score of 100.0
Input: Pera -> Standard: Operational Permit with a score of 100.0
Input: Costo (USD) -> Standard: Cost (USD) with a score of 100.0
Input: Fecha -> Standard: Date with a score of 100.0
Input: Categoría del Pozo -> Standard: Well Category with a score of 100.0
Input: Intervención -> Standard: Intervention Type with a score of 100.0
Input: Cía -> Standard: Company with a score of 100.0
Input: Contrato -> Standard: Contract with a score of 100.0
Input: SOLV. [m3] -> Standard: Solvent Volume (m3) with a score of 100.0
Input: ACID. [m3] -> Standard: Acid Volume (m3) with a score of 100.0
Input: DIVER. [m3] -> Standard: Diverge

In [56]:
class FilterHandler:
    """Expose the filters available on a standardized DataFrame and apply user selections."""

    def __init__(self, df, mapping_path, metadata_output_path="column_mappings.json"):
        """
        :param df: Pandas DataFrame containing standardized data.
        :param mapping_path: Path to JSON file containing column mappings & metadata.
        :param metadata_output_path: File that receives the mappings when metadata for
                                     unknown columns had to be generated.
        """
        self.df = df.dropna(axis=1, how="all")  # Drop empty columns
        self.mappings = self._load_mappings(mapping_path)
        self.metadata_output_path = metadata_output_path
        self._update_missing_metadata()
        # Resolved after the metadata update so newly categorized columns are considered too.
        self.date_columns = self._identify_columns_by_type("date")

    def _load_mappings(self, mapping_path):
        """
        Loads column mappings including metadata.

        :return: Parsed mapping dict, or an empty dict when the file is missing or unreadable.
        """
        try:
            with open(mapping_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            print("Warning: Mapping file not found. Using default categorization.")
            return {}
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Warning: Mapping file could not be parsed ({e}). Using default categorization.")
            return {}

    def _identify_columns_by_type(self, column_type):
        """Identifies columns by type (e.g., 'date', 'numerical')."""
        return [
            col
            for col, meta in self.mappings.items()
            if isinstance(meta, dict) and meta.get("type") == column_type and col in self.df.columns
        ]

    def _update_missing_metadata(self):
        """If columns are missing in the metadata file, attempt to categorize them dynamically."""
        added = False
        for column in self.df.columns:
            if column in self.mappings:
                continue
            non_null = self.df[column].dropna()
            sample_value = non_null.iloc[0] if not non_null.empty else "N/A"
            # Store new mapping dynamically
            self.mappings[column] = ai_suggest_column_category(column, sample_value)
            added = True

        # Save updated mappings once, after all missing columns were processed.
        if added:
            with open(self.metadata_output_path, "w", encoding="utf-8") as f:
                json.dump(self.mappings, f, indent=4)

    def validate_kpi_columns(self, kpi_formula):
        """
        Validates that all columns in a KPI formula belong to the correct data category.
        :param kpi_formula: The formula generated by AI (dict with "variables" and "formula").
        :return: Boolean indicating validity and a warning message if needed.
        """
        formula = kpi_formula.get("formula") or ""
        uses_sum = "Sum" in formula
        uses_average = "Average" in formula

        invalid_columns = []
        for column in kpi_formula.get("variables", {}):
            meta = self.mappings.get(column, {})
            category = (meta.get("category") if isinstance(meta, dict) else None) or "Uncategorized"

            # Summing pressures or averaging chemical volumes is flagged; each column is listed once.
            if ("Pressure" in category and uses_sum) or ("Chemical Volume" in category and uses_average):
                invalid_columns.append(column)

        if invalid_columns:
            return False, f"Warning: The following columns may not be appropriate for the KPI calculation: {invalid_columns}"
        return True, "KPI is valid."

    def get_available_filters(self):
        """
        Detects possible filters, classifying numerical columns by category.
        :return: Dictionary of available filters (categorical, numerical by type, date, groups).
        """
        available_filters = {"categorical": [], "numerical": {}, "date": list(self.date_columns), "groups": {}}

        for column, meta in self.mappings.items():
            if column not in self.df.columns or not isinstance(meta, dict):
                continue

            category = meta.get("category", "Uncategorized")
            group = meta.get("group", "None")
            column_type = meta.get("type")

            if column_type == "numerical":
                available_filters["numerical"].setdefault(category, []).append(column)
            elif column_type == "categorical":
                available_filters["categorical"].append(column)

            if group != "None":
                available_filters["groups"].setdefault(group, []).append(column)

        return available_filters

    def apply_filters(self, filters):
        """
        Apply user-defined filters dynamically.
        :param filters: Dictionary containing filter parameters. Supported keys:
                        "categorical" {column: value or list of values, "All" disables},
                        "numerical" {column: [low, high]}, "date" {column: [start, end]}.
                        Ranges are inclusive; malformed ranges and unknown columns are ignored.
        :return: Filtered DataFrame.
        """
        filtered_df = self.df.copy()

        # Apply categorical filters
        for column, value in filters.get("categorical", {}).items():
            if column not in filtered_df.columns:
                continue
            if isinstance(value, (list, tuple, set)):
                filtered_df = filtered_df[filtered_df[column].isin(list(value))]
            elif value != "All":
                filtered_df = filtered_df[filtered_df[column] == value]

        # Apply numerical range filters
        for column, range_values in filters.get("numerical", {}).items():
            if column in filtered_df.columns and isinstance(range_values, (list, tuple)) and len(range_values) == 2:
                low, high = range_values
                filtered_df = filtered_df[(filtered_df[column] >= low) & (filtered_df[column] <= high)]

        # Apply date range filters
        for column, date_range in filters.get("date", {}).items():
            if column in filtered_df.columns and isinstance(date_range, (list, tuple)) and len(date_range) == 2:
                start = pd.to_datetime(date_range[0])
                end = pd.to_datetime(date_range[1])
                # Compare on a datetime view so string-typed date columns filter correctly;
                # the returned frame keeps its original values.
                as_dates = pd.to_datetime(filtered_df[column], errors="coerce")
                filtered_df = filtered_df[(as_dates >= start) & (as_dates <= end)]

        return filtered_df


In [57]:
# Build the filter helper on the loaded frame, reusing the same mapping file for metadata.
filter_handler  = FilterHandler(df, mapping_path=mapping_path)
# Echo the object to confirm construction succeeded (only its default repr is shown).
filter_handler

<__main__.FilterHandler at 0x17dfacb8050>

In [58]:
# Print each filter family followed by the columns it offers.
available_filter = filter_handler.get_available_filters()
for filter_kind, filter_columns in available_filter.items():
    print(filter_kind)
    print(filter_columns)

categorical
['Region', 'Lease', 'Field', 'Well ID', 'Operational Permit', 'Well Category', 'Intervention Type', 'Company', 'Sensor / Nodal']
numerical
{'Financial': ['Cost (USD)', 'Revenue (USD)'], 'Chemical Volume': ['Solvent Volume (m3)', 'Acid Volume (m3)', 'Divergent Volume (m3)', 'Inhibitor Volume (m3)', 'Neutralizer Volume (m3)', 'Linear Gel Volume (m3)', 'Treated Water Volume (m3)', 'Brine Volume (m3)', 'Water Volume (m3)'], 'Operational': ['Liquid Displacement (m3)', 'Nitrogen Displacement (m3)', 'Total Fluid Volume (m3)'], 'Geological': ['Top (ft)', 'Base (ft)'], 'Well Performance': ['Qo Before (bpd)', 'Qo After (bpd)', 'Qo Difference (bpd)'], 'Pressure': ['Ptp Before (kg/cm²)', 'Ptp After (kg/cm²)', 'Ptp Difference (kg/cm²)', 'Pwf Before (kg/cm²)', 'Pwf After (kg/cm²)', 'Pwf Difference (kg/cm²)']}
date
['Date']
groups
{'Chemical Usage': ['Solvent Volume (m3)', 'Acid Volume (m3)', 'Divergent Volume (m3)', 'Inhibitor Volume (m3)', 'Neutralizer Volume (m3)', 'Linear Gel Volume (

In [63]:
# Filter selection passed to FilterHandler.apply_filters(); left empty so no rows are removed.
# Uncomment an entry below to enable that filter.
user_filters = {
    # "categorical": {"Lease": "APSL"},
    # "date": {"Date": ["2020-09-07","2020-09-22"]},
    # "numerical": {"Cost (USD)": [0,35000]}
}

In [64]:
# apply_filters() works on a copy, so filter_handler.df itself is left unchanged.
filtered_df = filter_handler.apply_filters(user_filters)

In [107]:
# Display the filtered frame (all columns are shown because display.max_columns is None).
filtered_df

Unnamed: 0,Region,Lease,Field,Well ID,Operational Permit,Cost (USD),Date,Well Category,Intervention Type,Company,Solvent Volume (m3),Acid Volume (m3),Divergent Volume (m3),Inhibitor Volume (m3),Neutralizer Volume (m3),Linear Gel Volume (m3),Treated Water Volume (m3),Brine Volume (m3),Water Volume (m3),Liquid Displacement (m3),Nitrogen Displacement (m3),Total Fluid Volume (m3),Top (ft),Base (ft),Qo Before (bpd),Qo After (bpd),Qo Difference (bpd),Ptp Before (kg/cm²),Ptp After (kg/cm²),Ptp Difference (kg/cm²),Pwf Before (kg/cm²),Pwf After (kg/cm²),Sensor / Nodal,Pwf Difference (kg/cm²),Revenue (USD)
0,Sur,APSL,SAMARIA,SAMARIA 678,Pozo1,35920.8000,2020-09-07,DESARROLLO,LIMPIEZA CON TF,NS,10.0,,,,,,,,,58.0,,10.0,215.0,82.0,6497.0,8498.0,2001.0,283.0,426.0,143.0,,,Nodal,0.0,65
1,Sur,APSL,SAMARIA,SAMARIA 695,Pozo2,35920.8000,2020-09-07,DESARROLLO,LIMPIEZA CON TF,NS,10.0,,,,,,,,,58.0,,10.0,232.0,880.0,4600.0,6412.0,1812.0,430.0,456.0,26.0,600.0,756.0,Sensor,156.0,65
2,Sur,APSL,CUNDUACAN,CUNDUACAN 30A,Pozo3,32169.4269,2020-09-22,DESARROLLO,LIMPIEZA CON TF,NS,10.0,10.0,,5.0,5.0,,,,,80.0,,30.0,242.0,765.0,3000.0,5800.0,2800.0,300.0,470.0,170.0,520.0,710.0,Sensor,190.0,65
3,Sur,APSL,SAMARIA,SAMARIA 702,Pozo4,14831.8000,2020-10-30,DESARROLLO,LIMPIEZA CON TF,NS,10.0,,,,,,,,,74.0,,10.0,303.0,906.0,0.0,0.0,0.0,0.0,0.0,0.0,,,Nodal,0.0,65
4,Sur,APSL,SINI,SINI 2,Pozo5,34867.7396,2020-11-18,DESARROLLO,LIMPIEZA CON TF,NS,20.0,3.0,,1.5,1.5,,,,,68.0,,26.0,133.0,1005.0,2800.0,4600.0,1800.0,400.0,432.0,32.0,610.0,650.0,Sensor,40.0,65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,Sur,APBJ,CUPACHE,CUPACHE 1,,29663.6000,2022-03-25,DESARROLLO,LIMPIEZA CON TF,,20.0,,,,,,,,,,,20.0,,,,,,,,,,,,,65
194,Sur,APBJ,BELLOTA,BELLOTA 41,,19093.7800,2022-04-05,DESARROLLO,LIMPIEZA CIRCULADA,,10.0,3.0,,,1.5,,,,,,,14.5,,,,,,,,,,,,,65
195,Sur,APBJ,TUPILCO,TUPILCO 208T,,10128.7500,2022-04-05,DESARROLLO,LIMPIEZA CON TF,,,,,,,25.0,,,,,,25.0,,,,,,,,,,,,,65
196,Sur,APBJ,BELLOTA,BELLOTA 116A,,27867.8200,2022-04-22,DESARROLLO,LIMPIEZA CIRCULADA,,14.0,5.0,,,2.5,,,,,,,21.5,,,,,,,,,,,,,65


In [66]:
import json
import pandas as pd
import numpy as np
import importlib

class BaseKPI:
    """Registry mapping KPI names to callables that take a DataFrame and return the KPI value."""

    def __init__(self, service_line,mapping_path, kpi_storage_file="custom_kpi.json"):
        """
        Generic KPI registry that loads predefined and AI-generated KPIs.

        :param service_line: Service line name; selects the ``<service_line>_kpi_definitions``
                             module and filters the stored AI-generated KPIs.
        :param mapping_path: Path to the column mapping file, passed to the module's get_kpis().
        :param kpi_storage_file: JSON file holding the AI-generated KPI definitions.
        """
        self.service_line = service_line
        self.mapping_path = mapping_path
        self.kpi_storage_file = kpi_storage_file
        self.kpis = {}

        # Load both predefined and AI-generated KPIs
        self.load_kpis()

    def load_kpis(self):
        """
        Dynamically loads predefined and AI-generated KPIs into ``self.kpis``.

        Missing sources are reported with a printed message and otherwise ignored.
        """
        try:
            # Import the service-line module and get all functions
            module_name = f"{self.service_line.lower()}_kpi_definitions"
            kpi_module = importlib.import_module(module_name)

            # Get all function-based KPIs
            self.kpis.update(kpi_module.get_kpis(self.mapping_path))  # Loads functions instead of a dictionary
        except ModuleNotFoundError:
            print(f"Warning: No predefined KPIs found for {self.service_line}.")

        # Load AI-generated KPIs
        try:
            with open(self.kpi_storage_file, "r", encoding="utf-8") as f:
                ai_kpis = json.load(f)
        except FileNotFoundError:
            print("No AI-generated KPIs found.")
            return
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Could not parse AI-generated KPIs: {e}")
            return

        for kpi in ai_kpis:
            try:
                if kpi["service_line"].lower() == self.service_line.lower():
                    self.kpis[kpi["name"]] = self.create_dynamic_kpi(kpi["formula"])
            except (KeyError, TypeError, AttributeError) as e:
                # One malformed entry must not prevent the remaining KPIs from loading.
                print(f"Skipping malformed AI-generated KPI entry: {e}")

    def create_dynamic_kpi(self, formula):
        """
        Converts an AI-generated formula into a callable function.

        The formula may be a single expression (its value is returned) or one or more
        statements that assign the outcome to a variable named ``result``.

        SECURITY NOTE: the formula is executed with eval()/exec(). Only ``pd``, ``np`` and
        ``df`` are provided explicitly, but Python builtins remain reachable, so this is
        NOT a sandbox. Only run formulas from a trusted source.

        :param formula: Python source text of the KPI.
        :return: Function taking a DataFrame and returning the KPI value, or None
                 (after printing the error) when evaluation fails.
        """
        # Decide once whether the text is an expression; statement formulas such as
        # "result = ..." are a SyntaxError for eval() and must go through exec().
        try:
            expression_code = compile(formula, "<ai_kpi>", "eval")
        except (SyntaxError, ValueError, TypeError):
            expression_code = None

        def safe_eval(df):
            # A single namespace lets lambdas/functions inside the formula see df as well.
            namespace = {"pd": pd, "np": np, "df": df}

            try:
                if expression_code is not None:
                    return eval(expression_code, namespace)
                exec(formula, namespace)
                return namespace.get("result", None)
            except Exception as e:
                print(f"Error evaluating AI-generated KPI: {e}")
                return None

        return safe_eval


In [67]:
class KPIEngine:
    """Compute registered KPIs on a DataFrame and turn AI-generated formulas into callables."""

    def __init__(self, df, service_line,mapping_path):
        """
        :param df: Filtered DataFrame from FilterHandler.
        :param service_line: The service line (e.g., "Stimulation", "Artificial Lift").
        :param mapping_path: Path to the JSON column mapping file.
        """
        self.df = df
        self.mapping_path = mapping_path
        self.service_line = service_line
        self.kpi_registry = BaseKPI(service_line=self.service_line,mapping_path=self.mapping_path)

    def calculate_kpi(self, kpi_name):
        """
        Processes and calculates a KPI.
        :param kpi_name: Name of the KPI to compute.
        :return: Computed KPI result.
        :raises ValueError: If the KPI name is not present in the registry.
        """
        if kpi_name not in self.kpi_registry.kpis:
            raise ValueError(f"KPI '{kpi_name}' is not registered for {self.kpi_registry.service_line}.")

        kpi_function = self.kpi_registry.kpis[kpi_name]

        # Compute KPI on the pre-filtered dataset
        return kpi_function(self.df)

    def _extract_column_groups(self):
        """
        Extracts all column names and column groups dynamically from the provided mapping JSON file.
        Returns:
            - ALL_COLUMNS: A list of all available column names.
            - COLUMN_GROUPS: A dictionary of grouped column names.
        Both are empty when the mapping file cannot be read or parsed.
        """
        try:
            with open(self.mapping_path, "r", encoding="utf-8") as f:
                COLUMN_MAPPING = json.load(f)

            # Extract all column names
            ALL_COLUMNS = list(COLUMN_MAPPING.keys())

            # Extract column groups dynamically; entries without a (truthy) group are skipped
            COLUMN_GROUPS = {}
            for column_name, properties in COLUMN_MAPPING.items():
                group = properties.get("group") if isinstance(properties, dict) else None
                if group:
                    COLUMN_GROUPS.setdefault(group, []).append(column_name)

            return ALL_COLUMNS, COLUMN_GROUPS  # Return both

        except (UnicodeDecodeError, FileNotFoundError, json.JSONDecodeError) as e:
            print(f"Error loading column mapping: {e}")
            return [], {}  # Return empty values to prevent crashes

    def create_dynamic_kpi(self, formula):
        """
        Converts an AI-generated formula into a callable function.

        The formula is executed as Python statements and must assign its outcome to a
        variable named ``result``. It can use ``df``, ``pd``, ``np``, ``ALL_COLUMNS`` and
        ``COLUMN_GROUPS`` (e.g. ``df[COLUMN_GROUPS['Chemical Usage']]``, with either quote style).

        SECURITY NOTE: exec() is not a sandbox; Python builtins stay reachable. Only run
        formulas from a trusted source.

        :param formula: Python source text of the KPI.
        :return: Function taking a DataFrame and returning ``result``, or None (after
                 printing the error) when execution fails or ``result`` is never set.
        """
        # Load all column names and groups once; each call below receives fresh copies.
        ALL_COLUMNS, COLUMN_GROUPS = self._extract_column_groups()

        def safe_exec(df):
            # One shared namespace: with separate globals/locals, lambdas and functions
            # defined by the formula could not see df.
            namespace = {
                "pd": pd,
                "np": np,
                "df": df,
                "ALL_COLUMNS": list(ALL_COLUMNS),
                "COLUMN_GROUPS": {group: list(columns) for group, columns in COLUMN_GROUPS.items()},
            }

            try:
                exec(formula, namespace)  # Executes multiple lines of Python code
                return namespace.get("result", None)  # Ensure "result" is returned
            except Exception as e:
                print(f"Error evaluating AI-generated KPI: {e}")
                return None

        return safe_exec  # Returns a callable function

In [12]:
# Show which mapping file is currently bound to mapping_path in the kernel.
mapping_path

'mappings/well_stimulation_mapping.json'

In [68]:
# Constructing the engine also builds its BaseKPI registry, which prints a notice
# when no stored AI-generated KPI file is found.
kpi_engine = KPIEngine(df=filtered_df,service_line='Stimulation',mapping_path=mapping_path)

No AI-generated KPIs found.


In [69]:
# List the registered KPI names and the callables behind them.
kpi_engine.kpi_registry.kpis

{'Total Chemical Percentage': <function stimulation_kpi_definitions.get_kpis.<locals>.total_chemical_percentage(df)>,
 'Total Chemical Usage per Lease': <function stimulation_kpi_definitions.get_kpis.<locals>.total_chemical_usage_per_lease(df)>,
 'Total Number of Activities': <function stimulation_kpi_definitions.get_kpis.<locals>.total_number_of_activities(df)>,
 'Total Revenue per Intervention': <function stimulation_kpi_definitions.get_kpis.<locals>.total_revenue_per_intervention(df)>}

In [70]:
# Run one predefined KPI on the filtered frame; calculate_kpi raises ValueError for unknown names.
kpi_engine.calculate_kpi("Total Chemical Usage per Lease")

{'Solvent Volume (m3)': {'APBJ': 1656.0,
  'APCP': 623.0,
  'APMM': 171.0,
  'APSL': 135.5,
  'APV': 18.0},
 'Acid Volume (m3)': {'APBJ': 200.0,
  'APCP': 33.5,
  'APMM': 403.0,
  'APSL': 103.5,
  'APV': 0.0},
 'Divergent Volume (m3)': {'APBJ': 7.0,
  'APCP': 0.0,
  'APMM': 12.0,
  'APSL': 4.0,
  'APV': 0.0},
 'Inhibitor Volume (m3)': {'APBJ': 41.5,
  'APCP': 4.0,
  'APMM': 17.0,
  'APSL': 10.0,
  'APV': 0.0},
 'Neutralizer Volume (m3)': {'APBJ': 84.0,
  'APCP': 4.0,
  'APMM': 32.0,
  'APSL': 29.0,
  'APV': 0.0},
 'Linear Gel Volume (m3)': {'APBJ': 220.0,
  'APCP': 180.0,
  'APMM': 13.0,
  'APSL': 45.0,
  'APV': 0.0},
 'Treated Water Volume (m3)': {'APBJ': 33.0,
  'APCP': 13.0,
  'APMM': 52.0,
  'APSL': 0.0,
  'APV': 0.0},
 'Brine Volume (m3)': {'APBJ': 30.0,
  'APCP': 0.0,
  'APMM': 0.0,
  'APSL': 18.0,
  'APV': 0.0},
 'Water Volume (m3)': {'APBJ': 50.0,
  'APCP': 0.0,
  'APMM': 0.0,
  'APSL': 42.0,
  'APV': 0.0}}

In [None]:
# Inputs for the AI KPI generator.
# NOTE(review): user_prompt is empty here and this cell has no execution count, so the
# output recorded below was presumably produced with a different prompt -- confirm.
from ai_integration.chatgpt_kpi_assistant import generate_ai_kpi
user_prompt = ""
service_line = "Stimulation"
mapping_path = "mappings/well_stimulation_mapping.json"

In [102]:
# Ask the assistant for a KPI definition; the result is indexed like a dict below (e.g. ai_kpi['formula']).
ai_kpi = generate_ai_kpi(user_prompt, service_line, mapping_path)


name='Total Chemical Volume (Usage)' formula="result = (Chemical Usage['Solvent Volume (m3)'] + Chemical Usage['Acid Volume (m3)'] + Chemical Usage['Divergent Volume (m3)'] + Chemical Usage['Inhibitor Volume (m3)'] + Chemical Usage['Neutralizer Volume (m3)'] + Chemical Usage['Linear Gel Volume (m3)'] + Chemical Usage['Treated Water Volume (m3)'] + Chemical Usage['Brine Volume (m3)'] + Chemical Usage['Water Volume (m3)'])" description='This KPI calculates the total volume of chemicals used during stimulation activities.' service_line='Stimulation'


In [103]:
# Inspect the generated KPI definition.
ai_kpi

{'name': 'Total Chemical Volume (Usage)',
 'formula': "result = (Chemical Usage['Solvent Volume (m3)'] + Chemical Usage['Acid Volume (m3)'] + Chemical Usage['Divergent Volume (m3)'] + Chemical Usage['Inhibitor Volume (m3)'] + Chemical Usage['Neutralizer Volume (m3)'] + Chemical Usage['Linear Gel Volume (m3)'] + Chemical Usage['Treated Water Volume (m3)'] + Chemical Usage['Brine Volume (m3)'] + Chemical Usage['Water Volume (m3)'])",
 'description': 'This KPI calculates the total volume of chemicals used during stimulation activities.',
 'service_line': 'Stimulation'}

In [104]:
# Inspect only the formula text that will be executed.
ai_kpi['formula']

"result = (Chemical Usage['Solvent Volume (m3)'] + Chemical Usage['Acid Volume (m3)'] + Chemical Usage['Divergent Volume (m3)'] + Chemical Usage['Inhibitor Volume (m3)'] + Chemical Usage['Neutralizer Volume (m3)'] + Chemical Usage['Linear Gel Volume (m3)'] + Chemical Usage['Treated Water Volume (m3)'] + Chemical Usage['Brine Volume (m3)'] + Chemical Usage['Water Volume (m3)'])"

In [105]:
# Compile the AI formula and run it on the filtered frame; on failure the error is
# printed and temp_kpi is None.
temp_kpi = kpi_engine.create_dynamic_kpi(ai_kpi['formula'])(filtered_df)

Error evaluating AI-generated KPI: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


In [106]:
# Nothing is displayed when the evaluation above failed, because temp_kpi is then None.
temp_kpi

In [77]:
# Second attempt: a concrete natural-language request for the AI KPI generator.
from ai_integration.chatgpt_kpi_assistant import generate_ai_kpi
user_prompt = "Calculate total USD cost used per lease"
service_line = "Stimulation"
mapping_path = "mappings/well_stimulation_mapping.json"

In [78]:
# Generate the KPI definition for the prompt above (overwrites the previous ai_kpi).
ai_kpi = generate_ai_kpi(user_prompt, service_line, mapping_path)


name='Total Cost per Lease' formula="result = df.groupby('Lease')['Cost (USD)'].sum()" description='Calculates the total cost in USD for each lease by summing the individual costs.' service_line='Stimulation'


In [79]:
# Execute the generated formula on the filtered frame and print what it assigned to `result`.
temp_kpi = kpi_engine.create_dynamic_kpi(ai_kpi['formula'])(filtered_df)
print(temp_kpi)

Lease
APBJ    1.753586e+07
APCP    5.114384e+07
APMM    1.977384e+05
APSL    9.898196e+06
APV     0.000000e+00
Name: Cost (USD), dtype: float64
