In [1]:
import pandas as pd

def is_within_bounds(row, bounds):
    """Tell whether every value of ``row`` lies inside its paired bound.

    Values and bounds are paired by position. Each bound is a ``(low, high)``
    pair and both ends are inclusive. Pairing stops at the shorter of the two
    sequences, so with no bounds at all the result is ``True``.

    Returns ``True`` if no paired value falls outside its range, else ``False``.
    """
    # all() stops at the first out-of-range value, just like an early return.
    return all(
        lower <= value <= upper
        for value, (lower, upper) in zip(row, bounds)
    )

def save_top_inputs(inputs_path, outputs_path, column_name='3', top_n=10, bounds=None,
                    top_inputs_path='top_inputs.csv', top_outputs_path='top_outputs.csv'):
    """Save the ``top_n`` in-bounds input rows whose output value is smallest.

    Both files are read as CSV and matched row for row through their default
    integer index. An input row is kept only when each of its leading values
    lies inside the inclusive ``[low, high]`` pair at the same position in
    ``bounds``. Pairing stops at the shorter of the two, so surplus columns or
    surplus bounds are ignored. A missing (NaN) value in a bounded column
    rejects the row.

    Parameters
    ----------
    inputs_path, outputs_path : str or path-like
        CSV files holding the inputs and the outputs.
    column_name : str
        Output column to minimise.
    top_n : int
        Number of rows to keep.
    bounds : sequence of (low, high) pairs, optional
        One pair per input column, in column order. ``None`` or an empty
        sequence applies no filtering.
    top_inputs_path, top_outputs_path : str or path-like
        Destinations of the two result CSV files.

    Returns
    -------
    str
        A success message, or ``"Not enough valid data."`` when fewer than
        ``top_n`` rows pass the bounds (nothing is written in that case).

    Raises
    ------
    ValueError
        If ``column_name`` is not a column of the outputs file.
    KeyError
        If a valid input row has no row with the same index in the outputs
        file (raised by the ``.loc`` lookup).
    """
    # None means "no bounds"; this avoids a mutable list as a default argument.
    bound_pairs = [] if bounds is None else list(bounds)

    # Load the data from the CSV files
    inputs_df = pd.read_csv(inputs_path)
    outputs_df = pd.read_csv(outputs_path)

    # Ensure the column_name exists in the outputs
    if column_name not in outputs_df.columns:
        raise ValueError(f"The column {column_name} does not exist in the outputs file.")

    # Build the row mask one column at a time instead of calling Python code
    # per row. between() is inclusive on both ends and yields False for NaN.
    in_bounds = pd.Series(True, index=inputs_df.index, dtype=bool)
    for position, (low, high) in zip(range(inputs_df.shape[1]), bound_pairs):
        in_bounds = in_bounds & inputs_df.iloc[:, position].between(low, high)
    valid_inputs_df = inputs_df[in_bounds]

    # Check if there are enough valid entries
    if len(valid_inputs_df) < top_n:
        print(f"Not enough valid entries found. Only {len(valid_inputs_df)} valid entries available.")
        return "Not enough valid data."

    # Select the outputs that share an index with a valid input row
    valid_outputs_df = outputs_df.loc[valid_inputs_df.index]

    # Keep the top N rows with the smallest values in the requested column
    top_outputs = valid_outputs_df.nsmallest(top_n, column_name)

    # Look the matching inputs up through the index of the selected outputs
    associated_inputs = inputs_df.loc[top_outputs.index]

    # Save the data to CSV files
    associated_inputs.to_csv(top_inputs_path, index=False)
    top_outputs[[column_name]].to_csv(top_outputs_path, index=False)

    return "Files saved successfully, found {} valid outputs.".format(len(top_outputs))






In [2]:
## BOUNDS
# One inclusive [low, high] pair per input column, listed in column order.
# A pair of [-inf, inf] leaves that input unconstrained; the rows flagged
# "!!!" are the ones that carry a finite restriction.

bounds = [
    [float('-inf'), float('inf')],    # 1. flask fermentation time
    [43200, 108000],                  # 2. seed fermentation time  !!!
    [float('-inf'), float('inf')],    # 3. fed seed volume
    [0.010225325, 0.030675975],       # 4. working seed volume  !!!
    [float('-inf'), float('inf')],    # 5. conversion seed
    [64800, 144000],                  # 6. main fermentation time  !!!
    [0.0190875, 0.0572625],           # 7. fed main volume  !!!
    [0.102253255, 0.306759765],       # 8. working main  !!!
    [0.9, 0.98],                      # 9. conversion main  !!!
    [float('-inf'), float('inf')],    # 10. concentration solids
    [0.28903585, 0.86710755],         # 11. resuspension/lysis/neutralization volume  !!!
    [float('-inf'), float('inf')],    # 12. UF/DF 1 equilibrium
    [10, 50],                         # 13. UF/DF 1 volumes  !!!
    [float('-inf'), float('inf')],    # 14. UF/DF 1 flush
    [float('-inf'), float('inf')],    # 15. UF/DF 2 equilibrium
    [10, 50],                         # 16. UF/DF 2 volumes  !!!
    [0.00067, 0.002],                 # 17. UF/DF 2 flush  !!!
    [float('-inf'), float('inf')],    # 18. failure rate
]




In [3]:
# Locations of the input and output CSV files. Both are relative paths, so
# update them if the data does not sit in the notebook's working directory.
inputs_file_path = 'inputs.csv'
outputs_file_path = 'outputs.csv'

# Filter the inputs by `bounds`, keep the rows with the smallest values in the
# default output column, and write top_inputs.csv / top_outputs.csv.
# The call returns a status string (also when too few rows pass the bounds),
# so print it to see which outcome occurred.
result = save_top_inputs(inputs_file_path, outputs_file_path, bounds=bounds)
print(result)


Files saved successfully, found 10 valid outputs.
