# In [None]:
import csv
import re
import json

# Function to extract numbers from a string (e.g., a column entry)

# Optional sign followed by either a float ("1.5", ".25") or an integer ("42").
# The alternation is grouped so the sign applies to integers as well as floats.
_NUMBER_PATTERN = re.compile(r'[-+]?(?:\d*\.\d+|\d+)')


def extract_numbers_from_string(string):
    """
    Extracts all numbers (integers and floats) from a given string.

    A '+' or '-' directly in front of the digits is treated as the sign of
    the number, for integers and floats alike (e.g. "-5" -> -5, "-5.0" -> -5.0).
    Exponent notation (e.g. "1e5") is not recognised as a single number.

    Args:
        string (str): The input string containing numbers. An empty string
            or None yields an empty list.

    Returns:
        list: A list of numbers extracted from the string, in order of
            appearance. Tokens containing a '.' are returned as float, all
            others as int.
    """
    # Empty cells (or None from a short CSV row) contain no numbers.
    if not string:
        return []

    tokens = _NUMBER_PATTERN.findall(string)
    return [float(token) if '.' in token else int(token) for token in tokens]

# Function to process CSV and extract numbers from three specific columns
def process_csv_file(file_path):
    """
    Processes the CSV file to extract numerical data from 'blends', 'cokeParameters',
    and 'processParameters' columns.

    Each data row contributes one list of numbers to each of the three
    matrices, so all three have one entry per CSV row. A cell that is empty,
    or missing because the row is shorter than the header, contributes an
    empty list.

    Args:
        file_path (str): The path to the CSV file.

    Returns:
        tuple: Three lists containing blends_matrix, cokeParameters_matrix, and processParameters_matrix.

    Raises:
        KeyError: If one of the three columns is absent from the header and
            the file contains at least one data row.
    """
    columns = ('blends', 'cokeParameters', 'processParameters')
    matrices = {column: [] for column in columns}

    with open(file_path, mode='r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            for column in columns:
                # DictReader fills fields missing from a short row with None;
                # treat those like an empty cell instead of crashing.
                cell = row[column] or ''
                matrices[column].append(extract_numbers_from_string(cell))

    return (
        matrices['blends'],
        matrices['cokeParameters'],
        matrices['processParameters'],
    )

# Function to process blends data and analyze source presence and percentages
def analyze_blends(file_path, all_sources):
    """
    Analyzes the blends data to determine the presence of each source and their
    minimum and maximum percentage ranges.

    The 'blends' cell of each row is parsed as a JSON list of objects with
    'country', 'minPercentage' and 'maxPercentage' keys. If the header has no
    'blends' column, the first column is used instead. Rows whose blends cell
    is empty or missing (including blank lines) are skipped and produce no
    entry in the result.

    Args:
        file_path (str): The path to the CSV file.
        all_sources (list): A list of all possible coal sources.

    Returns:
        list: A list of dictionaries containing presence, min_percentage, and max_percentage for each row.
            Each of the three values is a list aligned with all_sources;
            sources absent from a row get False / 0 / 0. An empty file
            yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-empty blends cell is not valid JSON.
        KeyError: If a blends entry lacks one of the expected keys.
    """
    results = []
    with open(file_path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        headers = next(reader, None)  # Skip header
        if headers is None:
            # Completely empty file: nothing to analyze.
            return results

        # Assuming 'blends' is the first column when it is not named in the header
        blends_index = headers.index('blends') if 'blends' in headers else 0

        for row in reader:
            # csv.reader yields [] for blank lines and short rows may not
            # reach the blends column; skip those along with empty cells.
            if len(row) <= blends_index or not row[blends_index]:
                continue

            blends_data = json.loads(row[blends_index])
            source_data = {
                entry['country']: {
                    'minPercentage': entry['minPercentage'],
                    'maxPercentage': entry['maxPercentage']
                }
                for entry in blends_data
            }

            results.append({
                'presence': [source in source_data for source in all_sources],
                'min_percentage': [
                    source_data[source]['minPercentage'] if source in source_data else 0
                    for source in all_sources
                ],
                'max_percentage': [
                    source_data[source]['maxPercentage'] if source in source_data else 0
                    for source in all_sources
                ],
            })
    return results

def main():
    """
    Runs the full report for the coal blend CSV.

    Extracts the numeric matrices from the file and prints the coke and
    process parameter matrices row by row (the blends matrix is extracted
    but not printed). Then analyzes the blends column against the fixed list
    of known coal sources and prints presence, minimum and maximum
    percentages for every analyzed row.
    """
    # Location of the input data
    csv_path = 'coal_blend_data.csv'  # Update the path as needed

    # Numeric matrices, one list of numbers per CSV row
    _blends_rows, coke_rows, process_rows = process_csv_file(csv_path)

    # Each matrix is printed under its heading, one row per line
    sections = (
        ("\nCoke Parameters Matrix:", coke_rows),
        ("\nProcess Parameters Matrix:", process_rows),
    )
    for heading, matrix in sections:
        print(heading)
        for entry in matrix:
            print(entry)

    # Every coal source that may appear in a blend
    all_sources = [
        "West Bokaro",
        "Sonoma",
        "Bhelatand",
        "Emereld",
        "Poetrel",
        "PCI",
        "Illawarra",
        "Bedford",
        "Curragh SS",
        "Moranbah N",
        "Goonyella",
        "Kestrel",
        "Tech Premium",
    ]

    # Per-row presence flags and percentage bounds, aligned with all_sources
    results = analyze_blends(csv_path, all_sources)

    # Report every analyzed row, numbered from 1
    for i, result in enumerate(results):
        print(f"\nRow {i+1} - Presence: {result['presence']}")
        print(f"Row {i+1} - Min Percentage: {result['min_percentage']}")
        print(f"Row {i+1} - Max Percentage: {result['max_percentage']}")


# Run the report only when executed as a script, not when imported
if __name__ == "__main__":
    main()
