In [14]:
import json
import ollama
import re

def _locate_section(content, label, opener):
    """Return the index of the *opener* character that follows ``label:``.

    Only whitespace may sit between the colon and the opener. Returns None
    when no such header exists in *content*.
    """
    header = re.search(re.escape(label) + r':\s*' + re.escape(opener), content)
    return None if header is None else header.end() - 1


def _parse_list_section(content, label, noun, show_preview=True):
    """Decode the JSON array that follows ``label:`` in *content*.

    The array is decoded with ``raw_decode`` starting at its opening bracket,
    so nested arrays and ``]`` characters inside strings do not cut the
    section short (a non-greedy ``\\[.*?\\]`` regex stops at the first ``]``).

    Returns None when the header is absent, the decoded list on success and
    an empty list when the text after the header is not valid JSON.
    """
    start = _locate_section(content, label, '[')
    if start is None:
        return None
    if show_preview:
        print(f"{noun}字符串: {content[start:start + 200]}...")
    try:
        items, _ = json.JSONDecoder().raw_decode(content, start)
    except json.JSONDecodeError as e:
        print(f"{noun}解析错误: {e}")
        return []
    print(f"成功解析 {len(items)} 个{noun}")
    return items


def _recover_objective_fields(content, start):
    """Best-effort recovery of the objective when it is not valid JSON.

    The candidate text runs from the opening brace at *start* to the first
    ``}`` that is followed by ``;`` or the end of the input. The "function"
    and "description" string fields are then pulled out with regexes; both
    must be present, otherwise an empty dict is returned.
    """
    span = re.compile(r'\{.*?\}(?=\s*;|\s*$)', re.DOTALL).match(content, start)
    if span is None:
        return {}
    obj_str = span.group(0)
    func_match = re.search(r'"function":\s*"([^"]*)"', obj_str)
    text_match = re.search(r'"description":\s*"([^"]*)"', obj_str)
    if not (func_match and text_match):
        return {}
    recovered = {
        "function": func_match.group(1),
        "description": text_match.group(1)
    }
    print(f"手动解析目标函数成功: {recovered}")
    return recovered


def _parse_objective_section(content):
    """Decode the JSON object that follows ``objective function:``.

    Always returns a dict: the decoded object, the regex-recovered fields
    when the object is not valid JSON, or ``{}`` when nothing usable exists.
    """
    start = _locate_section(content, 'objective function', '{')
    if start is None:
        print("未找到目标函数匹配")
        return {}
    try:
        objective, end = json.JSONDecoder().raw_decode(content, start)
    except json.JSONDecodeError as e:
        print(f"目标函数解析错误: {e}")
        return _recover_objective_fields(content, start)
    print(f"目标函数字符串: {content[start:end]}")
    print(f"成功解析目标函数: {objective}")
    return objective


def parse_txt_file(file_path):
    """Parse a scheduling-problem description from a text file.

    The file is expected to contain labelled sections of the form
    ``problem description: "..."``, ``parameters: [...]``,
    ``decision variables: [...]``, ``objective function: {...}`` and
    ``constraints: [...]``, where the bracketed parts are JSON.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        A dict with the keys ``problem_description``, ``parameters``,
        ``decision_variables`` and ``constraints`` for every section whose
        header was found (list sections fall back to ``[]`` on invalid JSON),
        plus ``objective_function``, which is always present (``{}`` when
        missing or unparseable).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    print("原始文件内容:")
    print(content[:500])  # first 500 characters, for debugging

    result = {}

    # Problem description: a double-quoted string with no embedded quotes.
    desc_match = re.search(r'problem description:\s*"([^"]*)"', content)
    if desc_match:
        result['problem_description'] = desc_match.group(1)
        print(f"解析到问题描述: {result['problem_description'][:100]}...")

    parameters = _parse_list_section(content, 'parameters', '参数')
    if parameters is not None:
        result['parameters'] = parameters

    decision_variables = _parse_list_section(content, 'decision variables', '决策变量')
    if decision_variables is not None:
        result['decision_variables'] = decision_variables

    result['objective_function'] = _parse_objective_section(content)

    constraints = _parse_list_section(content, 'constraints', '约束条件', show_preview=False)
    if constraints is not None:
        result['constraints'] = constraints

    return result

def _symbols_from_reply(reply_text, var_symbols):
    """Pick the known decision-variable symbols out of a raw model reply.

    Resolution order:
      1. the whole answer parsed as JSON (a non-list value yields ``[]``);
      2. the first JSON array embedded in the answer (code fence, prose);
      3. plain substring search for each known symbol.
    Only symbols present in *var_symbols* are returned.
    """
    # Reasoning models can put a "<think>...</think>" preamble in front of the
    # answer. That preamble may mention every symbol listed in the prompt, so
    # only the text after the last closing tag is inspected.
    answer = reply_text.rsplit('</think>', 1)[-1].strip()

    try:
        parsed = json.loads(answer)
    except json.JSONDecodeError:
        parsed = None
        decoder = json.JSONDecoder()
        for bracket in re.finditer(r'\[', answer):
            try:
                candidate, _ = decoder.raw_decode(answer, bracket.start())
            except json.JSONDecodeError:
                continue
            if isinstance(candidate, list):
                parsed = candidate
                break
        if parsed is None:
            # No JSON array anywhere: fall back to scanning the answer text.
            return [symbol for symbol in var_symbols if symbol in answer]

    if not isinstance(parsed, list):
        return []
    # Discard anything the model invented that is not a known symbol.
    return [symbol for symbol in parsed if symbol in var_symbols]


def extract_related_variables_with_ollama(constraint_function, constraint_description, decision_variables):
    """Ask the Ollama model which decision variables a constraint uses.

    Args:
        constraint_function: Mathematical expression of the constraint.
        constraint_description: Natural-language description of the constraint.
        decision_variables: Iterable of dicts, each with a ``'symbol'`` key.

    Returns:
        The list of symbols from *decision_variables* that the model reported
        for this constraint. An empty list is returned when the model call or
        the reply handling fails for any reason (the error is printed).

    Raises:
        KeyError: If an entry of *decision_variables* has no ``'symbol'`` key.
    """
    # Symbols the model is allowed to answer with.
    var_symbols = [var['symbol'] for var in decision_variables]
    
    prompt = f"""
    给定以下约束条件：
    数学表达式: {constraint_function}
    描述: {constraint_description}
    
    以及可用的决策变量符号列表: {var_symbols}
    
    请分析这个约束条件中使用了哪些决策变量符号。只返回在约束条件中实际出现的决策变量符号列表，格式为JSON数组。
    
    例如，如果约束中使用了 C_{{ijq}} 和 u_{{ijq}}，则返回: ["C_{{ijq}}", "u_{{ijq}}"]
    
    只返回JSON数组，不要有其他解释。
    """
    
    # Deliberately best-effort: a failed lookup must not abort the conversion
    # of the remaining constraints, so every error degrades to "no variables".
    try:
        response = ollama.chat(
            model="deepseek-r1:8b", 
            messages=[{"role": "user", "content": prompt}]
        )
        return _symbols_from_reply(response['message']['content'], var_symbols)
    except Exception as e:
        print(f"Ollama提取相关变量失败: {e}")
        return []

def convert_to_standard_json_format(problem_description, parameters, decision_variables, objective_function, constraints):
    """Convert parsed scheduling-problem data into the standard JSON layout.

    Args:
        problem_description: Free-text description of the problem.
        parameters: List of dicts with ``symbol`` / ``definition`` keys.
        decision_variables: List of dicts with ``symbol`` / ``definition`` keys.
        objective_function: Dict with ``function`` / ``description`` keys; a
            falsy or non-dict value selects the built-in default objective.
        constraints: List of dicts with ``function`` (string or list of
            strings) and ``description`` keys.

    Returns:
        A dict with the keys ``title``, ``type``, ``description``,
        ``Nomenclature`` and ``Formulation``, or None when the Ollama
        connection probe fails.
    """

    print("Starting conversion to standard JSON format...")

    # Probe the backend with a tiny request first, so that an unreachable
    # server aborts the conversion before any real work is done.
    try:
        ollama.chat(
            model="deepseek-r1:8b",
            messages=[{"role": "user", "content": "Test connection"}],
            options={'num_predict': 5}
        )
        print("Ollama connection successful")
    except Exception as e:
        print(f"Ollama connection failed: {e}")
        return None

    default_title = "Flexible Job Shop Scheduling Problem"
    default_type = "Flexible Job Shop"

    # Generate title and type using the LLM
    title_prompt = f"Based on this problem description: '{problem_description[:300]}...' Generate a concise title and problem type in English. Output format: Title|Type"

    try:
        response = ollama.chat(
            model="deepseek-r1:8b", 
            messages=[{"role": "user", "content": title_prompt}]
        )
        # Reasoning models can prepend a "<think>...</think>" preamble; only
        # the text after the last closing tag is the "Title|Type" answer.
        answer = response['message']['content'].rsplit('</think>', 1)[-1].strip()
        pieces = answer.split('|')
        # An empty segment falls back to the default instead of producing "".
        title = pieces[0].strip() or default_title
        problem_type = (pieces[1].strip() if len(pieces) > 1 else "") or default_type
        print(f"Generated title: {title}")
        print(f"Generated type: {problem_type}")
    except Exception as e:
        print(f"Title/type generation failed: {e}")
        title = default_title
        problem_type = default_type

    # Skeleton of the output document.
    # NOTE(review): every branch of the objective handling below replaces the
    # "Objective Function" placeholder with a dict that has no "gurobi_code"
    # key, so that key never reaches the output - confirm this is intended.
    result = {
        "title": title,
        "type": problem_type,
        "description": problem_description,
        "Nomenclature": {
            "Parameters": [],
            "Decision Variables": [],
            "Domain terms": {
                "Makespan": "In production scheduling problems, the makespan is the maximum completion time of jobs"
            }
        },
        "Formulation": {
            "Objective Function": {
                "function": "",
                "gurobi_code": "",
                "description": ""
            },
            "Constraints": []
        }
    }
    nomenclature = result["Nomenclature"]
    formulation = result["Formulation"]

    # Parameters: keep only symbol and definition.
    for param in parameters:
        nomenclature["Parameters"].append({
            "symbol": param.get('symbol', ''),
            "definition": param.get('definition', '')
        })

    # Decision variables: the type is guessed from wording in the definition.
    binary_markers = ('binary', 'equals 1', '0 otherwise', '1 if')
    for var in decision_variables:
        definition = var.get('definition', '')
        lowered = definition.lower()
        is_binary = any(marker in lowered for marker in binary_markers)
        nomenclature["Decision Variables"].append({
            "symbol": var.get('symbol', ''),
            "definition": definition,
            "type": "Binary" if is_binary else "Continuous"
        })

    # Objective function: use the parsed one when it carries any content,
    # otherwise fall back to the built-in default.
    default_objective = {
        "function": "minimize C_max and sum of T_i",
        "description": "Minimize makespan and total tardiness simultaneously"
    }
    print(f"处理目标函数: {objective_function}")
    if objective_function and isinstance(objective_function, dict):
        obj_function = objective_function.get('function', '')
        obj_description = objective_function.get('description', '')

        print(f"目标函数内容: function='{obj_function}', description='{obj_description}'")

        if obj_function or obj_description:
            formulation["Objective Function"] = {
                "function": obj_function,
                "description": obj_description
            }
            print("目标函数已设置")
        else:
            print("目标函数内容为空")
            formulation["Objective Function"] = dict(default_objective)
    else:
        print("目标函数数据无效或为空")
        formulation["Objective Function"] = dict(default_objective)

    # Constraints: attach related parameters (text matching) and related
    # decision variables (LLM lookup) to each entry.
    for i, constr in enumerate(constraints):
        function = constr.get('function', '')
        description = constr.get('description', '')

        # Normalise a single expression to a one-element list.
        if not isinstance(function, list):
            function = [function]

        constraint_text = ' '.join(function)
        # Brace/underscore-free form, so "p_{ij}" also matches "p_ij" or "pij".
        simple_constraint = re.sub(r'[{}_]', '', constraint_text)

        # NOTE(review): this is plain substring matching, so a short symbol
        # such as "m" also matches inside longer names like "C_{max}".
        related_params = []
        for param in nomenclature["Parameters"]:
            symbol = param["symbol"]
            if not symbol:
                # An empty symbol is a substring of everything; skip it.
                continue
            simple_symbol = re.sub(r'[{}_]', '', symbol)
            matches = symbol in constraint_text or (
                simple_symbol and simple_symbol in simple_constraint
            )
            if matches and symbol not in related_params:
                related_params.append(symbol)

        print(f"使用Ollama提取约束 {i+1} 的相关决策变量...")
        related_vars = extract_related_variables_with_ollama(
            constraint_text, 
            description, 
            nomenclature["Decision Variables"]
        )

        print(f"约束 {i+1} 的相关决策变量: {related_vars}")

        formulation["Constraints"].append({
            "function": function,
            "description": description,
            "related Parameters": related_params,
            "related Decision Variables": related_vars,
        })

    print("Conversion completed successfully")
    return result

def run_complete_conversion_from_file(file_path, output_file='complete_scheduling_problem_from_file_fixed.json'):
    """Parse a problem file, convert it and save the result as JSON.

    Args:
        file_path: Path of the text file handed to ``parse_txt_file``.
        output_file: Path the converted JSON document is written to. Defaults
            to the file name this function has always used.

    Returns:
        The converted dict, or None when parsing yields nothing or the
        conversion fails.
    """

    # Parse the raw sections out of the input file.
    print(f"从文件 {file_path} 解析数据...")
    parsed_data = parse_txt_file(file_path)

    if not parsed_data:
        print("文件解析失败")
        return None

    # Missing sections fall back to empty values so the conversion can run.
    problem_description = parsed_data.get('problem_description', '')
    parameters = parsed_data.get('parameters', [])
    decision_variables = parsed_data.get('decision_variables', [])
    objective_function = parsed_data.get('objective_function', {})
    constraints = parsed_data.get('constraints', [])

    # Summary of what was parsed.
    print(f"解析到的问题描述: {problem_description[:100]}...")
    print(f"解析到的参数数量: {len(parameters)}")
    print(f"解析到的决策变量数量: {len(decision_variables)}")
    print(f"解析到的目标函数: {objective_function}")
    print(f"解析到的约束条件数量: {len(constraints)}")

    # Convert to the standard JSON layout.
    result = convert_to_standard_json_format(
        problem_description, 
        parameters, 
        decision_variables, 
        objective_function,
        constraints
    )

    if not result:
        print("转换失败")
        return None

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2, ensure_ascii=False)
    print(f"结果保存到 {output_file}")

    # Echo the objective function so the saved output can be checked by eye.
    print("最终JSON中的目标函数:")
    print(json.dumps(result["Formulation"]["Objective Function"], indent=2))

    return result

# Run the file-based conversion on '1.txt'
result = run_complete_conversion_from_file('1.txt')

从文件 1.txt 解析数据...
原始文件内容:
1. problem description: "A flexible job shop cell scheduling problem in cellular manufacturing with sequence-dependent family setup times and intercellular transfer times. The environment consists of multiple cells, part families, jobs, and machines. Each job includes multiple operations with specific routes through the shop. Operations can be processed on identical parallel machines. Jobs may visit machines or work centers multiple times (reentrant parts). Processing times include intracellular
解析到问题描述: A flexible job shop cell scheduling problem in cellular manufacturing with sequence-dependent family...
参数字符串: [{"symbol": "m", "definition": "The total number of machines"}, {"symbol": "n", "definition": "The total number of jobs"}, {"symbol": "C", "definition": "The total number of cells"}, {"symbol": "L", "...
成功解析 14 个参数
决策变量字符串: [{"symbol": "C_{ijq}", "definition": "The completion time of j-th operation of job i on machine q"}, {"symbol": "u_{ijq}", "defi