# OptCache Report Builder

Этот скрипт загружает данные оптимизации из CSV-файлов и выводит топ-5 результатов по убыванию custom_fitness и profit.

In [36]:

# Report filter and colouring rules.
#
# 'fields' maps a data column to:
#   'enabled'      - when False the column is not used for filtering,
#   'range'        - inclusive (min, max) bounds a row must satisfy to be kept,
#   'color_ranges' - half-open [low, high) bands and the background colour used
#                    when the value is rendered in the markdown report.
# 'max_runs_count' caps the number of rows kept after sorting;
# 'sort_by' / 'sort_dir' are passed to DataFrame.sort_values (True = ascending).
filter_rule = {
    'fields': {
        'profit': {
            'enabled': True,
            'range': (0.01, float('inf')),
            'color_ranges': []
        },
        'trades_per_30_days': {
            'enabled': True,
            'range': (0.5, float('inf')),
            'color_ranges': []
        },
        'profit_per_30_days_percent': {
            'enabled': False,
            'range': (0.005, float('inf')),
            'color_ranges': [
                {'range': (-float('inf'), 0.010), 'color': 'pink'},
                {'range': (0.010, 0.020), 'color': 'moccasin'},
                {'range': (0.020, float('inf')), 'color': 'lightgreen'}
            ]
        },
        'profit_per_365_days_percent': {
            'enabled': True,
            'range': (0.05, float('inf')),
            'color_ranges': [
                {'range': (-float('inf'), 0.050), 'color': 'pink'},
                {'range': (0.050, 0.070), 'color': 'moccasin'},
                {'range': (0.070, 0.100), 'color': '#C5E8B7'},
                {'range': (0.100, 0.150), 'color': '#ABE098'},
                {'range': (0.150, 0.200), 'color': '#83D475'},
                {'range': (0.200, 0.250), 'color': '#57C84D'},
                {'range': (0.250, float('inf')), 'color': '#2EB62C'}
            ]
        },
        'calmar': {
            'enabled': True,
            'range': (1.5, float('inf')),
            'color_ranges': [
                {'range': (1.0, 1.5), 'color': 'moccasin'},
                {'range': (1.5, float('inf')), 'color': 'lightgreen'}
            ]
        },
        'win_rate': {
            'enabled': False,
            'range': (0.0, float('inf')),
            # NOTE(review): BuildReportDF computes win_rate as a 0..1 fraction,
            # so a 50.0 threshold looks like a percent value - confirm the unit.
            'color_ranges': [
                {'range': (50.0, float('inf')), 'color': 'lightgreen'}
            ]
        },
        'custom_fitness': {
            'enabled': True,
            'range': (0.75, float('inf')),
            # Bands are contiguous: the middle band starts exactly where the
            # first one ends so that no value falls between two bands.
            'color_ranges': [
                {'range': (0.00, 0.75), 'color': 'pink'},
                {'range': (0.75, 0.90), 'color': 'moccasin'},
                {'range': (0.90, float('inf')), 'color': 'lightgreen'}
            ]
        },
        'reldrawdownpercnt_e': {
            'enabled': True,
            'range': (0, 15),
            'color_ranges': [
                {'range': (0.00, 5.0),  'color': 'lightgreen'},
                {'range': (5.00, 15.0), 'color': 'moccasin'},
                {'range': (15.0, float('inf')), 'color': 'pink'}
            ]
        },
    },
    'max_runs_count': 100,
    'sort_by': ['symbol', 'rank_score'],
    'sort_dir': [True, False]
}

# Settings consumed by the rank_score calculation.
# rank_score = R² × norm(Calmar) × WeightedSum × 100,
# with WeightedSum = Σ(w_i × norm(m_i)) / Σw_i.
# The weights below are relative: they are divided by their total when used,
# so any positive numbers work.
scoring_config = dict(
    weights=dict(
        sharpe_ratio=3,
        recovery_factor=2,
        win_rate=2,
        profit_factor=1,
    ),
)

In [37]:
import pandas as pd
import numpy as np
from pathlib import Path
import re
import plotly.graph_objects as go

In [38]:
def LoadHeaderFromFileToDF(optcache_filename: str) -> pd.DataFrame:
    """Read ``<optcache_filename>.Header.csv`` into a DataFrame.

    The file is parsed as a ';'-separated, UTF-16 encoded CSV. Two columns are
    added to the result: ``days`` (whole days between ``date_from`` and
    ``date_to``) and ``filename`` (the ``optcache_filename`` argument).

    Returns:
        The header DataFrame, or an empty DataFrame (after printing a message)
        when the header file is missing.
    """
    header_path = Path(f'{optcache_filename}.Header.csv')

    if header_path.exists():
        frame = pd.read_csv(header_path, sep=';', encoding='utf-16')
        period_end = pd.to_datetime(frame['date_to'])
        period_start = pd.to_datetime(frame['date_from'])
        frame['days'] = (period_end - period_start).dt.days
        frame['filename'] = optcache_filename
        return frame

    print(f"File {header_path} do not exist")
    return pd.DataFrame()

def ClassifySymbol(symbol: str) -> str:
    """Classify a trading symbol into a coarse asset category.

    The checks run in this order and the first hit wins:

    1. exact match against a list of known commodity names -> 'Commodities'
    2. symbol starts with a known crypto ticker            -> 'Crypto'
    3. symbol is an index keyword, or starts with one and
       also contains a digit (e.g. 'SP500', 'NAS100')      -> 'Indices'
    4. six letters made of two known currency codes        -> 'Forex'
    5. anything else                                        -> 'Other'

    Matching is case-insensitive.

    Args:
        symbol: Symbol name as it appears in the optimization header.

    Returns:
        One of 'Commodities', 'Crypto', 'Indices', 'Forex', 'Other'.
    """
    symbol_upper = symbol.upper()

    # Known commodities (matched exactly)
    commodities = {
        'GOLD', 'SILVER', 'COPPER', 'ALUMINIUM', 'ALUMINUM', 'PALLADIUM', 'PLATINUM',
        'BRENT', 'WTI', 'CRUDE', 'NGAS', 'NATGAS',
        'WHEAT', 'CORN', 'SOYBEAN', 'SUGAR', 'COFFEE', 'COCOA', 'COTTON',
        'XAUUSD', 'XAGUSD', 'XTIUSD',
    }
    if symbol_upper in commodities:
        return 'Commodities'

    # Known crypto tickers, matched as prefixes (BTCUSD, ETHUSD, ...).
    # str.startswith accepts a tuple, so no explicit loop is needed.
    crypto_prefixes = (
        'BTC', 'ETH', 'LTC', 'XRP', 'BCH', 'ADA', 'DOT', 'LINK', 'XLM',
        'DOGE', 'MATIC', 'SOL', 'AVAX',
    )
    if symbol_upper.startswith(crypto_prefixes):
        return 'Crypto'

    # Index keywords. A bare keyword matches exactly; a keyword followed by more
    # characters matches only when the symbol also carries a number (50, 100,
    # 500, ...). Requiring the digit keeps names such as 'SPOTCRUDE' from being
    # taken for an index just because they start with 'SP'.
    index_keywords = (
        'SP', 'NAS', 'DOW', 'DAX', 'FTSE', 'NIKKEI', 'CHINA50', 'RUSSELL',
        'STOXX50', 'DXY', 'HK50', 'UK100', 'US500', 'USTEC',
    )
    if symbol_upper in index_keywords:
        return 'Indices'
    has_digit = any(ch.isdigit() for ch in symbol_upper)
    if has_digit and symbol_upper.startswith(index_keywords):
        return 'Indices'

    # Forex: exactly six letters, both halves are known currency codes
    if len(symbol_upper) == 6 and symbol_upper.isalpha():
        currency_codes = {'USD', 'EUR', 'GBP', 'JPY', 'AUD', 'NZD', 'CAD', 'CHF', 'CNY', 'HKD', 'SGD'}
        if symbol_upper[:3] in currency_codes and symbol_upper[3:] in currency_codes:
            return 'Forex'

    # Nothing matched
    return 'Other'

def GroupSymbolsByCategory(symbols: list) -> dict:
    """Bucket symbols by the category returned from ClassifySymbol.

    Returns a dict whose keys follow a fixed category order; categories that
    received no symbols are left out and each symbol list is sorted.
    """
    category_order = ('Commodities', 'Indices', 'Crypto', 'Stocks', 'Forex', 'Other')
    buckets = {name: [] for name in category_order}

    for item in symbols:
        buckets[ClassifySymbol(item)].append(item)

    grouped = {}
    for name in category_order:
        members = buckets[name]
        if members:
            grouped[name] = sorted(members)
    return grouped

def BuildHeaderReport(header_df: pd.DataFrame, data_df: pd.DataFrame) -> list[str]:
    """Build the title and HTML summary table that precede the results table.

    Only the first row of ``header_df`` is used. The summary shows the bot
    name, test interval, duration, server, deposit and leverage, followed by
    the symbols found in ``data_df`` grouped by category.

    Args:
        header_df: Header data; an empty frame yields an empty report.
        data_df: Result rows; the unique values of its ``symbol`` column are
            listed. A frame without that column contributes no symbols.

    Returns:
        Report lines (markdown heading plus HTML table rows), or ``[]`` when
        ``header_df`` is empty.
    """
    if header_df.empty:
        return []

    row = header_df.iloc[0]

    # Format dates as YYYY-MM-DD
    date_from = pd.to_datetime(row["date_from"]).strftime("%Y-%m-%d")
    date_to = pd.to_datetime(row["date_to"]).strftime("%Y-%m-%d")

    # data_df may be a bare empty DataFrame (no 'symbol' column) when no data
    # could be loaded; treat that as "no symbols" instead of raising KeyError.
    if 'symbol' in data_df.columns:
        symbols = list(map(str, pd.unique(data_df['symbol'])))
    else:
        symbols = []
    grouped_symbols = GroupSymbolsByCategory(symbols) if symbols else {}

    # Show the leverage stored in the header; fall back to the previously
    # hard-coded 100 only when the header has no 'trade_leverage' column.
    leverage = row.get("trade_leverage", 100)

    report = [
        f'## Backtest Optimization for {header_df["expert_name"].iloc[0]} / {len(symbols)} sym',
        '<table>',
        '    <tr>',
        '        <td style="vertical-align: top"> ',
        f'            <b>Bot:</b> {row["expert_name"]} <br>',
        f'            <b>Interval:</b> [{date_from}; {date_to})<br>',
        f'            <b>Duration:</b> {row["days"]} days',
        '        </td>',
        '        <td style="vertical-align: top"> ',
        f'            <b>Server:</b> {row["server"]}<br>',
        f'            <b>Deposit:</b> {row["trade_deposit"]}<br>',
        f'            <b>Leverage:</b> {leverage}',
        '        </td>',
        '    </tr>',
        '    <tr>',
        '        <td colspan=2>',
    ]

    # Add symbols grouped by category
    for category, syms in grouped_symbols.items():
        report.append(f'<b>{category}:</b> {", ".join(syms)}')

    report.append('        </td>')
    report.append('    </tr>')
    report.append('</table>')

    return report

def CalculateRankScore(df: pd.DataFrame, config: dict) -> pd.Series:
    """
    Calculate rank_score for each row in DataFrame.

    Formula: rank_score = R² × norm(Calmar) × WeightedSum × 100

    Where:
    - R² = custom_fitness clipped to [0, 1]
    - norm(Calmar) = Min-Max normalized calmar ratio
    - WeightedSum = Σ(w_i × norm(m_i)) / Σw_i, taken over the weighted metrics
      that are actually present as columns of ``df``

    Metrics listed in the weights but missing from ``df`` are ignored in both
    the numerator and the denominator, so a missing column does not scale every
    score down. When no weighted metric is usable (no weights, all weights
    zero, or none of the columns exist) WeightedSum is 1.0.

    Args:
        df: DataFrame with optimization results; must contain the
            'custom_fitness' and 'calmar' columns.
        config: Scoring configuration; ``config['weights']`` maps metric
            column names to relative weights.

    Returns:
        Series with rank_score values rounded to 2 decimals, indexed like
        ``df`` (an empty float Series for an empty ``df``).
    """
    if df.empty:
        return pd.Series(dtype=float)

    # Min-Max normalization helper; a constant column normalizes to all 1.0
    def min_max_norm(series):
        min_val = series.min()
        max_val = series.max()
        if max_val == min_val:
            return pd.Series(1.0, index=series.index)
        return (series - min_val) / (max_val - min_val)

    # Get R² (custom_fitness), forced into [0, 1]
    r2 = df['custom_fitness'].clip(0, 1)

    # Normalize Calmar
    calmar_norm = min_max_norm(df['calmar'])

    # Keep only the weights whose metric exists in df, so the denominator
    # matches the terms that really enter the sum.
    weights = {
        metric: weight
        for metric, weight in config.get('weights', {}).items()
        if metric in df.columns
    }
    total_weight = sum(weights.values())

    if total_weight == 0:
        weighted_sum = pd.Series(1.0, index=df.index)
    else:
        weighted_sum = pd.Series(0.0, index=df.index)
        for metric, weight in weights.items():
            weighted_sum += weight * min_max_norm(df[metric])
        weighted_sum = weighted_sum / total_weight

    # Calculate final rank_score
    return (r2 * calmar_norm * weighted_sum * 100).round(2)

def BuildReportDF(optcache_filename: str, filter: dict) -> pd.DataFrame:
    """Load one optimization cache, derive report metrics and filter the rows.

    Reads ``<optcache_filename>.Header.csv`` (via LoadHeaderFromFileToDF) and
    ``<optcache_filename>.Data.csv``, adds derived columns (win rate, profit and
    trades per 30/365 days, calmar, rank_score) plus a copy of the header
    fields, then keeps the rows that satisfy every enabled range in
    ``filter['fields']``, sorts them and truncates to ``max_runs_count``.

    Args:
        optcache_filename: Base path of the exported cache (without the
            ``.Header.csv`` / ``.Data.csv`` suffix).
        filter: Rule dict with optional keys 'fields', 'sort_by', 'sort_dir'
            and 'max_runs_count'.

    Returns:
        The filtered, sorted DataFrame, or an empty DataFrame when either input
        file is missing.
    """
    header_df = LoadHeaderFromFileToDF(optcache_filename)
    if header_df.empty:
        return pd.DataFrame()

    data_file = Path(f'{optcache_filename}.Data.csv')
    if not data_file.exists():
        print(f"File {data_file} do not exist")
        return pd.DataFrame()

    # Length of the tested period in days, taken from the first header row
    period_days = header_df['days'].iloc[0]

    # add fields to data
    data_df = pd.read_csv(data_file, sep=';', encoding='utf-16')
    data_df['win_rate'] = data_df['profittrades'] / data_df['trades']
    data_df['profit_per_30_days'] = data_df['profit'] / period_days * 30
    data_df['profit_per_365_days'] = data_df['profit'] / period_days * 365
    data_df['profit_per_30_days_percent'] = data_df['profit_per_30_days'] / data_df['initial_deposit']
    data_df['profit_per_365_days_percent'] = data_df['profit_per_365_days'] / data_df['initial_deposit']
    data_df['trades_per_30_days'] = data_df['trades'] / period_days * 30

    # Calmar Ratio = Annual Return % / Max Drawdown %
    # Rows with zero drawdown cannot be divided, so their calmar is set to 0.
    # Nothing later in this function replaces that 0 with another value.
    data_df['calmar'] = np.where(
        data_df['reldrawdownpercnt_e'] > 0,
        data_df['profit_per_365_days_percent'] * 100 / data_df['reldrawdownpercnt_e'],
        0
    )

    # Copy header fields onto every data row
    span_days = (pd.to_datetime(header_df['date_to'].iloc[0]) - pd.to_datetime(header_df['date_from'].iloc[0])).days
    data_df['filename'] = optcache_filename
    data_df['symbol'] = header_df['symbol'].iloc[0]
    data_df['expert_name'] = header_df['expert_name'].iloc[0]
    data_df['date_from'] = header_df['date_from'].iloc[0]
    data_df['date_to'] = header_df['date_to'].iloc[0]
    data_df['days'] = span_days
    data_df['months'] = span_days // 30
    data_df['period'] = header_df['period'].iloc[0]
    data_df['trade_deposit'] = header_df['trade_deposit'].iloc[0]
    data_df['trade_currency'] = header_df['trade_currency'].iloc[0]
    data_df['trade_leverage'] = header_df['trade_leverage'].iloc[0]
    data_df['server'] = header_df['server'].iloc[0]
    data_df['ticks_mode'] = header_df['ticks_mode'].iloc[0]
    # rank_score is normalized over all passes of this file, before filtering
    data_df['rank_score'] = CalculateRankScore(data_df, scoring_config)

    data_df.fillna(0, inplace=True)

    # filter data: keep rows inside the inclusive range of every enabled rule
    report_data = data_df.copy()
    for column, rule in filter.get('fields', {}).items():
        if not rule.get('enabled', True):
            continue
        if not rule.get('range'):
            continue
        min_val, max_val = rule['range']
        report_data = report_data[(report_data[column] >= min_val) & (report_data[column] <= max_val)]

    # Sort with the rules that were passed in, not with the module-level
    # filter_rule, so callers supplying their own rule dict get their order.
    report_data = report_data.sort_values(filter.get('sort_by', []), ascending=filter.get("sort_dir", []))
    if 'max_runs_count' in filter:
        report_data = report_data.head(filter.get('max_runs_count'))

    return report_data

def CreateSetFile(optcache_filename: str, pass_number: int, output_dir: str = None, record_num: int = 1, total_records: int = 1) -> str:
    """
    Create a .set file from optimization data for a specific pass.

    The parameters written are the columns that follow 'avgconloosers' in
    ``<optcache_filename>.Data.csv``; when that column is absent, the columns
    whose name starts with 'Inp_' are used instead. The file is named
    ``<NN>_<expert>.<symbol>.<pass>.set`` where ``NN`` is ``record_num``
    zero-padded to the number of digits in ``total_records``.

    Args:
        optcache_filename: Path to the .opt file
        pass_number: Pass number to extract parameters from
        output_dir: Output directory for .set file (default: same as opt file)
        record_num: Record number in the report (for filename prefix)
        total_records: Total number of records (to determine prefix width)

    Returns:
        Path to created .set file or empty string if failed (missing input
        files, unknown pass, no parameter columns, or the output directory /
        file could not be written)
    """

    # Load header to get expert name and symbol
    header_df = LoadHeaderFromFileToDF(optcache_filename)
    if header_df.empty:
        print(f"Cannot load header from {optcache_filename}")
        return ""

    expert_name = header_df['expert_name'].iloc[0]
    symbol = header_df['symbol'].iloc[0]

    # Load data to find the specific pass
    data_file = Path(f'{optcache_filename}.Data.csv')
    if not data_file.exists():
        print(f"File {data_file} does not exist")
        return ""

    data_df = pd.read_csv(data_file, sep=';', encoding='utf-16')

    # Find the pass
    pass_data = data_df[data_df['Pass'] == pass_number]
    if pass_data.empty:
        print(f"Pass {pass_number} not found in {data_file}")
        return ""

    # Get parameter columns - all columns after 'avgconloosers'
    all_columns = list(data_df.columns)
    try:
        last_standard_field_idx = all_columns.index('avgconloosers')
    except ValueError:
        # If 'avgconloosers' not found, fall back to 'Inp_' prefix
        param_columns = [col for col in all_columns if col.startswith('Inp_')]
    else:
        param_columns = all_columns[last_standard_field_idx + 1:]

    if not param_columns:
        print("No input parameters found (columns after 'avgconloosers')")
        return ""

    # Create .set file content
    set_content = [
        "; saved automatically on generation",
        "; this file contains input parameters for testing/optimizing expert advisor",
        ";"
    ]

    # Add each parameter as name=value. Integer-like values go through int()
    # first; every other value is written with its default string form.
    pass_row = pass_data.iloc[0]
    for param in param_columns:
        value = pass_row[param]
        if isinstance(value, (int, np.integer)):
            value = int(value)
        set_content.append(f"{param}={value}")

    # Determine output path
    if output_dir is None:
        target_dir = Path(optcache_filename).parent
    else:
        target_dir = Path(output_dir)

    # Zero-padded record prefix, as wide as the total record count
    prefix_width = len(str(total_records))
    num_prefix = str(record_num).zfill(prefix_width)
    set_filename = target_dir / f"{num_prefix}_{expert_name}.{symbol}.{pass_number}.set"

    # Create the directory and write the file under one handler so that any
    # filesystem failure yields the documented "" instead of an exception.
    try:
        target_dir.mkdir(parents=True, exist_ok=True)
        set_filename.write_text('\n'.join(set_content), encoding='utf-8')
    except (OSError, UnicodeError) as e:
        print(f"Error writing .set file: {e}")
        return ""

    return str(set_filename)

In [42]:
def BuildReportRowsMD(header_df: pd.DataFrame, data_df: pd.DataFrame, filter: dict) -> list:
    """Render the result rows as a markdown table.

    Mandatory columns are always present; optional ones (expert, timeframe,
    server, deposit, ticks mode) appear only when the column exists in
    ``data_df`` and holds more than one distinct value. Rows are ordered by
    ``filter['sort_by']`` / ``filter['sort_dir']`` and numbered from 1. Values
    whose field has ``color_ranges`` in ``filter['fields']`` are wrapped in a
    coloured ``<span>`` when they fall into a half-open [low, high) band.

    Returns:
        List of markdown lines: header, separator, then one line per row.
    """

    def paint(field_name, value, text):
        # Wrap text in the colour of the first band that contains value
        rule = filter.get('fields', {}).get(field_name, {})
        bands = rule.get('color_ranges', []) if rule else []
        for band in bands or []:
            if band['range'][0] <= value < band['range'][1]:
                return f'<span style="background-color:{band["color"]};">{text}</span>'
        return text

    # (field, title, optional) in display order; optional columns are shown
    # only when their values vary
    column_specs = [
        ('#', '#', False),
        ('symbol', 'Symbol', False),
        ('expert_name', 'Expert', True),
        ('period', 'TF', True),
        ('server', 'Server', True),
        ('trade_deposit', 'Deposit', True),
        ('ticks_mode', 'Ticks', True),
        ('pass_num', 'Pass', False),
        ('profit', 'Profit', False),
        ('profit_per_365_days_percent', "<span title='Profit per 365 days, %'>%/yr</span>", False),
        ('reldrawdownpercnt_e', "<span title='Equity Drawdown, %'>DD%</span>", False),
        ('calmar', "Calmar", False),
        ('custom_fitness', "<span title='Custom Criterion'>CC</span>", False),
        ('trades_per_30_days', "<span title='Trades per 30 days'>Pos/mo</span>", False),
        ('win_rate', "<span title='Win Rate'>WR</span>", False),
        ('profit_factor', "<span title='Profit Factor'>PF</span>", False),
        ('recovery_factor', "<span title='Recovery Factor'>RF</span>", False),
        ('sharpe_ratio', 'Sharpe', False),
        ('rank_score', 'Rank', False),
        ('rank', 'Star', False),
        ('comment', 'Comment', False),
    ]

    visible = [
        spec for spec in column_specs
        if not spec[2] or (spec[0] in data_df.columns and data_df[spec[0]].nunique() > 1)
    ]

    lines = [
        '| ' + ' | '.join(title for _, title, _ in visible) + ' |',
        '| ' + ' | '.join('-' for _ in visible) + ' |',
    ]

    # One '{field}' placeholder per visible column
    line_template = '| ' + ' | '.join('{' + field + '}' for field, _, _ in visible) + ' |'

    ordered = data_df.sort_values(filter.get('sort_by', []), ascending=filter.get("sort_dir", []))
    for position, (_, rec) in enumerate(ordered.iterrows(), 1):
        values = {}
        values['#'] = int(position)
        values['symbol'] = f"<span title='{rec['filename']}'>[{rec['symbol']}]({rec['filename']})</span>"
        values['expert_name'] = str(rec['expert_name'])
        values['period'] = str(rec['period'])
        values['server'] = str(rec['server'])
        values['trade_deposit'] = f"{rec['trade_deposit']:.0f}"
        values['ticks_mode'] = str(rec['ticks_mode'])
        values['pass_num'] = int(rec['Pass'])
        # Thousands separator is a space instead of a comma
        values['profit'] = paint('profit', rec['profit'], f"{rec['profit']:,.0f}".replace(',', ' '))
        values['profit_per_30_days_percent'] = paint(
            'profit_per_30_days_percent',
            rec['profit_per_30_days_percent'],
            round(rec['profit_per_30_days_percent'] * 100, 1),
        )
        values['profit_per_365_days_percent'] = paint(
            'profit_per_365_days_percent',
            rec['profit_per_365_days_percent'],
            round(rec['profit_per_365_days_percent'] * 100, 1),
        )
        values['calmar'] = paint('calmar', rec['calmar'], round(rec['calmar'], 1))
        values['win_rate'] = round(rec['win_rate'] * 100, 1)
        values['custom_fitness'] = paint('custom_fitness', rec['custom_fitness'], round(rec['custom_fitness'], 2))
        values['trades'] = int(rec['trades'])
        values['trades_per_30_days'] = round(rec['trades_per_30_days'], 1)
        values['reldrawdownpercnt_e'] = paint(
            'reldrawdownpercnt_e',
            rec['reldrawdownpercnt_e'],
            round(rec['reldrawdownpercnt_e'], 2),
        )
        values['profit_factor'] = round(rec['profit_factor'], 2)
        values['recovery_factor'] = round(rec['recovery_factor'], 2)
        values['sharpe_ratio'] = round(rec['sharpe_ratio'], 2)
        values['rank_score'] = round(rec['rank_score'], 1)
        values['rank'] = '☆☆☆☆☆'
        values['comment'] = ''

        lines.append(line_template.format(**values))

    return lines

def BuildReportRowsCSV(data_df: pd.DataFrame, header_df: pd.DataFrame, filter: dict) -> list:
    """Render the result rows as ';'-separated CSV lines.

    The first line is the column header; the rest are the rows of ``data_df``
    ordered by ``filter['sort_by']`` / ``filter['sort_dir']`` and numbered
    from 1. Percentage-like fields are multiplied by 100 and rounded.
    """
    # CSV column names after the leading running number
    csv_fields = (
        'filename', 'symbol', 'period', 'date_from', 'date_to', 'deposit', 'leverage',
        'server', 'ticks_mode', 'pass_num', 'profit', 'profit_per_30_days_percent',
        'profit_per_365_days_percent', 'win_rate', 'custom_fitness', 'trades',
        'trades_per_30_days', 'reldrawdownpercnt_e', 'profit_factor',
        'recovery_factor', 'sharpe_ratio',
    )
    lines = [';'.join(('num',) + csv_fields)]
    line_template = ';'.join('{' + name + '}' for name in ('i',) + csv_fields)

    ordered = data_df.sort_values(filter.get('sort_by', []), ascending=filter.get("sort_dir", []))
    for position, (_, rec) in enumerate(ordered.iterrows(), 1):
        values = {
            'i': int(position),
            'filename': rec['filename'],
            'symbol': rec['symbol'],
            'period': rec['period'],
            'date_from': rec['date_from'],
            'date_to': rec['date_to'],
            'deposit': rec['trade_deposit'],
            'leverage': rec['trade_leverage'],
            'server': rec['server'],
            'ticks_mode': rec['ticks_mode'],
            'pass_num': int(rec['Pass']),
            'profit': rec['profit'],
            'profit_per_30_days_percent': round(rec['profit_per_30_days_percent'] * 100, 1),
            'profit_per_365_days_percent': round(rec['profit_per_365_days_percent'] * 100, 1),
            'win_rate': round(rec['win_rate'] * 100, 1),
            'custom_fitness': round(rec['custom_fitness'], 2),
            'trades': int(rec['trades']),
            'trades_per_30_days': round(rec['trades_per_30_days'], 1),
            'reldrawdownpercnt_e': round(rec['reldrawdownpercnt_e'], 2),
            'profit_factor': round(rec['profit_factor'], 2),
            'recovery_factor': round(rec['recovery_factor'], 2),
            'sharpe_ratio': round(rec['sharpe_ratio'], 2),
        }
        lines.append(line_template.format(**values))

    return lines

def BuildHeatmapReportMD(header_df: pd.DataFrame, filtered_df: pd.DataFrame, filter_rule: dict, all_symbols: list = None) -> list:
    """Render a markdown grid with one coloured cell per symbol.

    Each cell shows the symbol, how many filtered rows it has and the calmar of
    its top row (first row after sorting by ``filter_rule``). Cells are laid
    out six per row; symbols without rows are drawn grey with a count of 0.

    Args:
        header_df: Header DataFrame
        filtered_df: DataFrame with filtered results
        filter_rule: Filter rules dictionary ('sort_by' / 'sort_dir' are used)
        all_symbols: Symbols to show, including those with 0 filtered results;
            defaults to the symbols present in ``filtered_df``

    Returns:
        Markdown table lines, or ``["No symbols found"]`` when there is
        nothing to show.
    """
    has_rows = not filtered_df.empty

    if all_symbols is None:
        all_symbols = sorted(filtered_df['symbol'].unique()) if has_rows else []

    # (symbol, row count, calmar of the top row) per symbol, alphabetically
    stats = []
    for name in sorted(all_symbols):
        subset = filtered_df[filtered_df['symbol'] == name] if has_rows else pd.DataFrame()
        hits = len(subset)
        top_calmar = 0
        if hits > 0:
            ranked = subset.sort_values(
                filter_rule.get('sort_by', []),
                ascending=filter_rule.get("sort_dir", [])
            )
            top_calmar = ranked.iloc[0]['calmar']
        stats.append((name, hits, top_calmar))

    if not stats:
        return ["No symbols found"]

    # Calmar thresholds, highest first, with their cell colours
    calmar_shades = (
        (2.0, '#2EB62C'),   # dark green
        (1.5, '#83D475'),   # light green
        (1.0, 'moccasin'),  # yellow
    )

    def shade(calmar, count):
        if count == 0:
            return '#D3D3D3'  # grey: no results
        for floor, colour in calmar_shades:
            if calmar >= floor:
                return colour
        return 'pink'  # below every threshold

    def render(name, hits, calmar):
        colour = shade(calmar, hits)
        if hits > 0:
            return f"<span style=\"background-color:{colour}; padding:5px; display:block; text-align:center;\"><b>{name}</b>({hits})<br>Calmar: {calmar:.1f}</span>"
        return f"<span style=\"background-color:{colour}; padding:5px; display:block; text-align:center;\"><b>{name}</b>(0)</span>"

    width = 6  # cells per table row

    # Empty header row followed by the markdown separator row
    table = [
        '| ' + ' | '.join([''] * width) + ' |',
        '| ' + ' | '.join(['-'] * width) + ' |',
    ]

    for start in range(0, len(stats), width):
        cells = [render(*entry) for entry in stats[start:start + width]]
        # Pad the last row with empty cells
        cells.extend([''] * (width - len(cells)))
        table.append('| ' + ' | '.join(cells) + ' |')

    return table

def BuildHeatmapHTML(header_df: pd.DataFrame, filtered_df: pd.DataFrame, filter_rule: dict, 
                     all_symbols: list, output_filename: str) -> str:
    """Build an interactive Plotly heatmap (one cell per symbol) and save it as HTML.

    For every symbol the "best" filtered result is the first row after sorting
    that symbol's rows by ``filter_rule['sort_by']`` / ``filter_rule['sort_dir']``.
    Its calmar value drives the cell color; profit, drawdown and custom fitness
    are shown as cell text and hover text. Symbols are laid out in a 6-column
    grid, ordered by category and then by symbol name.

    Args:
        header_df: Header DataFrame. The first row supplies ``expert_name``,
            ``server``, ``date_from`` and ``date_to`` for the title. If the
            frame is empty, 'Unknown' placeholders are used instead.
        filtered_df: DataFrame with filtered results (may be empty).
        filter_rule: Filter rules dictionary; only ``sort_by`` and ``sort_dir``
            are read here.
        all_symbols: List of all symbols (including those with 0 filtered results).
        output_filename: Output HTML filename.

    Returns:
        Path to saved HTML file (``output_filename`` as passed in).
    """

    # The sort specification is the same for every symbol, so look it up once.
    sort_by = filter_rule.get('sort_by', [])
    sort_dir = filter_rule.get("sort_dir", [])

    # Collect symbol data
    symbols_data = []
    for symbol in sorted(all_symbols):
        symbol_df = filtered_df[filtered_df['symbol'] == symbol] if not filtered_df.empty else pd.DataFrame()
        count = len(symbol_df)

        if count > 0:
            best_row = symbol_df.sort_values(sort_by, ascending=sort_dir).iloc[0]
            best_calmar = best_row['calmar']
            best_profit_pct = best_row['profit_per_365_days_percent'] * 100
            best_dd = best_row['reldrawdownpercnt_e']
            best_cc = best_row['custom_fitness']
        else:
            # No filtered results for this symbol: zero placeholders.
            best_calmar = best_profit_pct = best_dd = best_cc = 0

        symbols_data.append({
            'symbol': symbol,
            'count': count,
            'calmar': best_calmar,
            'profit_pct': best_profit_pct,
            'dd': best_dd,
            'cc': best_cc,
            'category': ClassifySymbol(symbol)
        })

    # Sort by category then symbol
    symbols_data.sort(key=lambda x: (x['category'], x['symbol']))

    # Arrange in grid (6 columns); rows = ceil(n / cols)
    cols = 6
    rows = (len(symbols_data) + cols - 1) // cols

    # Prepare data for heatmap
    z_values = []      # Calmar values for color
    text_values = []   # Text to display
    hover_values = []  # Hover text

    for row_idx in range(rows):
        z_row = []
        text_row = []
        hover_row = []
        for col_idx in range(cols):
            idx = row_idx * cols + col_idx
            if idx < len(symbols_data):
                data = symbols_data[idx]
                # -1 marks "no data"; it equals zmin below and so maps to the
                # first (gray) colorscale stop.
                z_row.append(data['calmar'] if data['count'] > 0 else -1)
                text_row.append(f"<b>{data['symbol']}({data['count']})</b><br><br>Profit/yr: {data['profit_pct']:.1f}%<br>DD: {data['dd']:.1f}%<br>CC: {data['cc']:.1f}")
                hover_row.append(
                    f"<b>{data['symbol']}</b> ({data['category']})<br>"
                    f"Sets: {data['count']}<br>"
                    f"Calmar: {data['calmar']:.2f}<br>"
                    f"Profit/yr: {data['profit_pct']:.1f}%<br>"
                    f"DD: {data['dd']:.1f}%<br>"
                    f"CC: {data['cc']:.2f}"
                )
            else:
                # Padding cell in the last, partially filled row
                z_row.append(None)
                text_row.append('')
                hover_row.append('')
        z_values.append(z_row)
        text_values.append(text_row)
        hover_values.append(hover_row)

    # Custom colorscale. Stop positions are fractions of the [zmin, zmax] = [-1, 5]
    # range used below, i.e. z = -1 + 6 * position.
    colorscale = [
        [0.0, '#D3D3D3'],   # z = -1.0: gray (no data)
        [0.1, 'pink'],      # z = -0.4: red for low calmar
        [0.3, 'moccasin'],  # z =  0.8: yellow for medium
        [0.5, '#83D475'],   # z =  2.0: light green
        [1.0, '#2EB62C']    # z =  5.0: dark green for high calmar
    ]

    # Get header info; fall back to placeholders instead of raising IndexError
    # when no header row is available.
    if header_df.empty:
        expert_name = server = date_from = date_to = 'Unknown'
    else:
        row = header_df.iloc[0]
        expert_name = row['expert_name']
        server = row['server']
        date_from = pd.to_datetime(row["date_from"]).strftime("%Y-%m-%d")
        date_to = pd.to_datetime(row["date_to"]).strftime("%Y-%m-%d")

    # Create figure
    fig = go.Figure(data=go.Heatmap(
        z=z_values,
        text=text_values,
        texttemplate="%{text}",
        textfont={"size": 12},
        hovertext=hover_values,
        hovertemplate="%{hovertext}<extra></extra>",
        colorscale=colorscale,
        zmin=-1,
        zmax=5,
        showscale=False
    ))

    # Update layout
    fig.update_layout(
        title=dict(
            text=f"<b>Heatmap: {expert_name}</b><br>"
                 f"<sup>{date_from} → {date_to} | {server} | {len(all_symbols)} symbols</sup>",
            x=0.5,
            font=dict(size=18)
        ),
        xaxis=dict(showticklabels=False, showgrid=False),
        # Reversed y axis so the first grid row is drawn at the top
        yaxis=dict(showticklabels=False, showgrid=False, autorange='reversed'),
        plot_bgcolor='white',
        width=900,
        height=150 + rows * 100,
        margin=dict(l=20, r=20, t=80, b=20)
    )

    # Save to HTML (plotly.js is referenced from the CDN rather than embedded)
    fig.write_html(output_filename, include_plotlyjs='cdn')

    return output_filename

## 2. Сортировка и вывод топ 5 результатов

In [43]:
# Report generation cell: loads every .opt file from the input directory,
# builds the combined results frame, writes the Markdown report, the Markdown
# and HTML heatmaps, and creates one .set file per reported record.

dir = Path('csv')  # NOTE: shadows the builtin `dir`; name kept in case other cells reference it
set_output_dir = Path('set_files')

full_df = pd.DataFrame()

report_md = []
# Column header for a semicolon-separated CSV export; not used in this cell
# (the CSV export at the bottom is commented out).
report_cvs = ["num;filename;symbol;period;date_from;date_to;deposit;leverage;server;ticks_mode;pass_num;profit;profit_per_30_days_percent;profit_per_365_days_percent;win_rate;custom_fitness;trades;trades_per_30_days;reldrawdownpercnt_e;profit_factor;recovery_factor;sharpe_ratio"]

opt_files = sorted(dir.glob("*.opt"))
if not opt_files:
    # Fail fast with a clear message: everything below needs at least one
    # .opt file (the report header is read from opt_files[0]).
    raise FileNotFoundError(f"No .opt files found in {dir}")

# Collect all symbols from all .opt header files (before filtering)
all_symbols = []
for opt_file in opt_files:
    hdr = LoadHeaderFromFileToDF(opt_file)
    if not hdr.empty:
        all_symbols.append(hdr['symbol'].iloc[0])
all_symbols = sorted(set(all_symbols))

# Build combined dataframe from all .opt files. Frames are collected first and
# concatenated once, instead of re-copying the growing frame on every iteration.
opt_frames = []
for opt_file in opt_files:
    opt_df = BuildReportDF(opt_file, filter_rule)
    if not opt_df.empty:
        opt_frames.append(opt_df)
if opt_frames:
    full_df = pd.concat(opt_frames, ignore_index=True)

# Load header from the first .opt file
header_df = LoadHeaderFromFileToDF(opt_files[0])

# Get report parameters for filename
expert_name = header_df['expert_name'].iloc[0] if not header_df.empty else 'Unknown'
server = header_df['server'].iloc[0] if not header_df.empty else 'Unknown'

# Generate report filename using all_symbols count (before filtering)
report_filename = f"{expert_name}_{server}_{len(all_symbols)}sym.md"

# Build MD report
report_header = BuildHeaderReport(header_df, full_df)
report_data = BuildReportRowsMD(header_df, full_df, filter_rule)
report_md = report_header + [''] + report_data

# Save MD report
with open(report_filename, "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in report_md)

print(f"MD report saved: {report_filename}")

# Build and save Heatmap MD report
heatmap_filename = f"{expert_name}_{server}_{len(all_symbols)}sym_heatmap.md"

# Build header for heatmap using all_symbols
grouped_symbols = GroupSymbolsByCategory(all_symbols)
if header_df.empty:
    # Same policy as the 'Unknown' fallbacks used for the file names above:
    # keep going with placeholders rather than failing on header_df.iloc[0].
    date_from = date_to = duration_days = deposit = 'Unknown'
else:
    row = header_df.iloc[0]
    date_from = pd.to_datetime(row["date_from"]).strftime("%Y-%m-%d")
    date_to = pd.to_datetime(row["date_to"]).strftime("%Y-%m-%d")
    duration_days = row["days"]
    deposit = row["trade_deposit"]

# NOTE(review): leverage is hard-coded to 100 here rather than read from the
# header — confirm this is intended.
heatmap_header = [
    f'## Backtest Optimization for {expert_name} / {len(all_symbols)} sym',
    '<table>',
    '    <tr>',
    '        <td style="vertical-align: top"> ',
    f'            <b>Bot:</b> {expert_name} <br>',
    f'            <b>Interval:</b> [{date_from}; {date_to})<br>',
    f'            <b>Duration:</b> {duration_days} days',
    '        </td>',
    '        <td style="vertical-align: top"> ',
    f'            <b>Server:</b> {server}<br>',
    f'            <b>Deposit:</b> {deposit}<br>',
    '            <b>Leverage:</b> 100',
    '        </td>',
    '    </tr>',
    '    <tr>',
    '        <td colspan=2>',
]
heatmap_header.extend(
    f'<b>{category}:</b> {", ".join(syms)}'
    for category, syms in grouped_symbols.items()
)
heatmap_header.extend([
    '        </td>',
    '    </tr>',
    '</table>',
])

heatmap_data = BuildHeatmapReportMD(header_df, full_df, filter_rule, all_symbols)
heatmap_md = heatmap_header + [''] + heatmap_data

with open(heatmap_filename, "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in heatmap_md)

print(f"Heatmap MD report saved: {heatmap_filename}")

# Build and save Heatmap HTML report using Plotly
heatmap_html_filename = f"{expert_name}_{server}_{len(all_symbols)}sym_heatmap.html"
BuildHeatmapHTML(header_df, full_df, filter_rule, all_symbols, heatmap_html_filename)
print(f"Heatmap HTML report saved: {heatmap_html_filename}")

# Create .set files for all records in the report
if not full_df.empty:
    set_output_dir.mkdir(parents=True, exist_ok=True)

    # Sort full_df the same way as in the report
    sorted_df = full_df.sort_values(filter_rule.get('sort_by', []), ascending=filter_rule.get("sort_dir", []))

    total_records = len(sorted_df)
    print(f"\nCreating .set files in {set_output_dir}/...")
    created_count = 0

    # Loop variable is deliberately not named `row`, so the header row bound
    # above is not overwritten.
    for _, set_row in sorted_df.iterrows():
        opt_filename = set_row['filename']
        pass_num = int(set_row['Pass'])
        # 1-based numbering; advances only when a file was actually created
        record_num = created_count + 1

        set_file = CreateSetFile(
            opt_filename,
            pass_num,
            output_dir=str(set_output_dir),
            record_num=record_num,
            total_records=total_records
        )
        if set_file:
            created_count += 1

    print(f"Created {created_count} .set files")

# report_csv = BuildReportRowsCSV(full_df, header_df, filter_rule)
# with open("optcache_report.csv", "w", encoding="utf-8") as f:
#     for line in report_csv:
#         f.write(line + "\n")

MD report saved: HTFVWAP-MT5-Bot-2.00_Tickmill-Demo_3sym.md
Heatmap MD report saved: HTFVWAP-MT5-Bot-2.00_Tickmill-Demo_3sym_heatmap.md
Heatmap HTML report saved: HTFVWAP-MT5-Bot-2.00_Tickmill-Demo_3sym_heatmap.html

Creating .set files in set_files/...
Created 167 .set files
Created 167 .set files


In [41]:
# Inspect the ranking-related columns of the combined results
# (at most the first 1000 rows, printed without truncation).
print("Columns with calmar and rank_score:")
print(
    full_df[
        [
            'symbol',
            'Pass',
            'calmar',
            'rank_score',
            'custom_fitness',
            'profit_per_365_days_percent',
            'reldrawdownpercnt_e',
        ]
    ]
    .head(1000)
    .to_string()
)

Columns with calmar and rank_score:
    symbol  Pass    calmar  rank_score  custom_fitness  profit_per_365_days_percent  reldrawdownpercnt_e
0     AAPL  3008  3.851496       83.52            0.99                     0.392309            10.185888
1     AAPL  2878  3.831575       82.99            0.99                     0.391129            10.208051
2     AAPL  2397  3.444446       73.12            0.99                     0.381729            11.082457
3     AAPL  2869  3.444446       73.12            0.99                     0.381729            11.082457
4     AAPL  2816  3.413178       72.56            0.99                     0.356966            10.458478
5     AAPL  2714  3.283206       71.58            0.99                     0.389257            11.855988
6     AAPL  2880  3.193882       71.54            0.98                     0.391892            12.270099
7     AAPL  3009  3.193882       71.54            0.98                     0.391892            12.270099
8     AAPL  2932  3