In [1]:
import numpy as np
import pandas as pd
import pandas_ta as ta
from tqdm import tqdm
import re
import joblib
from typing import Any, Dict, List
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import time
from datetime import timedelta, datetime
import os
from joblib import Parallel, delayed
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, classification_report, precision_score, recall_score, roc_auc_score, precision_recall_curve, precision_recall_curve, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.feature_selection import mutual_info_classif
import shap
from ipywidgets import widgets
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from IPython.display import display

# Разметка данных

## plot_ohlc_signals

Рисует график OHLC с подсветкой buy/sell сигналов и шумных сигналов.

Аргументы:
- df с колонками *'Open','High','Low','Close','buy','sell','buy_noised','sell_noised','buy_strong','sell_strong'*
- **start_idx**: начальный индекс участка
- **end_idx**: конечный индекс участка

In [4]:
def plot_ohlc_signals(df, start_idx=0, end_idx=None):
    """
    Plot OHLC price lines with buy/sell signal markers for a slice of ``df``.

    Rows are selected positionally with ``df.iloc[start_idx:end_idx]``. A
    marker is drawn at the candle's Close price wherever a signal column
    equals 1. Every signal column is optional: a column that is absent from
    ``df`` is skipped instead of raising ``KeyError``.

    Args:
        df: DataFrame with 'Open', 'High', 'Low', 'Close' columns and any of
            the signal columns 'buy_noised', 'sell_noised', 'buy', 'sell',
            'buy_strong', 'sell_strong'.
        start_idx: positional index of the first row to plot.
        end_idx: positional index one past the last row to plot
            (defaults to ``len(df)``).
    """
    if end_idx is None:
        end_idx = len(df)

    plot_data = df.iloc[start_idx:end_idx]

    plt.figure(figsize=(16, 8))

    # Price lines: (column, matplotlib format string, extra line kwargs).
    price_lines = (
        ('Close', 'b-', {'linewidth': 1.5}),
        ('Open', 'g--', {'linewidth': 1, 'alpha': 0.7}),
        ('High', 'c:', {'linewidth': 1, 'alpha': 0.7}),
        ('Low', 'm:', {'linewidth': 1, 'alpha': 0.7}),
    )
    for column, fmt, line_kwargs in price_lines:
        plt.plot(plot_data.index, plot_data[column], fmt, label=column, **line_kwargs)

    # Signal markers: (column, legend label, scatter kwargs).
    # Noised and main signals are semi-transparent and sit below the strong
    # signals (zorder 3 vs 5), so strong markers stay visible on top.
    soft_buy = {'marker': '^', 's': 80, 'zorder': 3, 'alpha': 0.6,
                'edgecolors': 'darkgreen', 'linewidth': 0.5}
    soft_sell = {'marker': 'v', 's': 80, 'zorder': 3, 'alpha': 0.6,
                 'edgecolors': 'darkred', 'linewidth': 0.5}
    signal_specs = (
        ('buy_noised', 'Buy noised', {**soft_buy, 'color': 'blue'}),
        # Previously documented but never drawn.
        ('sell_noised', 'Sell noised', {**soft_sell, 'color': 'orange'}),
        ('buy', 'Buy main', {**soft_buy, 'color': 'lightgreen'}),
        ('sell', 'Sell main', {**soft_sell, 'color': 'lightcoral'}),
        ('buy_strong', 'Buy Strong',
         {'color': 'green', 'marker': '^', 's': 20, 'zorder': 5}),
        ('sell_strong', 'Sell Strong',
         {'color': 'red', 'marker': 'v', 's': 20, 'zorder': 5}),
    )
    for column, label, scatter_kwargs in signal_specs:
        if column not in plot_data.columns:
            continue
        signals = plot_data[plot_data[column] == 1]
        if not signals.empty:
            plt.scatter(signals.index, signals['Close'], label=label, **scatter_kwargs)

    plt.title('OHLC Prices with Buy/Sell Signals')
    plt.xlabel('Candles')
    plt.ylabel('Price')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Корреляционный анализ

## plot_corr_by_distance

**plot_corr_by_distance** Строит график средней корреляции (по модулю) между target и всеми признаками.
- по оси **y** указаны значения корреляции между целевой переменной **target** и признаками
- по оси **x** отложено расстояние от текущей закрытой свечи до свечей, на которых был проведен расчет признака

In [28]:
def plot_corr_by_distance(df, target='buy', pattern=r'_(\d+)$', min_corr=0.01):
    """
    Plot the mean absolute correlation between ``target`` and the features,
    grouped by the number embedded in each feature name.

    The number is extracted with ``pattern`` (first capture group) and is
    used as the x-axis "distance". Only features with |corr| > ``min_corr``
    contribute to the averages; features whose name does not match
    ``pattern`` are ignored.

    Args:
        df: DataFrame holding the features and the ``target`` column.
        target: name of the target column.
        pattern: regular expression whose first group captures the distance.
        min_corr: minimum absolute correlation for a feature to be counted.
    """
    corr = df.corr(numeric_only=True)[target].drop(target).abs()

    # Drop weak correlations before averaging.
    corr = corr[corr > min_corr]

    distance_re = re.compile(pattern)
    distance_corrs = {}
    for col, val in corr.items():
        # str() keeps non-string column labels from crashing the regex search.
        match = distance_re.search(str(col))
        if match:
            distance_corrs.setdefault(int(match.group(1)), []).append(val)

    if not distance_corrs:
        # Nothing matched: say so instead of silently showing an empty figure.
        print("No features matching the distance pattern exceed min_corr; nothing to plot.")
        return

    distances = sorted(distance_corrs)
    values = [np.mean(distance_corrs[d]) for d in distances]

    plt.figure(figsize=(8, 4))
    plt.plot(distances, values, marker='o')
    plt.title('График угасания корреляции')
    plt.xlabel('Расстояние от текущей свечи')
    plt.ylabel('Средняя |corr|')
    plt.grid(True)
    plt.show()

## plot_correlation_matrix

**plot_correlation_matrix** Строит тепловую карту корреляций между целевой переменной и признаками

Вход:
- df с признаками и целевой переменной
- drop_columns - базовые/промежуточные колонки, не требующие исследования
- top_n - фильтр по корреляции
- target - целевая переменная

In [41]:
def plot_correlation_matrix(df, target=None, drop_columns=['Data', 'High', 'Low', 'Close', 'Open', 'Volume'], top_n=30):
    """
    Draw a lower-triangle correlation heatmap for the top-N features.

    With ``target`` given (and present after dropping ``drop_columns``), the
    features are ranked by absolute correlation with the target. Otherwise
    they are ranked by their mean absolute correlation with all other
    features, and only when there are more than ``top_n`` of them.

    Any exception raised while building the plot is caught and printed, so
    the function never raises to the caller.

    Args:
        df: DataFrame with the features.
        target: name of the target column (if None, all features are
            correlated with each other).
        drop_columns: columns to exclude from the analysis; missing ones
            are ignored.
        top_n: number of features to display.
    """
    try:
        # Remove the columns that should not be analysed.
        data = df.drop(drop_columns, axis=1, errors='ignore')
        has_target = target is not None and target in data.columns

        if has_target:
            # numeric_only=True: DataFrame.corr raises on non-numeric columns
            # in pandas >= 2.0; this also matches plot_corr_by_distance.
            target_corr = data.corr(numeric_only=True)[target].abs().sort_values(ascending=False)

            # Top-N by |corr| with the target (the target itself ranks first).
            top_features = target_corr.head(top_n).index.tolist()

            # The target can fall outside the head if its correlation is NaN.
            if target not in top_features:
                top_features.append(target)

            corr_matrix = data[top_features].corr()
        else:
            # No target: correlate every numeric feature with every other.
            corr_matrix = data.corr(numeric_only=True)

            # Too many features: keep the N with the highest mean |corr|.
            if len(corr_matrix) > top_n:
                mean_abs_corr = corr_matrix.abs().mean().sort_values(ascending=False)
                top_features = mean_abs_corr.head(top_n).index
                corr_matrix = corr_matrix.loc[top_features, top_features]

        num_features = len(corr_matrix)

        # Figure size, font scale and annotations depend on the matrix size.
        if num_features <= 15:
            figsize = (10, 8)
            font_scale = 1.2
            annot = True
            label_size = 10
        elif num_features <= 30:
            figsize = (16, 14)
            font_scale = 1.0
            annot = False
            label_size = 9
        else:
            figsize = (20, 18)
            font_scale = 0.8
            annot = False
            label_size = 8
            # NOTE: these rcParams changes are global and persist after the call.
            plt.rcParams['xtick.major.pad'] = 0.5
            plt.rcParams['ytick.major.pad'] = 0.5

        # NOTE: sns.set also changes the global plotting style for later figures.
        sns.set(font_scale=font_scale)
        plt.figure(figsize=figsize)

        # Heatmap; the mask hides the upper triangle and the diagonal.
        heatmap = sns.heatmap(
            corr_matrix,
            cmap='coolwarm',
            annot=annot,
            fmt=".2f",
            square=True,
            linewidths=0.5,
            cbar_kws={"shrink": 0.7},
            mask=np.triu(np.ones_like(corr_matrix, dtype=bool)),
            annot_kws={"size": 8} if annot else None
        )

        # Axis tick labels.
        heatmap.set_xticklabels(
            heatmap.get_xticklabels(),
            rotation=45,
            ha='right',
            fontsize=label_size
        )
        heatmap.set_yticklabels(
            heatmap.get_yticklabels(),
            rotation=0,
            fontsize=label_size
        )

        # Title.
        if has_target:
            title = f'Корреляционная матрица (топ-{num_features} признаков с "{target}")'
        else:
            if len(data.columns) > num_features:
                title_suffix = f' (топ-{num_features} из {len(data.columns)} признаков)'
            else:
                title_suffix = f' ({num_features} признаков)'
            title = f'Корреляционная матрица{title_suffix}'

        plt.title(title, fontsize=14)
        plt.tight_layout()
        plt.show()

    except Exception as e:
        # Deliberate best-effort: report the problem without raising.
        print(f"Ошибка при построении графика: {e}")

# Визуализация признаков

**plot_price_with_indicators** Строит основной график цены и визуализирует используемые признаки - индикаторы.

На вход подается:
- df с колонкой цены **Close**
- исследуемые границы графика - **start / end**
- заголовок графика
- список индикаторов **indicators**

In [33]:
def plot_price_with_indicators(df, indicators, start=-400, end=-200, colors=None, title=None):
    """
    Plot the Close price with indicator series overlaid on a twin y-axis.

    df          - DataFrame with a 'Close' column and the indicator columns
    indicators  - names of the indicator columns to draw
    start, end  - positional bounds of the ``df.iloc`` slice to plot
    colors      - optional list of colours, cycled over the indicators
    title       - optional figure title
    """
    window = df.iloc[start:end]

    # Global style and figure background.
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.figure(figsize=(10, 6), facecolor='#f8f9fa')

    # Price is the main series: thick, dark, nearly opaque.
    plt.plot(window['Close'], label='Close', color='#2c3e50', linewidth=2.5, alpha=0.9)
    price_ax = plt.gca()
    price_ax.set_ylabel('Close Price', fontsize=12)
    price_ax.set_xlabel('Minutes', fontsize=12)
    price_ax.set_facecolor('#f0f3f5')

    # Indicators go on a second y-axis sharing the same x-axis.
    indicator_ax = plt.twinx()
    indicator_ax.set_ylabel('Indicators', fontsize=12)
    indicator_ax.set_facecolor('#f0f3f5')

    if colors is None:
        # Default muted palette.
        colors = ['#e74c3c', '#3498db', '#27ae60', '#f39c12', '#8e44ad',
                  '#16a085', '#d35400', '#2c3e50', '#7f8c8d', '#9b59b6']

    # Dash patterns repeat every three indicators.
    dash_patterns = ('--', '-.', ':')

    for position, name in enumerate(indicators):
        indicator_ax.plot(
            window[name],
            label=name,
            color=colors[position % len(colors)],
            linestyle=dash_patterns[position % len(dash_patterns)],
            linewidth=1.8,
            alpha=0.7,
        )

    # One legend combining the entries of both axes.
    price_handles, price_labels = price_ax.get_legend_handles_labels()
    indicator_handles, indicator_labels = indicator_ax.get_legend_handles_labels()
    plt.legend(
        price_handles + indicator_handles,
        price_labels + indicator_labels,
        loc='upper left',
        frameon=True,
        fancybox=True,
        shadow=True,
        fontsize=10,
    )

    # Light grid on the price axis only.
    price_ax.grid(True, linestyle='--', alpha=0.3)
    indicator_ax.grid(False)

    if title:
        plt.title(title, fontsize=14, fontweight='bold', pad=20)

    plt.tight_layout()
    plt.show()

# Расчёт и визуализация важности признаков на основе Mutual Information (до моделей)

**mutual_info_classif** вычисляет взаимную информацию между каждым признаком и целевой переменной.

Входные данные:
- X_train — матрица признаков
- y_train — целевая переменная
- top_n — количество топ-признаков для отображения
- random_state — seed для воспроизводимости

Процесс работы:
- Вычисляет Mutual Information между каждым признаком и целевой переменной
- Сортирует признаки по убыванию важности
- Выводит таблицу топ-N наиболее информативных признаков
- Строит горизонтальный барчарт для наглядной визуализации

Особенности:
- Работает с категориальными и числовыми признаками
- Оценивает нелинейные зависимости: может выявить сложные связи, которые пропускает линейная корреляция

In [48]:
def explain_model_mutual_info(X_train, y_train, top_n=20, random_state=3):
    """
    Rank features by Mutual Information with the target, then print and plot
    the top ``top_n``.

    Any exception is caught and printed, so the function never raises to the
    caller. Nothing is returned.

    Args:
        X_train: feature DataFrame (its ``columns`` are used as feature names).
        y_train: target values passed to ``mutual_info_classif``.
        top_n: number of highest-scoring features to show.
        random_state: seed forwarded to ``mutual_info_classif``.
    """
    try:
        # perf_counter is monotonic, so the elapsed time cannot go negative.
        start_time = time.perf_counter()
        print(f"ℹ️ Calculating Mutual Information for {X_train.shape[1]} features...")

        # 1. MI scores, sorted from most to least informative.
        mi_scores = mutual_info_classif(X_train, y_train, random_state=random_state)
        mi_df = pd.DataFrame({
            'Feature': X_train.columns,
            'MI_Score': mi_scores
        }).sort_values('MI_Score', ascending=False)

        elapsed_time = time.perf_counter() - start_time
        print(f"✅ MI calculation completed in {elapsed_time:.2f} seconds")

        # 2. Top-N table.
        top_features = mi_df.head(top_n)
        print(f"\n🔍 Top {top_n} Features by Mutual Information:")
        print(top_features.to_markdown(index=False, floatfmt=".4f"))

        # 3. Horizontal bar chart, best feature at the top.
        # max(): the height grows with the number of bars. The previous
        # min() capped it at 6 and shrank the figure for small top_n.
        plt.figure(figsize=(10, max(6, len(top_features) * 0.3)))
        plt.barh(top_features['Feature'][::-1],
                 top_features['MI_Score'][::-1],
                 color='skyblue')
        plt.xlabel('Mutual Information Score')
        plt.title(f'Top {top_n} Features by Mutual Information')
        plt.tight_layout()
        plt.show()

    except Exception as e:
        # Deliberate best-effort: report the problem without raising.
        print(f"❌ Ошибка при расчёте Mutual Information: {e}")

# Распределение целевой переменной внутри выборок

## show_class_balance

**show_class_balance** Анализирует и визуализирует распределение классов по выборкам

Вход:
- y: целевая переменная всего датасета
- y_train: обучающая выборка
- y_valid: валидационная выборка
- y_test: тестовая выборка

Выход:
- Таблица с долями классов в каждой выборке
- Столбчатая диаграмма распределения
- Визуальная проверка сбалансированности данных

Что делает: сравнивает пропорции классов между разными выборками для контроля репрезентативности разбиения

In [31]:
def show_class_balance(y, y_train, y_valid, y_test):
    """
    Print and plot the class proportions of the full dataset and each split.

    Each argument must provide ``value_counts(normalize=True)`` (for example
    a pandas Series). Classes missing from a split are reported as 0.

    Args:
        y: target of the whole dataset.
        y_train: target of the training split.
        y_valid: target of the validation split.
        y_test: target of the test split.
    """
    splits = {
        'Весь датасет': y,
        'Обучающая': y_train,
        'Валидационная': y_valid,
        'Тестовая': y_test,
    }

    # One column per split, one row per class; fillna covers absent classes.
    balance_df = pd.DataFrame({
        name: part.value_counts(normalize=True).round(3)
        for name, part in splits.items()
    }).fillna(0)

    print("📊 Баланс классов (доли):")
    print(
        balance_df.to_markdown(
            tablefmt="simple",   # plain table without extra rules
            stralign="center",
            floatfmt=".3f"
        )
    )

    # Draw on an explicit Axes. DataFrame.plot without ``ax`` opens its own
    # figure, which used to leave an empty stray figure and ignore the size.
    fig, ax = plt.subplots(figsize=(10, 5))
    balance_df.plot(
        kind='bar',
        width=0.8,
        color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'],
        rot=0,
        ax=ax,
    )
    ax.set_title('Распределение классов по выборкам', pad=20)
    ax.set_ylim(0, 1)
    ax.set_ylabel('Доля класса')
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    ax.legend(framealpha=0.9)
    fig.tight_layout()
    plt.show()

# Анализ порога классификации

## evaluate_model_with_threshold

**evaluate_model_with_threshold** Оценивает модель с подбором оптимального порога и возвращает результаты

Вход:
- model: обученная модель классификации
- X_train, y_train: обучающая выборка
- X_valid, y_valid: валидационная выборка
- X_test, y_test: тестовая выборка (опционально)

Выход:
- Словарь с моделью, метриками и метаданными:
  - Обученная модель
  - Метрики (F1, Precision, Recall, ROC AUC) для всех выборок
  - Оптимальный порог классификации
  - Список использованных признаков

In [73]:
def evaluate_model_with_threshold(model, X_train, y_train, X_valid, y_valid, X_test=None, y_test=None):
    """
    Evaluate a fitted classifier and choose a decision threshold by F1.

    Column 1 of ``model.predict_proba`` is used as the positive-class score
    (presumably binary 0/1 labels; confirm against the caller). F1,
    precision and recall are computed on a grid of 99 thresholds from 0.01
    to 0.99. The reported "optimal" threshold is the mean of the best-F1
    thresholds on train and on validation; the test split never influences
    it. Results are printed and plotted.

    Args:
        model: fitted estimator exposing ``predict_proba``.
        X_train, y_train: training split; ``X_train.columns`` gives the
            feature list.
        X_valid, y_valid: validation split.
        X_test, y_test: optional test split, evaluated only when both are
            given.

    Returns:
        dict of the form::

            {
                'model': model,
                'metrics': {
                    'train': {'F1', 'Precision', 'Recall', 'ROC_AUC'},
                    'valid': {...},
                    'test': {...},            # only when a test split is given
                    'optimal_threshold': float
                },
                'features': list,             # feature names
                'timestamp': str              # evaluation time
            }
    """
    has_test = X_test is not None and y_test is not None
    thresholds = np.linspace(0.01, 0.99, 99)

    def threshold_curves(y_true, y_proba):
        """Return per-threshold F1/precision/recall and ROC AUC for one split."""
        f1_scores, precisions, recalls = [], [], []
        for t in thresholds:
            # Predictions are computed once per threshold and shared by all metrics.
            y_pred = (y_proba >= t).astype(int)
            f1_scores.append(f1_score(y_true, y_pred, zero_division=0))
            precisions.append(precision_score(y_true, y_pred, zero_division=0))
            recalls.append(recall_score(y_true, y_pred, zero_division=0))
        return {
            'thresholds': thresholds,
            'f1_scores': f1_scores,
            'precision': precisions,
            'recall': recalls,
            'y_proba': y_proba,
            'roc_auc': roc_auc_score(y_true, y_proba),
        }

    def calculate_final_metrics(y_true, y_proba, threshold, set_name):
        """Compute and print the metrics of one split at a fixed threshold."""
        y_pred = (y_proba >= threshold).astype(int)
        metrics = {
            'F1': f1_score(y_true, y_pred, zero_division=0),
            'Precision': precision_score(y_true, y_pred, zero_division=0),
            'Recall': recall_score(y_true, y_pred, zero_division=0),
            'ROC_AUC': roc_auc_score(y_true, y_proba)
        }
        print(f"\n📊 {set_name} set (Threshold = {threshold:.4f}):")
        print(f"✅ F1: {metrics['F1']:.4f}")
        print(f"✅ Precision: {metrics['Precision']:.4f}")
        print(f"✅ Recall: {metrics['Recall']:.4f}")
        print(f"✅ ROC AUC: {metrics['ROC_AUC']:.4f}")
        return metrics

    def draw_split_panel(ax, split_metrics, best_threshold, best_color, best_label, panel_title):
        """Draw precision/recall/F1 against threshold for one split."""
        ax.plot(thresholds, split_metrics['precision'], label='Precision', color='blue')
        ax.plot(thresholds, split_metrics['recall'], label='Recall', color='green')
        ax.plot(thresholds, split_metrics['f1_scores'], label='F1', color='red')
        ax.axvline(optimal_threshold, color='k', linestyle='-',
                   label=f'Avg Optimal: {optimal_threshold:.3f}')
        ax.axvline(best_threshold, color=best_color, linestyle=':',
                   label=f'{best_label}: {best_threshold:.3f}')
        ax.set_title(panel_title)
        ax.set_xlabel('Threshold')
        ax.set_ylabel('Score')
        ax.legend()
        # Explicit True: a bare grid() call toggles and can switch the grid off.
        ax.grid(True)

    # 1. Positive-class probabilities.
    y_train_proba = model.predict_proba(X_train)[:, 1]
    y_valid_proba = model.predict_proba(X_valid)[:, 1]

    # 2. Per-threshold curves and the best-F1 threshold of each split.
    train_metrics = threshold_curves(y_train, y_train_proba)
    valid_metrics = threshold_curves(y_valid, y_valid_proba)
    train_best_threshold = thresholds[np.argmax(train_metrics['f1_scores'])]
    valid_best_threshold = thresholds[np.argmax(valid_metrics['f1_scores'])]
    train_metrics['max_f1_threshold'] = train_best_threshold
    valid_metrics['max_f1_threshold'] = valid_best_threshold

    # 3. The final threshold is the average of the train and valid optima.
    optimal_threshold = float(np.mean([train_best_threshold, valid_best_threshold]))

    # 4. Report.
    print(f"🎯 Лучший порог по F1 (Train): {train_best_threshold:.4f}")
    print(f"🎯 Лучший порог по F1 (Valid): {valid_best_threshold:.4f}")
    print(f"✅ Усредненный оптимальный порог: {optimal_threshold:.4f}")
    print("\n📊 ROC AUC Scores:")
    print(f"✅ Train ROC AUC: {train_metrics['roc_auc']:.4f}")
    print(f"✅ Valid ROC AUC: {valid_metrics['roc_auc']:.4f}")

    # 5. Final metrics at the averaged threshold.
    train_metrics['final_metrics'] = calculate_final_metrics(
        y_train, y_train_proba, optimal_threshold, "Train"
    )
    valid_metrics['final_metrics'] = calculate_final_metrics(
        y_valid, y_valid_proba, optimal_threshold, "Valid"
    )

    test_metrics = None
    if has_test:
        y_test_proba = model.predict_proba(X_test)[:, 1]
        test_metrics = threshold_curves(y_test, y_test_proba)
        test_metrics['final_metrics'] = calculate_final_metrics(
            y_test, y_test_proba, optimal_threshold, "Test"
        )

    # 6. Visualisation: train panel, validation panel, F1 comparison.
    fig, (train_ax, valid_ax, compare_ax) = plt.subplots(1, 3, figsize=(18, 6))

    draw_split_panel(train_ax, train_metrics, train_best_threshold,
                     'b', 'Train Max F1', 'Train Set')
    # This panel shows the validation split (it used to be titled 'Test Set').
    draw_split_panel(valid_ax, valid_metrics, valid_best_threshold,
                     'orange', 'Valid Max F1', 'Validation Set')

    compare_ax.plot(thresholds, train_metrics['f1_scores'], label='Train F1', color='blue')
    compare_ax.plot(thresholds, valid_metrics['f1_scores'], label='Valid F1', color='orange')
    if test_metrics is not None:
        compare_ax.plot(thresholds, test_metrics['f1_scores'], label='Test F1', color='green')
    compare_ax.axvline(optimal_threshold, color='k', linestyle='-',
                       label=f'Avg Optimal: {optimal_threshold:.3f}')
    compare_ax.axvline(train_best_threshold, color='b', linestyle=':', alpha=0.5)
    compare_ax.axvline(valid_best_threshold, color='orange', linestyle=':', alpha=0.5)
    compare_ax.set_title('F1 Comparison with Optimal Threshold')
    compare_ax.set_xlabel('Threshold')
    compare_ax.set_ylabel('F1 Score')
    compare_ax.legend()
    compare_ax.grid(True)

    fig.tight_layout()
    plt.show()

    # 7. Package the result in the format used for saving.
    model_package = {
        'model': model,
        'metrics': {
            'train': train_metrics['final_metrics'],
            'valid': valid_metrics['final_metrics'],
            'optimal_threshold': optimal_threshold
        },
        'features': list(X_train.columns),
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }

    if test_metrics is not None:
        model_package['metrics']['test'] = test_metrics['final_metrics']

    return model_package

# Интерпретация модели

## SHAP

### explain_model_shap

Функция **explain_model_shap**:
- Вычисляет SHAP-значения для интерпретации модели
- Анализирует важность и направление влияния признаков
- Визуализирует топ-N наиболее значимых признаков

Входные данные:
- X_train — датафрейм с признаками
- model — обученная модель (RandomForest, XGBoost, LogisticRegression и др.)
- sample_size — размер подвыборки для анализа (по умолчанию 2000)
- top_n — количество топ-признаков для отображения
- n_jobs — количество ядер для параллельных вычислений

Выходные данные:
- DataFrame с ранжированными признаками по важности
- Визуализация важности признаков

Особенности метода:
- Автоматическое определение типа модели (TreeExplainer, LinearExplainer)
- Поддержка многоклассовой классификации
- Анализ направления влияния (Positive/Negative)
- Сравнение с feature_importances_ модели

Ключевые метрики:
- Относительная важность признаков в %
- Направление влияния на предсказание
- Кумулятивная важность топ-признаков


In [59]:
def explain_model_shap(X_train, model, sample_size=2000, top_n=20, n_jobs=-1):
    """
    Compute SHAP-based feature importance for a fitted model, print a summary
    table and draw a bar chart of the top features.

    Parameters
    ----------
    X_train : pd.DataFrame
        Feature frame. A random subsample of its rows is explained; it is also
        passed as background data to the linear and generic explainers.
    model : fitted estimator
        The explainer is chosen by the model's class name: the tree ensembles
        listed in ``tree_model_names`` use ``shap.TreeExplainer``,
        ``LogisticRegression`` / ``LinearRegression`` use ``shap.LinearExplainer``,
        anything else falls back to ``shap.Explainer``.
    sample_size : int
        Number of rows drawn without replacement (capped at ``len(X_train)``).
    top_n : int
        Number of features shown in the printed table and in the chart.
    n_jobs : int
        Forwarded to ``joblib.Parallel`` for the chunked SHAP computation.

    Returns
    -------
    pd.DataFrame or None
        One row per feature, sorted by importance, with columns ``Feature``,
        ``SHAP_Importance`` (mean |SHAP|), ``Direction`` (sign of the mean SHAP
        value), ``SHAP_%`` (relative to the strongest feature), ``Rank`` and
        ``Cumulative_SHAP_%`` (running share of the total mean |SHAP|);
        ``Model_Importance`` / ``Model_%`` are added when the model exposes
        ``feature_importances_``. ``None`` is returned if any step raises; the
        error and some diagnostics are printed instead of propagating.
    """
    tree_model_names = ('RandomForestClassifier', 'RandomForestRegressor',
                        'XGBClassifier', 'XGBRegressor',
                        'LGBMClassifier', 'LGBMRegressor')
    linear_model_names = ('LogisticRegression', 'LinearRegression')

    try:
        total_start_time = time.time()
        model_type = type(model).__name__

        print(f"‚ÑπÔ∏è Model type: {model_type}")
        print(f"‚ÑπÔ∏è Number of classes: {getattr(model, 'n_classes_', 'unknown')}")

        # 1. Explainer initialisation
        print("üîÑ Initializing SHAP explainer...")
        explainer_start = time.time()
        # `approximate` / `check_additivity` are TreeExplainer-only options;
        # passing them to the other explainers raises TypeError.
        shap_kwargs = {}
        if model_type in tree_model_names:
            explainer = shap.TreeExplainer(model, feature_perturbation="tree_path_dependent")
            shap_kwargs = {'approximate': True, 'check_additivity': False}
        elif model_type in linear_model_names:
            explainer = shap.LinearExplainer(model, X_train)
        else:
            explainer = shap.Explainer(model, X_train)
        explainer_time = time.time() - explainer_start
        print(f"‚úÖ SHAP explainer initialized in {timedelta(seconds=explainer_time)}")

        # 2. Subsample (positional, so duplicate index labels cannot inflate it)
        sample_size = min(sample_size, len(X_train))
        if sample_size <= 0:
            raise ValueError("X_train is empty: nothing to explain")
        sample_pos = np.random.choice(len(X_train), size=sample_size, replace=False)
        X_sample = X_train.iloc[sample_pos]

        print(f"\nüîÑ Calculating SHAP values for {sample_size} samples...")
        shap_start = time.time()

        # Parallel processing: never create more chunks than rows, otherwise
        # some workers would receive empty frames.
        n_chunks = min(4 * (os.cpu_count() or 1), sample_size)

        def calc_chunk(chunk):
            return explainer.shap_values(chunk, **shap_kwargs)

        chunks = [X_sample.iloc[pos] for pos in np.array_split(np.arange(sample_size), n_chunks)]
        results = Parallel(n_jobs=n_jobs)(delayed(calc_chunk)(chunk) for chunk in chunks)

        # Merge per-chunk results (a list per chunk means one array per class)
        if isinstance(results[0], list):
            shap_values = [np.concatenate([r[i] for r in results]) for i in range(len(results[0]))]
        else:
            shap_values = np.concatenate(results)

        shap_time = time.time() - shap_start
        print(f"‚úÖ SHAP values calculated in {timedelta(seconds=shap_time)}")
        print(f"‚è± Average time per sample: {shap_time/sample_size:.4f} seconds")

        # 3. Reduce to a single (rows, features) matrix
        if isinstance(shap_values, list):
            # two classes -> keep class 1; otherwise average over classes
            shap_values = shap_values[1] if len(shap_values) == 2 else np.mean(shap_values, axis=0)
        elif isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
            # 3-D output: keep the slice for class index 1
            shap_values = shap_values[:, :, 1]

        print(f"‚ÑπÔ∏è Processed SHAP values shape: {shap_values.shape}")

        # 4. Importance analysis
        print("\nüîÑ Calculating feature importance...")
        analysis_start = time.time()
        importance_df = pd.DataFrame({
            'Feature': X_train.columns,
            'SHAP_Importance': np.abs(shap_values).mean(axis=0),
            'Direction': np.where(np.mean(shap_values, axis=0) > 0, 'Positive', 'Negative')
        })
        if hasattr(model, 'feature_importances_'):
            importance_df['Model_Importance'] = model.feature_importances_
            importance_df['Model_%'] = 100 * importance_df['Model_Importance'] / importance_df['Model_Importance'].max()

        importance_df['SHAP_%'] = 100 * importance_df['SHAP_Importance'] / importance_df['SHAP_Importance'].max()
        importance_df = importance_df.sort_values('SHAP_%', ascending=False)
        importance_df['Rank'] = range(1, len(importance_df) + 1)
        # Cumulative share of the TOTAL importance (bounded by 100 %). Summing
        # the max-normalised SHAP_% column would exceed 100 % and not be a share.
        total_importance = importance_df['SHAP_Importance'].sum()
        importance_df['Cumulative_SHAP_%'] = 100 * importance_df['SHAP_Importance'].cumsum() / total_importance
        analysis_time = time.time() - analysis_start
        print(f"‚úÖ Feature analysis completed in {timedelta(seconds=analysis_time)}")

        # 5. Table
        print("\nüîç Top Features by SHAP Importance:")
        display_cols = ['Rank', 'Feature', 'SHAP_%', 'Direction']
        if 'Model_%' in importance_df.columns:
            display_cols.append('Model_%')
        print(importance_df.head(top_n)[display_cols].to_markdown(index=False, floatfmt=".1f"))

        print("\nüìä Key Metrics:")
        # Guard against models with fewer than five features
        top_k = min(5, len(importance_df))
        print(f"‚Ä¢ Top-{top_k} features explain: {importance_df['Cumulative_SHAP_%'].iloc[top_k - 1]:.1f}%")
        pos_count = (importance_df['Direction'] == 'Positive').sum()
        neg_count = (importance_df['Direction'] == 'Negative').sum()
        print(f"‚Ä¢ Positive/Negative: {pos_count}/{neg_count}")

        # 6. Simple bar chart (reversed so the strongest feature is on top)
        top_rows = importance_df.head(top_n).iloc[::-1]
        bar_colors = top_rows['Direction'].map({'Positive': 'tomato', 'Negative': 'dodgerblue'}).tolist()
        plt.figure(figsize=(10, min(6, top_n * 0.3)))
        plt.barh(top_rows['Feature'], top_rows['SHAP_%'], color=bar_colors)
        plt.title(f'Top {top_n} Features by SHAP')
        plt.xlabel('Relative SHAP Importance (%)')
        plt.tight_layout()
        plt.show()

        # 7. Timing summary
        total_time = time.time() - total_start_time
        print(f"\n‚è± Total execution time: {timedelta(seconds=total_time)}")
        print("="*50)
        print("Time breakdown:")
        print(f"- Explainer init: {timedelta(seconds=explainer_time)}")
        print(f"- SHAP values: {timedelta(seconds=shap_time)} ({shap_time/total_time*100:.1f}%)")
        print(f"- Analysis: {timedelta(seconds=analysis_time)} ({analysis_time/total_time*100:.1f}%)")

        return importance_df

    except Exception as e:
        # Best-effort notebook helper: report the failure with diagnostics
        # instead of interrupting the surrounding analysis.
        print(f"\n‚ùå Error: {str(e)}")
        if 'shap_values' in locals():
            print(f"SHAP values type: {type(shap_values)}")
            if hasattr(shap_values, 'shape'):
                print(f"SHAP values shape: {shap_values.shape}")
        print(f"X_train shape: {X_train.shape if X_train is not None else 'N/A'}")
        if hasattr(model, 'n_features_in_'):
            print(f"Model features: {model.n_features_in_}")
        return None

## Permutation Importance

### explain_model_permutation

**explain_model_permutation** Оценивает важность признаков с помощью Permutation Importance

Вход:
- X: DataFrame с признаками
- y: целевая переменная
- model: обученная модель (RandomForest, XGBoost и др.)
- scoring: метрика оценки ('f1', 'accuracy', 'roc_auc')
- n_repeats: количество повторов для стабильности
- top_n: количество топ-признаков для отображения
- random_state: seed для воспроизводимости
- n_jobs: количество ядер для параллельных вычислений

Выход:
- DataFrame с важностью признаков (Feature, Mean Importance, Std, Significant, Rank)
- Визуализация топ-признаков с доверительными интервалами


In [44]:
def explain_model_permutation(X, y, model, scoring='f1', n_repeats=5, top_n=20, random_state=3, n_jobs = 4):
    """
    Rank features by permutation importance, print the top of the ranking
    and plot it as a horizontal bar chart with error bars.

    Parameters
    ----------
    X : pd.DataFrame
        Feature frame to evaluate on (e.g. train or validation features).
    y : pd.Series
        Target values aligned with ``X``.
    model : fitted estimator
        Any estimator accepted by ``sklearn.inspection.permutation_importance``.
    scoring : str
        Scoring name forwarded to ``permutation_importance``.
    n_repeats : int
        Number of shuffles per feature.
    top_n : int
        Number of features shown in the table and the chart.
    random_state : int
        Seed forwarded to ``permutation_importance``.
    n_jobs : int
        Parallelism forwarded to ``permutation_importance``.

    Returns
    -------
    pd.DataFrame or None
        Columns ``Feature``, ``Mean Importance``, ``Std``, ``Significant``
        (mean minus two standard deviations is positive) and ``Rank``, sorted
        by descending mean importance. ``None`` if anything raised; the error
        is printed rather than propagated.
    """
    try:
        print(f"‚ÑπÔ∏è Model type: {type(model).__name__}")
        print(f"‚ÑπÔ∏è Scoring metric: {scoring}")

        started_at = time.time()

        print("üîÑ Calculating permutation importance...")
        perm = permutation_importance(
            model,
            X,
            y,
            scoring=scoring,
            n_repeats=n_repeats,
            random_state=random_state,
            n_jobs=n_jobs,
        )

        duration = time.time() - started_at
        print(f"‚úÖ Completed in {timedelta(seconds=duration)}")

        # Assemble the ranking table in one chained expression
        ranking = (
            pd.DataFrame({
                'Feature': X.columns,
                'Mean Importance': perm.importances_mean,
                'Std': perm.importances_std,
            })
            .assign(Significant=lambda t: t['Mean Importance'] - 2 * t['Std'] > 0)
            .sort_values(by='Mean Importance', ascending=False)
            .reset_index(drop=True)
        )
        ranking['Rank'] = ranking.index + 1

        leaders = ranking.head(top_n)

        print("\nüîç Top Features by Permutation Importance:")
        shown_columns = ['Rank', 'Feature', 'Mean Importance', 'Std', 'Significant']
        print(leaders[shown_columns].to_markdown(index=False, floatfmt=".3f"))

        # Simple chart; rows are reversed so rank 1 is drawn at the top
        bottom_up = leaders.iloc[::-1]
        plt.figure(figsize=(10, min(6, top_n * 0.3)))
        plt.barh(
            bottom_up['Feature'],
            bottom_up['Mean Importance'],
            xerr=bottom_up['Std'],
            color='mediumseagreen',
        )
        plt.xlabel("Mean Importance")
        plt.title(f"Top {top_n} Features by Permutation Importance")
        plt.tight_layout()
        plt.show()

        return ranking

    except Exception as e:
        print(f"‚ùå Error during permutation importance: {e}")
        return None

# Сигналы модели

**plot_predict_signals** Визуализирует OHLC график с истинными и предсказанными торговыми сигналами

Аргументы:
- df с колонками: 'Open','High','Low','Close','buy','sell'
- y_pred: массив предсказаний модели (опционально)
- pred_threshold: порог бинаризации предсказаний
- start_idx: начальный индекс участка
- end_idx: конечный индекс участка

Отображает:
- Линии OHLC цен
- Истинные buy/sell сигналы (зеленые/красные маркеры)
- Предсказанные buy сигналы модели (синие маркеры)

In [5]:
def plot_predict_signals(df, y_pred=None, pred_threshold=0.5, start_idx=200, end_idx=500):
    """
    Plot OHLC price lines for a slice of ``df`` with the true buy/sell markers
    and, optionally, the model's thresholded buy predictions.

    Args:
        df: DataFrame with columns 'Open', 'High', 'Low', 'Close', 'buy', 'sell'.
        y_pred: optional predictions (probabilities or 0/1), positionally
            aligned with the rows of ``df``; list, ndarray or Series.
        pred_threshold: predictions >= this value are drawn as model buys.
        start_idx: first row position of the plotted slice.
        end_idx: row position one past the end of the slice; ``None`` means
            up to the end of ``df``.

    Raises:
        ValueError: if the slice of ``y_pred`` does not have the same number
            of rows as the slice of ``df``.
    """
    if end_idx is None:
        end_idx = len(df)

    plot_data = df.iloc[start_idx:end_idx].copy()

    # Attach model predictions when provided
    if y_pred is not None:
        # Convert to a plain array first: assigning a Series would align on
        # index labels and silently produce NaN when its index differs from
        # df's; a plain list would not support the >= comparison at all.
        window_pred = np.asarray(y_pred)[start_idx:end_idx]
        if len(window_pred) != len(plot_data):
            raise ValueError(
                f"y_pred slice has {len(window_pred)} values but the df slice "
                f"has {len(plot_data)} rows"
            )
        plot_data['model_buy'] = (window_pred >= pred_threshold).astype(int)

    plt.figure(figsize=(16, 8))

    # Price lines
    plt.plot(plot_data.index, plot_data['Close'], 'b-', label='Close', linewidth=1.5, alpha=0.8)
    plt.plot(plot_data.index, plot_data['Open'], 'g--', label='Open', linewidth=1, alpha=0.6)
    plt.plot(plot_data.index, plot_data['High'], 'c:', label='High', linewidth=1, alpha=0.6)
    plt.plot(plot_data.index, plot_data['Low'], 'm:', label='Low', linewidth=1, alpha=0.6)

    # True buy signals
    buy_signals = plot_data[plot_data['buy'] == 1]
    if not buy_signals.empty:
        plt.scatter(buy_signals.index, buy_signals['Close'],
                    color='green', marker='^', s=120, label='True Buy', zorder=5, alpha=0.8)

    # True sell signals
    sell_signals = plot_data[plot_data['sell'] == 1]
    if not sell_signals.empty:
        plt.scatter(sell_signals.index, sell_signals['Close'],
                    color='red', marker='v', s=120, label='True Sell', zorder=5, alpha=0.8)

    # Model predictions (if provided), drawn below the true markers
    if y_pred is not None:
        model_buy_signals = plot_data[plot_data['model_buy'] == 1]
        if not model_buy_signals.empty:
            plt.scatter(model_buy_signals.index, model_buy_signals['Close'],
                        color='blue', marker='^', s=100, label=f'Model Buy (‚â•{pred_threshold})',
                        zorder=4, alpha=0.6, edgecolors='black', linewidth=1)

    plt.title(f'OHLC Prices with Buy/Sell Signals (Threshold: {pred_threshold})')
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Бэктест модели

**backtest_model** Проводит пошаговый бэктест торговой модели с TP/SL и анализирует результаты

Аргументы:
- df: DataFrame с признаками и ценовыми данными
- model: обученная модель машинного обучения
- X_train: тренировочные данные (из их колонок берётся список признаков)
- threshold: порог для торговых сигналов
- tp_pct: уровень тейк-профита в долях (0.04 = 4 %)
- rr: соотношение прибыль/риск (стоп-лосс = tp_pct / rr)
- plot: флаг отображения графиков

Возвращает:
- Общую прибыль и счетчик TP/SL сделок
- Максимальную серию убытков
- Помесячную статистику
- Таблицу всех сделок
- Графики кривой капитала и месячной прибыли

In [93]:
def backtest_model(df, model, X_train, threshold=0.5, tp_pct=0.04, rr=2.0, plot=True):
    """
    Candle-by-candle backtest of a long-only model with fixed take-profit and
    stop-loss exits.

    A trade is opened at the ``Close`` of the first candle whose predicted
    class-1 probability is >= ``threshold`` while no trade is open. On each
    later candle the stop-loss is checked first (``Low`` <= SL price), then
    the take-profit (``High`` >= TP price), so a candle touching both counts
    as a stop-loss. After an exit, the search for a new entry resumes on the
    next candle. A trade still open after the last candle is booked as a
    stop-loss dated at the last row.

    Args:
        df: DataFrame containing 'Date', 'High', 'Low', 'Close' and the
            feature columns. It is not modified.
        model: fitted classifier exposing ``predict_proba``.
        X_train: training features; only its column names are used, to select
            the feature columns of ``df``.
        threshold: minimum class-1 probability required to enter a trade.
        tp_pct: take-profit distance as a fraction of the entry price
            (0.04 means 4 %).
        rr: reward/risk ratio; the stop-loss distance is ``tp_pct / rr``.
        plot: draw the equity curve and the monthly profit bars.

    Returns:
        ``None`` when none of ``X_train``'s columns exist in ``df``; otherwise
        a dict with keys ``total_profit`` (sum of trade results, in percent),
        ``tp_count``, ``sl_count``, ``max_sl_streak``, ``monthly_profit``
        (percent per entry month), ``trades_df`` (one row per trade,
        including ``cum_profit`` in percent) and ``feature_cols_used``.
    """
    # Features are the X_train columns that are present in df
    feature_cols = [col for col in X_train.columns if col in df.columns]

    # Warn when df lacks some of the training features
    missing_features = set(X_train.columns) - set(df.columns)
    if missing_features:
        print(f"‚ö†Ô∏è –í–Ω–∏–º–∞–Ω–∏–µ: –æ—Ç—Å—É—Ç—Å—Ç–≤—É—é—Ç —Ñ–∏—á–∏ –≤ df: {missing_features}")
        print(f"–ò—Å–ø–æ–ª—å–∑—É–µ–º —Ç–æ–ª—å–∫–æ –¥–æ—Å—Ç—É–ø–Ω—ã–µ —Ñ–∏—á–∏: {feature_cols}")

    # Stop-loss distance derived from the reward/risk ratio
    sl_pct = tp_pct / rr

    if not feature_cols:
        print("‚ùå –ù–µ—Ç –¥–æ—Å—Ç—É–ø–Ω—ã—Ö —Ñ–∏—á –¥–ª—è –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏—è!")
        return None

    preds = np.asarray(model.predict_proba(df[feature_cols]))[:, 1]

    # Pull the columns out once: building a row Series with df.iloc[i] on
    # every candle dominates the runtime on long histories.
    dates = df['Date']
    close = df['Close'].to_numpy()
    high = df['High'].to_numpy()
    low = df['Low'].to_numpy()

    all_trades = []
    current_trade = None

    for i in range(len(df)):
        if current_trade is None:
            # Flat: look for an entry on this candle
            if preds[i] >= threshold:
                entry_price = close[i]
                current_trade = {
                    'entry_date': dates.iloc[i],
                    'entry_price': entry_price,
                    'tp_price': entry_price * (1 + tp_pct),
                    'sl_price': entry_price * (1 - sl_pct),
                }
            continue

        # In a trade: the stop-loss is checked before the take-profit
        if low[i] <= current_trade['sl_price']:
            outcome, profit = 'SL', -sl_pct
        elif high[i] >= current_trade['tp_price']:
            outcome, profit = 'TP', tp_pct
        else:
            continue

        current_trade.update(exit_date=dates.iloc[i], outcome=outcome, profit_pct=profit)
        all_trades.append(current_trade)
        current_trade = None  # a new entry is searched from the next candle

    # A trade still open at the end of the data is booked as a stop-loss
    if current_trade is not None:
        current_trade.update(exit_date=dates.iloc[-1], outcome='SL', profit_pct=-sl_pct)
        all_trades.append(current_trade)

    trades_df = pd.DataFrame(all_trades) if all_trades else pd.DataFrame()

    # Metrics
    if not trades_df.empty:
        is_sl = trades_df['outcome'] == 'SL'
        total_profit = trades_df['profit_pct'].sum() * 100
        tp_count = (trades_df['outcome'] == 'TP').sum()
        sl_count = is_sl.sum()
        # Each non-SL trade starts a new group; the SL count per group is a streak
        max_sl_streak = is_sl.astype(int).groupby((~is_sl).cumsum()).sum().max()

        # Monthly breakdown by entry month
        trades_df['month'] = pd.to_datetime(trades_df['entry_date']).dt.to_period('M')
        monthly_profit = trades_df.groupby('month')['profit_pct'].sum() * 100

        # Equity curve values; computed regardless of `plot` so the returned
        # table has the same columns either way
        trades_df['cum_profit'] = trades_df['profit_pct'].cumsum() * 100
    else:
        total_profit = 0
        tp_count = 0
        sl_count = 0
        max_sl_streak = 0
        monthly_profit = pd.Series(dtype=float)

    results = {
        'total_profit': total_profit,
        'tp_count': tp_count,
        'sl_count': sl_count,
        'max_sl_streak': max_sl_streak,
        'monthly_profit': monthly_profit,
        'trades_df': trades_df,
        'feature_cols_used': feature_cols
    }

    # Charts
    if plot and not trades_df.empty:
        fig, axes = plt.subplots(2, 1, figsize=(12, 8), gridspec_kw={'height_ratios': [3, 1]})

        # Equity curve plotted against trade exit dates
        exit_dates = pd.to_datetime(trades_df['exit_date'])

        axes[0].plot(exit_dates, trades_df['cum_profit'], label='Equity Curve', color='blue')
        axes[0].axhline(0, color='gray', linestyle='--', linewidth=1)
        axes[0].set_title('Equity Curve')
        axes[0].set_xlabel('Exit Date')
        axes[0].set_ylabel('Cumulative Profit %')
        axes[0].legend()

        # Readable date ticks
        axes[0].xaxis.set_major_locator(mdates.AutoDateLocator())
        axes[0].xaxis.set_major_formatter(mdates.ConciseDateFormatter(mdates.AutoDateLocator()))
        fig.autofmt_xdate()

        # Profit per month
        if not monthly_profit.empty:
            monthly_profit.plot(kind='bar', ax=axes[1], color='green')
            axes[1].set_title('Monthly Profit')
            axes[1].set_ylabel('Profit %')

        plt.tight_layout()
        plt.show()
    elif plot:
        print("‚ö†Ô∏è –ù–µ—Ç —Å–¥–µ–ª–æ–∫ –¥–ª—è –ø–æ—Å—Ç—Ä–æ–µ–Ω–∏—è –≥—Ä–∞—Ñ–∏–∫–æ–≤")

    return results