In [1]:


from transformers.activations import ACT2FN
from transformers.cache_utils import Cache, DynamicCache, StaticCache
from transformers.generation import GenerationMixin
from transformers.modeling_attn_mask_utils import AttentionMaskConverter
from transformers.modeling_flash_attention_utils import FlashAttentionKwargs

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from qwen_model import * 

In [4]:
import transformers 
# Display the installed transformers version as the cell output, so the
# environment this notebook was run against is recorded alongside the code.
transformers.__version__

'4.49.0.dev0'

In [None]:
class HybridQwen2Config(Qwen2Config):
    """Qwen2 configuration extended with hybrid-attention settings.

    Args:
        hybrid_attention_layers: Iterable of layer indices that should use
            hybrid attention. ``None`` (the default) is stored as an empty
            list, i.e. no layer uses hybrid attention.
        alpha_init: Initial value for the alpha parameter.
        **kwargs: Forwarded unchanged to ``Qwen2Config.__init__``.
    """

    # Declared on the class (not only on the instance) because the base
    # ``to_dict`` serializes the class-level ``model_type``; an instance-only
    # assignment would still be written out as the parent's model type.
    model_type = "hybrid_qwen2"

    def __init__(
        self,
        hybrid_attention_layers=None,  # Layer indices that use hybrid attention
        alpha_init=0.5,  # Initial value for the alpha parameter
        **kwargs
    ):
        # Let the parent populate all standard Qwen2 fields first.
        super().__init__(**kwargs)

        # Store a private copy so later mutation of the caller's list does not
        # silently change this config (and vice versa).
        self.hybrid_attention_layers = (
            list(hybrid_attention_layers) if hybrid_attention_layers is not None else []
        )
        self.alpha_init = alpha_init

        # kwargs may carry a different ``model_type`` (e.g. when built from a
        # plain Qwen2 config dict); pin the instance to this class's value.
        self.model_type = type(self).model_type

        # NOTE: the previous ``self.register_for_auto_class = True`` was
        # removed. It shadowed the inherited ``register_for_auto_class``
        # classmethod with a bool and leaked a bogus key into the serialized
        # config. Call ``HybridQwen2Config.register_for_auto_class()`` instead
        # if auto-class registration is wanted.

    def to_dict(self):
        """Serialize this configuration to a plain dictionary.

        Returns:
            dict: The parent's dictionary, guaranteed to contain
            ``hybrid_attention_layers`` (as a fresh list, so mutating the
            result never touches this config) and ``alpha_init``.
        """
        config_dict = super().to_dict()
        config_dict["hybrid_attention_layers"] = list(self.hybrid_attention_layers or [])
        config_dict["alpha_init"] = self.alpha_init
        return config_dict

    @classmethod
    def from_dict(cls, config_dict, **kwargs):
        """Build a configuration from a dictionary.

        The hybrid-specific keys need no special handling here: the parent
        implementation passes ``config_dict`` to ``__init__``, which applies
        the defaults for ``hybrid_attention_layers`` and ``alpha_init`` when
        they are absent.

        Args:
            config_dict: Dictionary of configuration values.
            **kwargs: Extra options accepted by the parent ``from_dict``
                (for example ``return_unused_kwargs``); forwarded unchanged so
                that ``from_pretrained`` keeps working.

        Returns:
            Whatever the parent ``from_dict`` returns: the configuration, or a
            ``(config, unused_kwargs)`` tuple when ``return_unused_kwargs`` is
            requested.
        """
        return super().from_dict(config_dict, **kwargs)