In [None]:
import scipy.sparse as sp
import torch

def torch_linear_to_sparse(tensor, blocksize=(32, 32)):
    """Convert a 2D PyTorch tensor into a SciPy CSR sparse matrix.

    Args:
        tensor: 2D ``torch.Tensor`` (for example a linear layer's weight).
            It is detached from autograd and moved to the CPU before being
            converted.
        blocksize: Accepted for interface compatibility but currently not
            used by the conversion; the result is always an element-wise
            CSR matrix.

    Returns:
        A ``scipy.sparse.csr_matrix`` with the same shape as ``tensor``.

    Raises:
        TypeError: If ``tensor`` is not a ``torch.Tensor``.
        ValueError: If ``tensor`` is not 2-dimensional.
    """
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("Input must be a PyTorch tensor.")
    if tensor.dim() != 2:
        raise ValueError("Input tensor must be 2D.")

    host_tensor = tensor.detach().cpu()
    # NumPy has no bfloat16 dtype, so .numpy() would raise on such tensors;
    # widening to float32 is lossless and keeps the conversion working.
    if host_tensor.dtype == torch.bfloat16:
        host_tensor = host_tensor.to(torch.float32)

    return sp.csr_matrix(host_tensor.numpy())

In [None]:
from model import GPT, GPTConfig, QLinearTile2D

def load_model_from_checkpoint(ckpt_path):
    """Build a GPT model and load its weights from a checkpoint file.

    The checkpoint is read as a mapping with a 'model_args' entry (keyword
    arguments for ``GPTConfig``) and a 'model' entry (the state dict).

    Args:
        ckpt_path: Path to the checkpoint, as a ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``).

    Returns:
        A ``GPT`` instance with the checkpoint's state dict loaded. Tensors
        are mapped to the CPU while loading.

    Raises:
        TypeError: If ``ckpt_path`` is neither a string nor path-like.
    """
    import os  # local import so the cell does not depend on a new top-level import

    if not isinstance(ckpt_path, (str, os.PathLike)):
        raise TypeError("Checkpoint path must be a string or path-like object.")

    # NOTE(security): torch.load unpickles the file; depending on the installed
    # torch version's `weights_only` default this can execute arbitrary code.
    # Only load checkpoints from trusted sources.
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    gptconf = GPTConfig(**checkpoint['model_args'])
    model = GPT(gptconf)

    # Strip the '_orig_mod.' prefix from parameter names so they match the
    # freshly built model (presumably added by torch.compile — confirm).
    unwanted_prefix = '_orig_mod.'
    state_dict = {
        (key[len(unwanted_prefix):] if key.startswith(unwanted_prefix) else key): value
        for key, value in checkpoint['model'].items()
    }

    model.load_state_dict(state_dict)
    return model

# Load the model from the checkpoint at the path below.
CKPT_PATH = './quantized_models/ckpt_tiled_quantized.pt'
model = load_model_from_checkpoint(CKPT_PATH)

# Take the weight of the c_fc layer inside the MLP of transformer block index 2.
mlp_fc = model.transformer.h[2].mlp.c_fc
linear_weights = mlp_fc.weight

# Convert that dense weight into a SciPy sparse matrix.
sparse_weights = torch_linear_to_sparse(linear_weights, blocksize=(32, 32))

number of parameters: 10.67M
Non-zero blocks: 79 / 576 (13.72% density)


In [None]:
import cupy as cp
from cupyx.scipy.sparse import csr_matrix

# Copy the three CSR component arrays of the SciPy matrix into CuPy arrays.
data, indices, indptr = (
    cp.asarray(component)
    for component in (
        sparse_weights.data,
        sparse_weights.indices,
        sparse_weights.indptr,
    )
)

# Rebuild the same CSR matrix with CuPy from those arrays.
W_gpu = csr_matrix((data, indices, indptr), shape=sparse_weights.shape)

# Example input: a random float32 column vector with one row per matrix column.
x = cp.random.randn(sparse_weights.shape[1], 1, dtype=cp.float32)

# Inference: sparse matrix times the input vector.
y = W_gpu @ x


ModuleNotFoundError: No module named 'cupy'