# `utils.df_to_csr`

In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import slickml

# Record the installed SlickML version so the outputs below can be tied to a release.
print(f"Loaded SlickML Version = {slickml.__version__}")

Loaded SlickML Version = 0.2.0b0


In [4]:
from slickml.utils import df_to_csr

# Print the function's signature and docstring (parameters, defaults, return type).
help(df_to_csr)

Help on function df_to_csr in module slickml.utils._transform:

df_to_csr(df: pandas.core.frame.DataFrame, *, fillna: Optional[float] = 0.0, verbose: Optional[bool] = False) -> scipy.sparse._csr.csr_matrix
    Transforms a pandas DataFrame into a Compressed Sparse Row (CSR) matrix [1]_.
    
    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    
    fillna : float, optional
        Value to fill nulls, by default 0.0
    
    verbose : bool, optional
        Whether to show the memory usage comparison of csr matrix and pandas DataFrame, by default False
    
    Returns
    -------
    csr_matrix
        Transformed pandas DataFrame in CSR matrix format
    
    Notes
    -----
    This utility function is being used across API when the `sparse_matrix=True` for all classifiers
    and regressors. In practice, when we are dealing with sparse matrices, it does make sense to
    employ this functionality. It should be noted that using sparse matrices when the inp

### Example 1: `df_to_csr` with `pandas.DataFrame` as the input data

In [7]:
import pandas as pd

# Toy input: a single binary column, half of whose entries are zero.
df = pd.DataFrame({"foo": [0, 1, 0, 1]})
df

Unnamed: 0,foo
0,0
1,1
2,0
3,1


In [8]:
# Convert the frame to a CSR matrix.
# - fillna=0.0 is the documented default, spelled out here for clarity.
# - verbose=True prints the memory-usage comparison of the DataFrame and the CSR matrix.
csr = df_to_csr(df, fillna=0.0, verbose=True)
csr

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   foo     4 non-null      int64
dtypes: int64(1)
memory usage: 160.0 bytes
CSR memory usage: 44.0 bytes
CSR memory usage: 0.00004 MB


<4x1 sparse matrix of type '<class 'numpy.float64'>'
	with 2 stored elements in Compressed Sparse Row format>

In [10]:
# Confirm the returned object is a SciPy CSR matrix.
type(csr)

scipy.sparse._csr.csr_matrix

In [11]:
# `.data` holds only the explicitly stored entries: here the two 1s, as floats.
csr.data

array([1., 1.])