In [1]:
"""
!pip install -U scikit-learn
!pip install -U imbalanced-learn
!pip install notebook
!pip install pandas
!pip install numpy
"""

'\n!pip install -U scikit-learn\n!pip install -U imbalanced-learn\n!pip install notebook\n!pip install pandas\n!pip install numpy\n'

# Data cleaning

In [2]:
import pandas as pd

df = pd.read_csv('sample_dataset.csv')
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,,10.38,122.80,1001.0,0.11840,0.27760,0.3001,0.14710,0.2419,0.07871,...,17.33,,2019.0,0.16220,0.6656,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,,,0.0869,0.07017,,0.05667,...,23.41,158.80,1956.0,0.12380,0.1866,0.2416,0.1860,0.2750,,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,,,,0.05999,...,25.53,,1709.0,0.14440,0.4245,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.2414,,0.2597,0.09744,...,26.50,,567.7,0.20980,0.8663,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,,,,0.13280,0.1980,,0.1809,,...,16.67,152.20,1575.0,0.13740,,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,,0.2439,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.2113,0.4107,0.2216,0.2060,0.07115,0
565,,28.25,131.20,1261.0,0.09780,0.10340,0.1440,0.09791,0.1752,,...,38.25,155.00,1731.0,0.11660,0.1922,0.3215,0.1628,0.2572,,0
566,16.60,28.08,108.30,,0.08455,0.10230,,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.3514,0.15200,0.2397,,...,,184.60,1821.0,0.16500,0.8681,0.9387,0.2650,0.4087,0.12400,0


In [3]:
df.dtypes

mean radius                float64
mean texture               float64
mean perimeter             float64
mean area                  float64
mean smoothness            float64
mean compactness           float64
mean concavity             float64
mean concave points        float64
mean symmetry              float64
mean fractal dimension     float64
radius error               float64
texture error              float64
perimeter error            float64
area error                  object
smoothness error           float64
compactness error          float64
concavity error            float64
concave points error       float64
symmetry error             float64
fractal dimension error    float64
worst radius               float64
worst texture              float64
worst perimeter            float64
worst area                 float64
worst smoothness           float64
worst compactness          float64
worst concavity            float64
worst concave points       float64
worst symmetry      

## Selecting numerical and categorical variables

In [4]:
# Partition the column labels by dtype: object/category/bool columns count as
# categorical, and every remaining column (in its original order) as numerical.
categorical_variables = df.select_dtypes(include=['object', 'category', 'bool']).columns
numerical_variables = df.columns.drop(categorical_variables)

In [5]:
numerical_variables

Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'smoothness error',
       'compactness error', 'concavity error', 'concave points error',
       'symmetry error', 'fractal dimension error', 'worst radius',
       'worst texture', 'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension', 'target'],
      dtype='object')

In [6]:
categorical_variables

Index(['area error'], dtype='object')

In [7]:
df['area error'].value_counts()

area error
A    489
B      4
C      1
Name: count, dtype: int64

## Cleaning the numerical features

In [8]:
# Keep only the first three columns so the effect of each imputer is easy to inspect.
X = df.loc[:, df.columns[:3]]
X

Unnamed: 0,mean radius,mean texture,mean perimeter
0,,10.38,122.80
1,20.57,17.77,132.90
2,19.69,21.25,130.00
3,11.42,20.38,77.58
4,20.29,14.34,
...,...,...,...
564,21.56,22.39,142.00
565,,28.25,131.20
566,16.60,28.08,108.30
567,20.60,29.33,140.10


### Replace blanks with the mean value

In [9]:
X['mean radius'].mean()

14.059547717842323

In [10]:
X['mean texture'].mean()

19.311829268292684

In [11]:
X['mean perimeter'].mean()

92.03902534113061

In [12]:
from sklearn.impute import SimpleImputer

# SimpleImputer defaults to strategy='mean': every missing entry is replaced by
# the mean of its own column.
cleaner = SimpleImputer()
# fit_transform(X) is shorthand for cleaner.fit(X) followed by cleaner.transform(X).
cleaner.fit_transform(X)

array([[ 14.05954772,  10.38      , 122.8       ],
       [ 20.57      ,  17.77      , 132.9       ],
       [ 19.69      ,  21.25      , 130.        ],
       ...,
       [ 16.6       ,  28.08      , 108.3       ],
       [ 20.6       ,  29.33      , 140.1       ],
       [  7.76      ,  19.31182927,  47.92      ]])

### Replace blanks with the median value (worth trying when the distribution is skewed)

In [13]:
X['mean radius'].median()

13.28

In [14]:
X['mean texture'].median()

18.86

In [15]:
X['mean perimeter'].median()

86.34

In [16]:
cleaner = SimpleImputer(strategy='median')
cleaner.fit_transform(X)

array([[ 13.28,  10.38, 122.8 ],
       [ 20.57,  17.77, 132.9 ],
       [ 19.69,  21.25, 130.  ],
       ...,
       [ 16.6 ,  28.08, 108.3 ],
       [ 20.6 ,  29.33, 140.1 ],
       [  7.76,  18.86,  47.92]])

### Replace blanks with fixed value

In [17]:
cleaner = SimpleImputer(strategy='constant',fill_value=0)
cleaner.fit_transform(X)

array([[  0.  ,  10.38, 122.8 ],
       [ 20.57,  17.77, 132.9 ],
       [ 19.69,  21.25, 130.  ],
       ...,
       [ 16.6 ,  28.08, 108.3 ],
       [ 20.6 ,  29.33, 140.1 ],
       [  7.76,   0.  ,  47.92]])

## Cleaning the categorical features

In [18]:
# Isolate the only categorical column (kept as a one-column frame for the imputer)
# and count its missing entries.
X = df.loc[:, ['area error']]
X.isna().sum()

area error    75
dtype: int64

### Cleaning using the most probable value

In [19]:
cleaner = SimpleImputer(strategy='most_frequent')
pd.DataFrame(cleaner.fit_transform(X)).value_counts()

A    564
B      4
C      1
Name: count, dtype: int64

### Cleaning using a new value

In [20]:
cleaner = SimpleImputer(strategy='constant',fill_value='Undefined')
pd.DataFrame(cleaner.fit_transform(X)).value_counts()

A            489
Undefined     75
B              4
C              1
Name: count, dtype: int64

## KNN blank filling

In [21]:
X = df.iloc[:,0:3]
X

Unnamed: 0,mean radius,mean texture,mean perimeter
0,,10.38,122.80
1,20.57,17.77,132.90
2,19.69,21.25,130.00
3,11.42,20.38,77.58
4,20.29,14.34,
...,...,...,...
564,21.56,22.39,142.00
565,,28.25,131.20
566,16.60,28.08,108.30
567,20.60,29.33,140.10


### KNN imputer without weights

In [22]:
from sklearn.impute import KNNImputer

cleaner = KNNImputer(n_neighbors=10) # weights='uniform'
cleaner.fit_transform(X)

array([[ 15.8968,  10.38  , 122.8   ],
       [ 20.57  ,  17.77  , 132.9   ],
       [ 19.69  ,  21.25  , 130.    ],
       ...,
       [ 16.6   ,  28.08  , 108.3   ],
       [ 20.6   ,  29.33  , 140.1   ],
       [  7.76  ,  19.408 ,  47.92  ]])

### KNN imputer with weights (each neighbor weighted by the inverse of its distance)

In [23]:
cleaner = KNNImputer(n_neighbors=10,weights='distance')
cleaner.fit_transform(X)

array([[ 15.51148032,  10.38      , 122.8       ],
       [ 20.57      ,  17.77      , 132.9       ],
       [ 19.69      ,  21.25      , 130.        ],
       ...,
       [ 16.6       ,  28.08      , 108.3       ],
       [ 20.6       ,  29.33      , 140.1       ],
       [  7.76      ,  24.25892267,  47.92      ]])

## ColumnTransformer and make_column_selector

In [24]:
from sklearn.compose import ColumnTransformer, make_column_selector

### Using ColumnTransformer

In [25]:
# Route each group of columns to its own imputer: column mean for the numerical
# columns, most frequent value for the categorical ones.
cleaner = ColumnTransformer(
    transformers=[
        ('numerical_transformer', SimpleImputer(strategy='mean'), numerical_variables),
        ('categorical_transformer', SimpleImputer(strategy='most_frequent'), categorical_variables),
    ],
)
cleaner.fit_transform(df)

array([[14.059547717842323, 10.38, 122.8, ..., 0.1189, 0.0, 'A'],
       [20.57, 17.77, 132.9, ..., 0.08436317021276594, 0.0, 'A'],
       [19.69, 21.25, 130.0, ..., 0.08758, 0.0, 'A'],
       ...,
       [16.6, 28.08, 108.3, ..., 0.0782, 0.0, 'A'],
       [20.6, 29.33, 140.1, ..., 0.124, 0.0, 'A'],
       [7.76, 19.311829268292684, 47.92, ..., 0.07039, 1.0, 'A']],
      dtype=object)

### Using remainder='drop' (columns not listed are discarded)

In [26]:
# Integer entries select columns by position, so only the first two columns are
# mean-imputed; remainder='drop' discards every column that is not listed.
cleaner = ColumnTransformer(
    transformers=[
        ('numerical_transformer', SimpleImputer(strategy='mean'), [0, 1]),
        ('categorical_transformer', SimpleImputer(strategy='most_frequent'), categorical_variables),
    ],
    remainder='drop',
)
cleaner.fit_transform(df)

array([[14.059547717842323, 10.38, 'A'],
       [20.57, 17.77, 'A'],
       [19.69, 21.25, 'A'],
       ...,
       [16.6, 28.08, 'A'],
       [20.6, 29.33, 'A'],
       [7.76, 19.311829268292684, 'A']], dtype=object)

### Using remainder='passthrough' (columns not listed are kept unchanged)

In [27]:
# Same two imputers as before, but remainder='passthrough' appends the unlisted
# columns untouched, so their missing values are still present in the result.
cleaner = ColumnTransformer(
    transformers=[
        ('numerical_transformer', SimpleImputer(strategy='mean'), [0, 1]),
        ('categorical_transformer', SimpleImputer(strategy='most_frequent'), categorical_variables),
    ],
    remainder='passthrough',
)
cleaner.fit_transform(df)

array([[14.059547717842323, 10.38, 'A', ..., 0.4601, 0.1189, 0.0],
       [20.57, 17.77, 'A', ..., 0.275, nan, 0.0],
       [19.69, 21.25, 'A', ..., 0.3613, 0.08758, 0.0],
       ...,
       [16.6, 28.08, 'A', ..., 0.2218, 0.0782, 0.0],
       [20.6, 29.33, 'A', ..., 0.4087, 0.124, 0.0],
       [7.76, 19.311829268292684, 'A', ..., 0.2871, 0.07039, 1.0]],
      dtype=object)

### Using make_column_selector

In [28]:
# make_column_selector picks the columns by dtype when the transformer is fitted,
# so no explicit list of column names has to be prepared beforehand.
cleaner = ColumnTransformer(
    transformers=[
        ('numerical_transformer', SimpleImputer(strategy='mean'), make_column_selector(dtype_exclude='object')),
        ('categorical_transformer', SimpleImputer(strategy='most_frequent'), make_column_selector(dtype_include='object')),
    ],
    remainder='drop',  # the default, written out for clarity
)
cleaner.fit_transform(df)

array([[14.059547717842323, 10.38, 122.8, ..., 0.1189, 0.0, 'A'],
       [20.57, 17.77, 132.9, ..., 0.08436317021276594, 0.0, 'A'],
       [19.69, 21.25, 130.0, ..., 0.08758, 0.0, 'A'],
       ...,
       [16.6, 28.08, 108.3, ..., 0.0782, 0.0, 'A'],
       [20.6, 29.33, 140.1, ..., 0.124, 0.0, 'A'],
       [7.76, 19.311829268292684, 47.92, ..., 0.07039, 1.0, 'A']],
      dtype=object)

## Exercise 1

* Load sample_dataset.csv
* Replace the missing values in the categorical variables with "N"
* Replace the missing values in the numerical variables with the mean value

In [29]:
import pandas as pd
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.impute import SimpleImputer

# Exercise 1: mean imputation for every non-object column, the constant "N"
# for the object (categorical) columns.
df = pd.read_csv('sample_dataset.csv')
cleaner = ColumnTransformer(
    transformers=[
        ('numerical_transformer', SimpleImputer(strategy='mean'), make_column_selector(dtype_exclude='object')),
        ('categorical_transformer', SimpleImputer(fill_value='N', strategy='constant'), make_column_selector(dtype_include='object')),
    ],
)
cleaner.fit_transform(df)

array([[14.059547717842323, 10.38, 122.8, ..., 0.1189, 0.0, 'A'],
       [20.57, 17.77, 132.9, ..., 0.08436317021276594, 0.0, 'A'],
       [19.69, 21.25, 130.0, ..., 0.08758, 0.0, 'A'],
       ...,
       [16.6, 28.08, 108.3, ..., 0.0782, 0.0, 'A'],
       [20.6, 29.33, 140.1, ..., 0.124, 0.0, 'A'],
       [7.76, 19.311829268292684, 47.92, ..., 0.07039, 1.0, 'A']],
      dtype=object)

## Exercise 2

* Load sample_dataset.csv
* Replace the missing values in the float variables using KNN with 10 neighbors and distance-based weights
* Replace the missing values in the categorical variables using the most frequent value

In [30]:
import pandas as pd
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.impute import SimpleImputer, KNNImputer

# Exercise 2: distance-weighted 10-nearest-neighbour imputation for the float64
# columns, most frequent value for the object columns. remainder='passthrough'
# keeps whatever neither selector matches (the integer target, judging by the
# output) unchanged at the end of each row.
df = pd.read_csv('sample_dataset.csv')
cleaner = ColumnTransformer(
    transformers=[
        ('float_transformer', KNNImputer(weights='distance', n_neighbors=10), make_column_selector(dtype_include='float64')),
        ('categorical_transformer', SimpleImputer(strategy='most_frequent'), make_column_selector(dtype_include='object')),
    ],
    remainder='passthrough',
)
cleaner.fit_transform(df)

array([[15.637884267617215, 10.38, 122.8, ..., 0.1189, 'A', 0],
       [20.57, 17.77, 132.9, ..., 0.08216467870588635, 'A', 0],
       [19.69, 21.25, 130.0, ..., 0.08758, 'A', 0],
       ...,
       [16.6, 28.08, 108.3, ..., 0.0782, 'A', 0],
       [20.6, 29.33, 140.1, ..., 0.124, 'A', 0],
       [7.76, 21.692427354277637, 47.92, ..., 0.07039, 'A', 1]],
      dtype=object)

# Encoding of the categorical features

In [31]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Toy categorical feature: a single column holding four samples.
X = np.array(['A', 'A', 'B', 'C']).reshape(-1, 1)
X

array([['A'],
       ['A'],
       ['B'],
       ['C']], dtype='<U1')

## One-hot encoding (introduces dummy variables)

In [32]:
encoder = OneHotEncoder()
encoder.fit_transform(X).todense()

matrix([[1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

### Output with dense matrix

In [33]:
encoder = OneHotEncoder(sparse_output=False)
encoder.fit_transform(X)

array([[1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Removing the first dummy variable (useful when working with neural networks or linear/regression models)

In [34]:
encoder = OneHotEncoder(sparse_output=False,drop='first')
encoder.fit_transform(X)

array([[0., 0.],
       [0., 0.],
       [1., 0.],
       [0., 1.]])

### Removing one dummy variable from binary features

In [35]:
# Binary toy feature: one column whose only categories are 'A' and 'B'.
X = np.array(['A', 'A', 'A', 'B']).reshape(-1, 1)
X

array([['A'],
       ['A'],
       ['A'],
       ['B']], dtype='<U1')

In [36]:
encoder = OneHotEncoder(sparse_output=False,drop='if_binary')
encoder.fit_transform(X)

array([[0.],
       [0.],
       [0.],
       [1.]])

### Error handling

In [37]:
encoder = OneHotEncoder(sparse_output=False) # handle_unknown='error'
encoder.fit(X)

In [38]:
encoder.transform(X)

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.]])

In [39]:
# New data containing 'C', a category that is absent from X.
Y = np.array(['A', 'A', 'B', 'C']).reshape(-1, 1)

In [40]:
"""
encoder.transform(Y)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[39], line 2
      1 Y = np.array([['A'],['A'],['B'],['C']])
----> 2 encoder.transform(Y)

File /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sklearn/utils/_set_output.py:157, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    155 @wraps(f)
    156 def wrapped(self, X, *args, **kwargs):
--> 157     data_to_wrap = f(self, X, *args, **kwargs)
    158     if isinstance(data_to_wrap, tuple):
    159         # only wrap the first output for cross decomposition
    160         return_tuple = (
    161             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    162             *data_to_wrap[1:],
    163         )

File /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py:1027, in OneHotEncoder.transform(self, X)
   1022 # validation of X happens in _check_X called by _transform
   1023 warn_on_unknown = self.drop is not None and self.handle_unknown in {
   1024     "ignore",
   1025     "infrequent_if_exist",
   1026 }
-> 1027 X_int, X_mask = self._transform(
   1028     X,
   1029     handle_unknown=self.handle_unknown,
   1030     force_all_finite="allow-nan",
   1031     warn_on_unknown=warn_on_unknown,
   1032 )
   1034 n_samples, n_features = X_int.shape
   1036 if self._drop_idx_after_grouping is not None:

File /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py:200, in _BaseEncoder._transform(self, X, handle_unknown, force_all_finite, warn_on_unknown, ignore_category_indices)
    195 if handle_unknown == "error":
    196     msg = (
    197         "Found unknown categories {0} in column {1}"
    198         " during transform".format(diff, i)
    199     )
--> 200     raise ValueError(msg)
    201 else:
    202     if warn_on_unknown:

ValueError: Found unknown categories ['C'] in column 0 during transform
"""

'\nencoder.transform(Y)\n\n---------------------------------------------------------------------------\nValueError                                Traceback (most recent call last)\nCell In[39], line 2\n      1 Y = np.array([[\'A\'],[\'A\'],[\'B\'],[\'C\']])\n----> 2 encoder.transform(Y)\n\nFile /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sklearn/utils/_set_output.py:157, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)\n    155 @wraps(f)\n    156 def wrapped(self, X, *args, **kwargs):\n--> 157     data_to_wrap = f(self, X, *args, **kwargs)\n    158     if isinstance(data_to_wrap, tuple):\n    159         # only wrap the first output for cross decomposition\n    160         return_tuple = (\n    161             _wrap_data_with_container(method, data_to_wrap[0], X, self),\n    162             *data_to_wrap[1:],\n    163         )\n\nFile /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sklearn/preprocessing/_

In [41]:
# With handle_unknown='ignore', a category that was not seen during fit is
# encoded as an all-zero row instead of raising an error.
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoder.fit_transform(X)

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.]])

In [42]:
encoder.transform(Y)

array([[1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 0.]])

In [43]:
# Refitting on Y makes 'C' a known category, so it now gets a column of its own.
encoder.fit_transform(Y)

array([[1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Dealing with pairs of categories

In [44]:
X = [['A','X'],['B','Y'],['C','Z']]
encoder = OneHotEncoder(sparse_output=False,categories=[['A','B','C','D'],['X','Y','Z']])
encoder.fit_transform(X)

array([[1., 0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0., 0., 1.]])

In [45]:
Y = [['D','Z']]
encoder.transform(Y)

array([[0., 0., 0., 1., 0., 0., 1.]])

## Ordinal encoding (no dummy variables / no dimensionality change)

In [46]:
from sklearn.preprocessing import OrdinalEncoder
X = [['High'],['Low'],['Low'],['Medium']]
encoder = OrdinalEncoder()
encoder.fit_transform(X)

array([[0.],
       [1.],
       [1.],
       [2.]])

### We want Low -> 0 / Medium -> 1 / High -> 2

In [47]:
encoder = OrdinalEncoder(categories=[['Low','Medium','High']])
encoder.fit_transform(X)

array([[2.],
       [0.],
       [0.],
       [1.]])

In [48]:
X = [['High','A'],['Low','C'],['Low','B'],['Medium','C']]
encoder = OrdinalEncoder(categories=[['Low','Medium','High'],['A','B','C']])
encoder.fit_transform(X)

array([[2., 0.],
       [0., 2.],
       [0., 1.],
       [1., 2.]])

In [49]:
X = [['Under 18'],['18-25'],['25-30'],['Over 30']]
encoder = OrdinalEncoder(categories=[['Under 18','18-25','25-30','Over 30']])
encoder.fit_transform(X)

array([[0.],
       [1.],
       [2.],
       [3.]])

## Label encoding of the target variable (used only for target variables, not feature variables, to obtain integer labels; unlike ordinal encoding, no order is implied)

In [50]:
from sklearn.preprocessing import LabelEncoder

Y = ['A','B','B','C','D']
encoder = LabelEncoder()
encoder.fit_transform(Y)

array([0, 1, 1, 2, 3])

In [51]:
# Round trip: encode the labels as integers, then map the integers back to the
# original labels.
encoder.inverse_transform(encoder.fit(Y).transform(Y))

array(['A', 'B', 'B', 'C', 'D'], dtype='<U1')

## Exercise 1
* Use the dataset
  X = [['X','High'],['Y','Low'],['Z','Medium'],['X','Low']]
* Apply One-hot encoding to all variables
* Apply One-hot encoding to the first column and ordinal encoding to the second column following the rank Low, Medium, High

In [52]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer

X = [['X','High'],['Y','Low'],['Z','Medium'],['X','Low']]
# One-hot encode both columns. sparse_output=False pins a dense ndarray result:
# with the encoder's sparse default, ColumnTransformer returns either a dense
# array or a scipy sparse matrix depending on how many zeros the data happens to
# contain (its sparse_threshold heuristic), which makes the output type fragile.
encoder = ColumnTransformer([
    ('all_variables', OneHotEncoder(sparse_output=False), [0, 1])
])
encoder.fit_transform(X)

array([[1., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0., 1.],
       [1., 0., 0., 0., 1., 0.]])

In [53]:
# One-hot encode the first column and ordinal-encode the second one with the
# explicit rank Low < Medium < High. sparse_output=False keeps the combined
# result a dense ndarray regardless of ColumnTransformer's density-based
# sparse_threshold heuristic.
encoder = ColumnTransformer([
    ('first_column', OneHotEncoder(sparse_output=False), [0]),
    ('second_column', OrdinalEncoder(categories=[['Low', 'Medium', 'High']]), [1])
])
encoder.fit_transform(X)

array([[1., 0., 0., 2.],
       [0., 1., 0., 0.],
       [0., 0., 1., 1.],
       [1., 0., 0., 0.]])

## Exercise 2
* Use the dataset
  X = [['X','High'],['Y','Low'],['Z','Medium'],['X','Low']]
* Apply One-hot encoding to the first variable and ordinal encoding to the second variable.
  - The first variable must be encoded considering this set of values: X, Y, Z, W.
  - The second variable must be encoded considering this set of ranked values: Low, Medium, High, Very High
* Consider the dataset:
  Y = [['W','Very High']]
* Transform this dataset according to the fitted encoder

In [54]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer

X = [['X','High'],['Y','Low'],['Z','Medium'],['X','Low']]
# The category lists are declared up front so that values absent from X
# ('W', 'Very High') still get a dummy column / an ordinal code at transform time.
# sparse_output=False pins a dense ndarray: this data sits right at
# ColumnTransformer's default density threshold, so relying on the encoder's
# sparse default would leave the output type to a borderline heuristic.
encoder = ColumnTransformer([
    ('first_variable', OneHotEncoder(categories=[['X', 'Y', 'Z', 'W']], sparse_output=False), [0]),
    ('second_variable', OrdinalEncoder(categories=[['Low', 'Medium', 'High', 'Very High']]), [1])
])
encoder.fit_transform(X)

array([[1., 0., 0., 0., 2.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 1.],
       [1., 0., 0., 0., 0.]])

In [55]:
Y = [['W','Very High']]
encoder.transform(Y)

array([[0., 0., 0., 1., 3.]])