In [1]:
import numpy as np

In [2]:
product_names = np.array(['хлеб', 'молоко', 'творог'])
product_names

array(['хлеб', 'молоко', 'творог'], dtype='<U6')

In [3]:
price = np.array([30, 80, 60])
price

array([30, 80, 60])

In [4]:
names = np.array(['Bob', 'Alice', 'Joe'])
names

array(['Bob', 'Alice', 'Joe'], dtype='<U5')

в строках - покупки покупателей, в столбцах - продукты (хлеб, молоко, творог)

In [5]:
history = np.array([[1, 2, 1], 
                    [2, 3, 0], 
                    [0, 1, 4]])
history

array([[1, 2, 1],
       [2, 3, 0],
       [0, 1, 4]])

In [6]:
history[1:]

array([[2, 3, 0],
       [0, 1, 4]])

In [7]:
history[0]

array([1, 2, 1])

In [8]:
history[names=='Bob', 0]

array([1])

In [9]:
history[names=='Bob', product_names=='хлеб']

array([1])

#### Broadcasting

In [10]:
history

array([[1, 2, 1],
       [2, 3, 0],
       [0, 1, 4]])

In [11]:
price

array([30, 80, 60])

In [12]:
sum_history = history * price
sum_history

array([[ 30, 160,  60],
       [ 60, 240,   0],
       [  0,  80, 240]])

In [13]:
sum_history[names=='Bob']

array([[ 30, 160,  60]])

#### Aggregation

In [14]:
sum_history.sum()

870

По покупателям:

In [15]:
sum_history.sum(axis=1)

array([250, 300, 320])

In [16]:
sum_history.sum(axis=1)[names=='Bob']

array([250])

По продуктам:

In [17]:
sum_history.sum(axis=0)

array([ 90, 480, 300])

In [18]:
sum_history.sum(axis=0)[product_names=='творог']

array([300])

In [19]:
sum_history

array([[ 30, 160,  60],
       [ 60, 240,   0],
       [  0,  80, 240]])

In [20]:
sum_history.mean(axis=0)

array([ 30., 160., 100.])

In [21]:
sum_history.min(axis=0)

array([ 0, 80,  0])

In [22]:
sum_history.max(axis=0)

array([ 60, 240, 240])

#### Объединение массивов

hstack

In [23]:
features = np.array(['Rooms', 'Square', 'KitchenSquare'])
features

array(['Rooms', 'Square', 'KitchenSquare'], dtype='<U13')

In [24]:
flat_id = np.array([123, 124, 125, 180])

In [25]:
flats = np.array([[1, 35, 8], 
                  [1, 38, 10], 
                  [2, 45, 11], 
                  [3, 65, 12]])
flats

array([[ 1, 35,  8],
       [ 1, 38, 10],
       [ 2, 45, 11],
       [ 3, 65, 12]])

Цена - в миллионах рублей

In [26]:
price = np.array([3.5, 3.7, 5.5, 7.2])

In [27]:
price.ndim

1

In [28]:
price.shape

(4,)

In [29]:
price = price.reshape((-1, 1))
price

array([[3.5],
       [3.7],
       [5.5],
       [7.2]])

In [30]:
flats

array([[ 1, 35,  8],
       [ 1, 38, 10],
       [ 2, 45, 11],
       [ 3, 65, 12]])

In [31]:
flats2 = np.hstack([flats, price])
flats2

array([[ 1. , 35. ,  8. ,  3.5],
       [ 1. , 38. , 10. ,  3.7],
       [ 2. , 45. , 11. ,  5.5],
       [ 3. , 65. , 12. ,  7.2]])

vstack

In [32]:
new_flat = np.array([4, 75, 15, 12.5])
new_flat

array([ 4. , 75. , 15. , 12.5])

In [33]:
flats2 = np.vstack([flats2, new_flat])
flats2

array([[ 1. , 35. ,  8. ,  3.5],
       [ 1. , 38. , 10. ,  3.7],
       [ 2. , 45. , 11. ,  5.5],
       [ 3. , 65. , 12. ,  7.2],
       [ 4. , 75. , 15. , 12.5]])

In [34]:
flats2[:, -1]

array([ 3.5,  3.7,  5.5,  7.2, 12.5])

In [35]:
flats2[:, -1].sum()

32.4

In [36]:
flats2[:, -1].mean()

6.4799999999999995

In [37]:
np.median(flats2[:, -1])

5.5

In [38]:
features2 = np.hstack([features, np.array(['Price'])])
features2

array(['Rooms', 'Square', 'KitchenSquare', 'Price'], dtype='<U13')

In [39]:
flats2[:, features2=='Rooms']

array([[1.],
       [1.],
       [2.],
       [3.],
       [4.]])

In [40]:
flats2[:, features2=='Rooms'].mean()

2.2

In [41]:
np.median(flats2[:, features2=='Rooms'])

2.0

In [42]:
new_flat2 = np.array([5, 85, 12, np.nan])

In [43]:
flats2.shape

(5, 4)

In [44]:
new_flat2.shape

(4,)

In [45]:
flats2 = np.vstack([flats2[:1, :], new_flat2, flats2[1:, :]])
flats2

array([[ 1. , 35. ,  8. ,  3.5],
       [ 5. , 85. , 12. ,  nan],
       [ 1. , 38. , 10. ,  3.7],
       [ 2. , 45. , 11. ,  5.5],
       [ 3. , 65. , 12. ,  7.2],
       [ 4. , 75. , 15. , 12.5]])

In [46]:
a = np.array([2, 4, np.nan])

In [47]:
a

array([ 2.,  4., nan])

In [48]:
flats2[1, -1] = 0

In [49]:
flats2[1, -1] = flats2[:, -1].mean()

In [50]:
flats2

array([[ 1. , 35. ,  8. ,  3.5],
       [ 5. , 85. , 12. ,  5.4],
       [ 1. , 38. , 10. ,  3.7],
       [ 2. , 45. , 11. ,  5.5],
       [ 3. , 65. , 12. ,  7.2],
       [ 4. , 75. , 15. , 12.5]])

In [51]:
flats2[:, -1].sum()

37.8