In [46]:
import numpy as np
import pandas as pd
from numpy.random import permutation

np.random.seed(2025)

# 25 показників "якість сигналу" у діапазоні 0–100 (реальні числа)
results = list(np.round(np.random.uniform(0, 100, size=25), 1))
# контрольні значення на межах інтервалів
results.extend([0.0, 25.0, 50.0, 75.0, 100.0])

# 1200 випадкових вимірювань по трьох осях (із кількома штучними викидами)
randframe = pd.DataFrame(np.random.randn(1200, 3), columns=['ax', 'ay', 'az'])
randframe.loc[[99, 777], 'ay'] = [6.5, -7.0]   # extreme
randframe.loc[321, 'az'] = 8.2

# Невелика таблиця для перестановок/семплінгу
nframe = pd.DataFrame(np.arange(30).reshape(6, 5), columns=list('ABCDE'))

In [47]:
randframe

Unnamed: 0,ax,ay,az
0,-0.049538,0.099418,-1.212462
1,0.069020,0.007183,0.268326
2,-0.259820,-1.803209,0.129280
3,0.080078,0.680031,-0.169539
4,1.419880,-0.001824,-0.043246
...,...,...,...
1195,0.474469,-1.262630,-2.181257
1196,0.217057,-1.565543,-0.322148
1197,1.236125,0.619358,-1.164810
1198,-0.703357,0.746462,-1.157144


In [48]:
nframe

Unnamed: 0,A,B,C,D,E
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14
3,15,16,17,18,19
4,20,21,22,23,24
5,25,26,27,28,29


In [49]:
edges = [0, 25, 50, 75, 100]
cat = pd.cut(results, edges, include_lowest=True, right=False)
cat

[[0, 25), [75, 100), [75, 100), [25, 50), [25, 50), ..., [0.0, 25.0), [25.0, 50.0), [50.0, 75.0), [75.0, 100.0), NaN]
Length: 30
Categories (4, interval[int64, left]): [[0, 25) < [25, 50) < [50, 75) < [75, 100)]

In [50]:
cat.categories

IntervalIndex([[0, 25), [25, 50), [50, 75), [75, 100)], dtype='interval[int64, left]')

In [51]:
cat.codes

array([ 0,  3,  3,  1,  1,  1,  2,  1,  3,  3,  1,  3,  0,  3,  0,  1,  2,
        3,  1,  0,  2,  3,  1,  0,  3,  0,  1,  2,  3, -1], dtype=int8)

In [52]:
cat.value_counts()

[0, 25)       6
[25, 50)      9
[50, 75)      4
[75, 100)    10
Name: count, dtype: int64

In [53]:
labels = ['low', 'mid-low', 'mid-high', 'high']
cat_named = pd.cut(results, edges, include_lowest=True, right=False, labels=labels)
pd.Series(cat_named)

0          low
1         high
2         high
3      mid-low
4      mid-low
5      mid-low
6     mid-high
7      mid-low
8         high
9         high
10     mid-low
11        high
12         low
13        high
14         low
15     mid-low
16    mid-high
17        high
18     mid-low
19         low
20    mid-high
21        high
22     mid-low
23         low
24        high
25         low
26     mid-low
27    mid-high
28        high
29         NaN
dtype: category
Categories (4, object): ['low' < 'mid-low' < 'mid-high' < 'high']

In [54]:
cat_qcut = pd.Series(pd.qcut(results, 4, duplicates='drop'))
cat_qcut

0     (-0.001, 26.675]
1        (80.1, 100.0]
2        (80.1, 100.0]
3      (26.675, 49.65]
4      (26.675, 49.65]
5     (-0.001, 26.675]
6        (49.65, 80.1]
7      (26.675, 49.65]
8        (80.1, 100.0]
9        (49.65, 80.1]
10     (26.675, 49.65]
11       (49.65, 80.1]
12    (-0.001, 26.675]
13       (49.65, 80.1]
14    (-0.001, 26.675]
15     (26.675, 49.65]
16       (49.65, 80.1]
17       (80.1, 100.0]
18     (26.675, 49.65]
19    (-0.001, 26.675]
20       (49.65, 80.1]
21       (80.1, 100.0]
22     (26.675, 49.65]
23    (-0.001, 26.675]
24       (80.1, 100.0]
25    (-0.001, 26.675]
26    (-0.001, 26.675]
27       (49.65, 80.1]
28       (49.65, 80.1]
29       (80.1, 100.0]
dtype: category
Categories (4, interval[float64, right]): [(-0.001, 26.675] < (26.675, 49.65] < (49.65, 80.1] < (80.1, 100.0]]

In [55]:
cat_qcut.value_counts()

(-0.001, 26.675]    8
(49.65, 80.1]       8
(26.675, 49.65]     7
(80.1, 100.0]       7
Name: count, dtype: int64

In [56]:
randframe.describe()

Unnamed: 0,ax,ay,az
count,1200.0,1200.0,1200.0
mean,-0.018509,-0.02366,0.049737
std,1.006631,1.030968,1.038295
min,-3.586499,-7.0,-3.903772
25%,-0.696465,-0.688818,-0.608959
50%,-0.004205,-0.027693,0.056776
75%,0.693865,0.651293,0.713963
max,3.249025,6.5,8.2


In [57]:
std = randframe.std()

In [58]:
outliers = randframe[(np.abs(randframe) > (3 * std)).any(axis=1)]
outliers

Unnamed: 0,ax,ay,az
99,0.725209,6.5,-0.122689
141,0.644094,3.093397,0.280416
321,0.79525,-0.149737,8.2
331,-0.323533,3.171714,-0.606823
447,-0.438246,0.753566,-3.903772
488,2.621531,-1.078721,-3.21868
630,-3.586499,-0.604186,0.836046
706,-0.268785,-1.877115,-3.286758
777,0.768249,-7.0,-0.593741
984,3.249025,-2.222199,-2.12812


In [59]:
randframe.drop(outliers.index, inplace=True)
randframe.describe()

Unnamed: 0,ax,ay,az
count,1187.0,1187.0,1187.0
mean,-0.025803,-0.018866,0.051564
std,0.993627,0.977714,0.994991
min,-3.009869,-2.987318,-3.083494
25%,-0.705234,-0.677535,-0.605826
50%,-0.009226,-0.024333,0.057636
75%,0.677955,0.651086,0.710701
max,2.960335,2.483224,3.014649


In [62]:
random_order = np.random.permutation(nframe.shape[1])
random_order

array([3, 1, 4, 0, 2])

In [65]:
permutated_nframe = nframe.take(random_order, axis=1)
permutated_nframe

Unnamed: 0,D,B,E,A,C
0,3,1,4,0,2
1,8,6,9,5,7
2,13,11,14,10,12
3,18,16,19,15,17
4,23,21,24,20,22
5,28,26,29,25,27


In [66]:
nframe.take(random_order, axis=0)

Unnamed: 0,A,B,C,D,E
3,15,16,17,18,19
1,5,6,7,8,9
4,20,21,22,23,24
0,0,1,2,3,4
2,10,11,12,13,14
