In [1]:
import pandas as pd
import numpy as np

# Fixed seed so that "Restart Kernel -> Run All" regenerates the same frame
# (and therefore the same scaling, aggregation and binning results below).
SEED = 42
rng = np.random.default_rng(SEED)

# Synthetic data set: 1000 rows of uniformly drawn integers stored as floats.
# Each column is drawn from its own half-open range [low, high).
data = pd.DataFrame({
    'x1': rng.integers(-100, 100, 1000).astype(float),
    'y1': rng.integers(-80, 80, 1000).astype(float),
    'z1': rng.integers(-150, 150, 1000).astype(float),
})
data

Unnamed: 0,x1,y1,z1
0,-83.0,72.0,-40.0
1,90.0,-46.0,-33.0
2,88.0,33.0,-99.0
3,15.0,52.0,-13.0
4,-46.0,-76.0,1.0
...,...,...,...
995,-73.0,37.0,-35.0
996,-16.0,-35.0,-119.0
997,-6.0,-39.0,75.0
998,97.0,38.0,-98.0


### Standardization

In [3]:
from sklearn.preprocessing import StandardScaler,normalize,MinMaxScaler

# Standardize the first three columns of `data`, then report the mean and
# standard deviation taken over every element of the scaled array (no axis
# is given, so both statistics are single scalars).
Scaler = StandardScaler()
Scaled_data = Scaler.fit_transform(data.iloc[:, :3])
mean, stdvar = Scaled_data.mean(), Scaled_data.std()
print(
    "StandardScaler mean is : ", mean,
    " \nStandardScaler standard devaition is : ", stdvar,
)

StandardScaler mean is :  1.1842378929335004e-17  
StandardScaler standard devaition is :  1.0


### Normalisation

In [5]:
# Min-max scale the first three columns of `data` (MinMaxScaler with its
# default settings), then report the mean and standard deviation taken over
# every element of the scaled array.
min_max_scaler = MinMaxScaler()
normalised_data = min_max_scaler.fit_transform(data.iloc[:, :3])
mean, stdvar = normalised_data.mean(), normalised_data.std()
print(
    "normalised_data mean is : ", mean,
    " \nnormalised_data standard devaition is : ", stdvar,
)

normalised_data mean is :  0.4912775190396356  
normalised_data standard devaition is :  0.28962164810541263


In [6]:
# Re-display the source frame: the scaled results above were stored in
# separate arrays (Scaled_data, normalised_data), and `data` still shows
# the raw values.
data

Unnamed: 0,x1,y1,z1
0,-83.0,72.0,-40.0
1,90.0,-46.0,-33.0
2,88.0,33.0,-99.0
3,15.0,52.0,-13.0
4,-46.0,-76.0,1.0
...,...,...,...
995,-73.0,37.0,-35.0
996,-16.0,-35.0,-119.0
997,-6.0,-39.0,75.0
998,97.0,38.0,-98.0


### Aggregation

In [8]:
# Column-specific aggregations: sum and min for x1, min and max for y1.
# Combinations that were not requested appear as NaN in the result.
data.agg({
    'x1': ['sum', 'min'],
    'y1': ['min', 'max'],
})

Unnamed: 0,x1,y1
sum,-3083.0,
min,-100.0,-80.0
max,,79.0


In [9]:
# Apply the same list of aggregations (sum, min) to every column of `data`;
# the result has one row per aggregation and one column per input column.
data.agg(['sum','min'])

Unnamed: 0,x1,y1,z1
sum,-3083.0,-1286.0,-2965.0
min,-100.0,-80.0,-150.0


In [10]:
# axis=0 aggregates down each column. The output is identical to the
# previous cell's, which did not pass axis at all.
data.agg(['sum','min'],axis=0)

Unnamed: 0,x1,y1,z1
sum,-3083.0,-1286.0,-2965.0
min,-100.0,-80.0,-150.0


### Discretization

In [12]:
# Discretize z1 into 5 equal-width bins covering the column's observed range;
# each value is replaced by the right-closed interval it falls into.
pd.cut(data['z1'],bins=5)

0         (-90.2, -30.4]
1         (-90.2, -30.4]
2      (-150.299, -90.2]
3          (-30.4, 29.4]
4          (-30.4, 29.4]
             ...        
995       (-90.2, -30.4]
996    (-150.299, -90.2]
997         (29.4, 89.2]
998    (-150.299, -90.2]
999       (-90.2, -30.4]
Name: z1, Length: 1000, dtype: category
Categories (5, interval[float64, right]): [(-150.299, -90.2] < (-90.2, -30.4] < (-30.4, 29.4] < (29.4, 89.2] < (89.2, 149.0]]

In [15]:
# Quantile edges for five equal-frequency bins (quintiles) of y1.
# Named `quantile_edges` rather than `bin` so the builtin bin() is not
# shadowed for the rest of the notebook session.
quantile_edges = [0, 0.2, 0.4, 0.6, 0.8, 1]
quintile_labels = ['1st', '2nd', '3rd', '4th', '5th']
pd.qcut(data['y1'], quantile_edges, labels=quintile_labels)

0      5th
1      2nd
2      4th
3      5th
4      1st
      ... 
995    4th
996    2nd
997    2nd
998    4th
999    3rd
Name: y1, Length: 1000, dtype: category
Categories (5, object): ['1st' < '2nd' < '3rd' < '4th' < '5th']

### Binarization

In [17]:
from sklearn.preprocessing import Binarizer

# Small demonstration frame containing positive, zero and negative values.
data1 = [[2, -2, 1],
        [5, -5, 3],
        [1, 0, -1]]

df6 = pd.DataFrame(data1, columns = ['x1', 'x2', 'x3'])

# Use Binarizer to apply binarization: values strictly greater than the
# threshold (0.0) become 1, everything else (including 0 itself) becomes 0.
# copy=True makes the transform work on its own copy, so it cannot write
# into df6's underlying buffer and the raw values stay available.
binr = Binarizer(threshold=0.0, copy=True)
scaled_df6 = pd.DataFrame(binr.fit_transform(df6), columns=df6.columns)

scaled_df6

Unnamed: 0,x1,x2,x3
0,1,0,1
1,1,0,1
2,1,0,0


## Sampling

In [16]:
# Draw 100 rows at random (without replacement, the pandas default).
# random_state pins the draw so re-running the notebook shows the same sample.
data.sample(n=100, random_state=42)

Unnamed: 0,x1,y1,z1
946,-48.0,1.0,96.0
658,-25.0,45.0,-73.0
73,-18.0,10.0,-100.0
127,-94.0,-68.0,20.0
679,70.0,-57.0,72.0
...,...,...,...
325,-29.0,-53.0,-44.0
198,-90.0,44.0,123.0
88,-54.0,-38.0,125.0
224,44.0,41.0,46.0


## Transformation