In [7]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load scikit-learn's bundled breast cancer dataset (features in .data,
# labels in .target).
cancer = load_breast_cancer()

# Hold out a test split using the default proportions; the fixed
# random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=1,
)

# Show the shape of each split, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(426, 30)
(143, 30)


# preprocessing the data

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaler with default settings; it is not fitted to any data yet.
scaler = MinMaxScaler()

In [9]:
# Learn the scaling parameters from the training split only, so that no
# information from the test split leaks into preprocessing.
scaler.fit(X_train)

In [10]:
# Rescale the training split with the scaler that was fitted on it.
X_train_scaled = scaler.transform(X_train)

# Report the per-feature range before and after scaling. axis=0 reduces over
# samples, giving one value per feature. Scaling changes the values only,
# not the array shape.
print("transformed shape {}".format(X_train_scaled.shape))
print("per-feature minimum before scaling:\n{}".format(X_train.min(axis=0)))
print("per-feature maximum before scaling:\n{}".format(X_train.max(axis=0)))
print("per-feature minimum after scaling:\n{}".format(X_train_scaled.min(axis=0)))
print("per-feature maximum after scaling:\n{}".format(X_train_scaled.max(axis=0)))

transformed shape (426, 30)
pre-feature minimum before scaling:
[6.981e+00 9.710e+00 4.379e+01 1.435e+02 5.263e-02 1.938e-02 0.000e+00
 0.000e+00 1.060e-01 5.024e-02 1.153e-01 3.602e-01 7.570e-01 6.802e+00
 1.713e-03 2.252e-03 0.000e+00 0.000e+00 9.539e-03 8.948e-04 7.930e+00
 1.202e+01 5.041e+01 1.852e+02 7.117e-02 2.729e-02 0.000e+00 0.000e+00
 1.566e-01 5.521e-02]
pre-feature maximum before scaling:
[2.811e+01 3.928e+01 1.885e+02 2.501e+03 1.634e-01 2.867e-01 4.268e-01
 2.012e-01 3.040e-01 9.575e-02 2.873e+00 4.885e+00 2.198e+01 5.422e+02
 3.113e-02 1.354e-01 3.960e-01 5.279e-02 6.146e-02 2.984e-02 3.604e+01
 4.954e+01 2.512e+02 4.254e+03 2.226e-01 9.379e-01 1.170e+00 2.910e-01
 5.774e-01 1.486e-01]
pre-feature minium after scaling:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
pre-feature maximum after scaling:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]


In [11]:
# Rescale the test split with the scaler fitted on the TRAINING split.
# The scaler is deliberately not refitted here, so the test data receives
# exactly the same transformation as the training data.
X_test_scaled = scaler.transform(X_test)

# Report the per-feature range before and after scaling (axis=0 reduces over
# samples). Because the scaling parameters come from the training split, the
# scaled test values are not guaranteed to lie within [0, 1].
print("transformed shape {}".format(X_test_scaled.shape))
print("per-feature minimum before scaling:\n{}".format(X_test.min(axis=0)))
print("per-feature maximum before scaling:\n{}".format(X_test.max(axis=0)))
print("per-feature minimum after scaling:\n{}".format(X_test_scaled.min(axis=0)))
print("per-feature maximum after scaling:\n{}".format(X_test_scaled.max(axis=0)))

transformed shape (143, 30)
pre-feature minimum before scaling:
[7.691e+00 1.038e+01 4.834e+01 1.704e+02 6.828e-02 3.116e-02 0.000e+00
 0.000e+00 1.365e-01 4.996e-02 1.115e-01 3.871e-01 8.484e-01 7.228e+00
 2.866e-03 3.746e-03 0.000e+00 0.000e+00 7.882e-03 1.087e-03 8.678e+00
 1.420e+01 5.449e+01 2.236e+02 8.774e-02 5.131e-02 0.000e+00 0.000e+00
 1.565e-01 5.504e-02]
pre-feature maximum before scaling:
[2.722e+01 3.381e+01 1.821e+02 2.250e+03 1.425e-01 3.454e-01 3.754e-01
 1.878e-01 2.906e-01 9.744e-02 1.292e+00 2.612e+00 1.012e+01 1.587e+02
 1.604e-02 1.006e-01 3.038e-01 3.322e-02 7.895e-02 1.220e-02 3.312e+01
 4.178e+01 2.208e+02 3.216e+03 2.098e-01 1.058e+00 1.252e+00 2.688e-01
 6.638e-01 2.075e-01]
pre-feature minium after scaling:
[ 0.0336031   0.0226581   0.03144219  0.01141039  0.14128374  0.04406704
  0.          0.          0.1540404  -0.00615249 -0.00137796  0.00594501
  0.00430665  0.00079567  0.03919502  0.0112206   0.          0.
 -0.03191387  0.00664013  0.02660975  0.058

# scaling the data

In [12]:
from sklearn.datasets import make_blobs
# Generate a small synthetic dataset: 50 samples drawn around 5 centers.
# The labels returned by make_blobs are discarded; the seed is fixed for
# reproducibility.
X, _ = make_blobs(n_samples=50, centers=5, random_state=4)