# UTBM AD50 TP

## Unsupervised Learning

*Adrien Bouyssou*

### 1. Imports

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
import pandas

### 2. Preparations

In [2]:
# Fetch the breast cancer data shipped with scikit-learn.
cancer = load_breast_cancer()

# Keep the feature labels and the target vector under their own names.
columns, target = cancer.feature_names, cancer.target

# Wrap the raw feature matrix in a DataFrame labelled with the feature names.
dataset = pandas.DataFrame(data=cancer.data, columns=columns)
dataset.head()


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


### 3. Original dataset representations

In [3]:
def create_representation_dataset(dataset_to_represent, elements_in_the_dataset):
    """Regroup each row into one ``{'mean', 'error', 'worst'}`` dict per element.

    Every row of ``dataset_to_represent.values`` is read as three consecutive
    groups, each ``len(elements_in_the_dataset)`` values long: the first group
    supplies the ``'mean'`` entries, the second the ``'error'`` entries and the
    third the ``'worst'`` entries.

    Args:
        dataset_to_represent: Object exposing ``.values`` as an iterable of
            positionally indexable rows (a pandas DataFrame in this notebook).
        elements_in_the_dataset: Sized sequence of element names; they become
            the columns of the result.

    Returns:
        pandas.DataFrame: One row per input row and one column per element,
        each cell holding the dict ``{'mean': ..., 'error': ..., 'worst': ...}``.
    """
    # Distance between the 'mean', 'error' and 'worst' value of one element.
    group_size = len(elements_in_the_dataset)

    def summarise(row):
        # Collect the three statistics of every element for a single row.
        return {
            name: {
                'mean': row[position],
                'error': row[position + group_size],
                'worst': row[position + 2 * group_size],
            }
            for position, name in enumerate(elements_in_the_dataset)
        }

    summaries = [summarise(row) for row in dataset_to_represent.values]
    return pandas.DataFrame(summaries, columns=elements_in_the_dataset)

# The first 10 columns hold one "mean ..." column per measured element.
number_of_elements = 10
# Element label = column name without its first word ("mean radius" -> "radius").
elements = [
    ' '.join(words[1:])
    for words in map(str.split, columns[:number_of_elements])
]

representations = create_representation_dataset(
    dataset_to_represent=dataset,
    elements_in_the_dataset=elements,
)
representations.head()

Unnamed: 0,radius,texture,perimeter,area,smoothness,compactness,concavity,concave points,symmetry,fractal dimension
0,"{'mean': 17.99, 'error': 1.095, 'worst': 25.38}","{'mean': 10.38, 'error': 0.9053, 'worst': 17.33}","{'mean': 122.8, 'error': 8.589, 'worst': 184.6}","{'mean': 1001.0, 'error': 153.4, 'worst': 2019.0}","{'mean': 0.1184, 'error': 0.006399, 'worst': 0...","{'mean': 0.2776, 'error': 0.04904, 'worst': 0....","{'mean': 0.3001, 'error': 0.05373, 'worst': 0....","{'mean': 0.1471, 'error': 0.01587, 'worst': 0....","{'mean': 0.2419, 'error': 0.03003, 'worst': 0....","{'mean': 0.07871, 'error': 0.006193, 'worst': ..."
1,"{'mean': 20.57, 'error': 0.5435, 'worst': 24.99}","{'mean': 17.77, 'error': 0.7339, 'worst': 23.41}","{'mean': 132.9, 'error': 3.398, 'worst': 158.8}","{'mean': 1326.0, 'error': 74.08, 'worst': 1956.0}","{'mean': 0.08474, 'error': 0.005225, 'worst': ...","{'mean': 0.07864, 'error': 0.01308, 'worst': 0...","{'mean': 0.0869, 'error': 0.0186, 'worst': 0.2...","{'mean': 0.07017, 'error': 0.0134, 'worst': 0....","{'mean': 0.1812, 'error': 0.01389, 'worst': 0....","{'mean': 0.05667, 'error': 0.003532, 'worst': ..."
2,"{'mean': 19.69, 'error': 0.7456, 'worst': 23.57}","{'mean': 21.25, 'error': 0.7869, 'worst': 25.53}","{'mean': 130.0, 'error': 4.585, 'worst': 152.5}","{'mean': 1203.0, 'error': 94.03, 'worst': 1709.0}","{'mean': 0.1096, 'error': 0.00615, 'worst': 0....","{'mean': 0.1599, 'error': 0.04006, 'worst': 0....","{'mean': 0.1974, 'error': 0.03832, 'worst': 0....","{'mean': 0.1279, 'error': 0.02058, 'worst': 0....","{'mean': 0.2069, 'error': 0.0225, 'worst': 0.3...","{'mean': 0.05999, 'error': 0.004571, 'worst': ..."
3,"{'mean': 11.42, 'error': 0.4956, 'worst': 14.91}","{'mean': 20.38, 'error': 1.156, 'worst': 26.5}","{'mean': 77.58, 'error': 3.445, 'worst': 98.87}","{'mean': 386.1, 'error': 27.23, 'worst': 567.7}","{'mean': 0.1425, 'error': 0.00911, 'worst': 0....","{'mean': 0.2839, 'error': 0.07458, 'worst': 0....","{'mean': 0.2414, 'error': 0.05661, 'worst': 0....","{'mean': 0.1052, 'error': 0.01867, 'worst': 0....","{'mean': 0.2597, 'error': 0.05963, 'worst': 0....","{'mean': 0.09744, 'error': 0.009208, 'worst': ..."
4,"{'mean': 20.29, 'error': 0.7572, 'worst': 22.54}","{'mean': 14.34, 'error': 0.7813, 'worst': 16.67}","{'mean': 135.1, 'error': 5.438, 'worst': 152.2}","{'mean': 1297.0, 'error': 94.44, 'worst': 1575.0}","{'mean': 0.1003, 'error': 0.01149, 'worst': 0....","{'mean': 0.1328, 'error': 0.02461, 'worst': 0....","{'mean': 0.198, 'error': 0.05688, 'worst': 0.4}","{'mean': 0.1043, 'error': 0.01885, 'worst': 0....","{'mean': 0.1809, 'error': 0.01756, 'worst': 0....","{'mean': 0.05883, 'error': 0.005115, 'worst': ..."


### 4. Transform and reduce the dataset

In [4]:
# Fit the scaler on the full dataset first, then apply it to the same data.
scaler = StandardScaler().fit(dataset)
transformed_dataset = pandas.DataFrame(
    data=scaler.transform(dataset),
    columns=columns,
)

transformed_dataset.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,1.097064,-2.073335,1.269934,0.984375,1.568466,3.283515,2.652874,2.532475,2.217515,2.255747,...,1.88669,-1.359293,2.303601,2.001237,1.307686,2.616665,2.109526,2.296076,2.750622,1.937015
1,1.829821,-0.353632,1.685955,1.908708,-0.826962,-0.487072,-0.023846,0.548144,0.001392,-0.868652,...,1.805927,-0.369203,1.535126,1.890489,-0.375612,-0.430444,-0.146749,1.087084,-0.24389,0.28119
2,1.579888,0.456187,1.566503,1.558884,0.94221,1.052926,1.363478,2.037231,0.939685,-0.398008,...,1.51187,-0.023974,1.347475,1.456285,0.527407,1.082932,0.854974,1.955,1.152255,0.201391
3,-0.768909,0.253732,-0.592687,-0.764464,3.283553,3.402909,1.915897,1.451707,2.867383,4.910919,...,-0.281464,0.133984,-0.249939,-0.550021,3.394275,3.893397,1.989588,2.175786,6.046041,4.93501
4,1.750297,-1.151816,1.776573,1.826229,0.280372,0.53934,1.371011,1.428493,-0.00956,-0.56245,...,1.298575,-1.46677,1.338539,1.220724,0.220556,-0.313395,0.613179,0.729259,-0.868353,-0.3971


In [5]:
# Same per-element grouping as for the original data, applied to the scaled values.
representation_of_the_scaled_dataset = create_representation_dataset(
    dataset_to_represent=transformed_dataset,
    elements_in_the_dataset=elements,
)
representation_of_the_scaled_dataset.head()

Unnamed: 0,radius,texture,perimeter,area,smoothness,compactness,concavity,concave points,symmetry,fractal dimension
0,"{'mean': 1.0970639814699807, 'error': 2.489733...","{'mean': -2.0733350146975935, 'error': -0.5652...","{'mean': 1.2699336881399383, 'error': 2.833030...","{'mean': 0.9843749048031144, 'error': 2.487577...","{'mean': 1.568466329243428, 'error': -0.214001...","{'mean': 3.2835146709868264, 'error': 1.316861...","{'mean': 2.652873983743168, 'error': 0.7240261...","{'mean': 2.532475216403245, 'error': 0.6608199...","{'mean': 2.2175150059646405, 'error': 1.148756...","{'mean': 2.255746885296269, 'error': 0.9070830..."
1,"{'mean': 1.8298206075464458, 'error': 0.499254...","{'mean': -0.35363240824381126, 'error': -0.876...","{'mean': 1.6859547105508974, 'error': 0.263326...","{'mean': 1.9087082542365938, 'error': 0.742401...","{'mean': -0.8269624468508425, 'error': -0.6053...","{'mean': -0.48707167257589423, 'error': -0.692...","{'mean': -0.023845855198769264, 'error': -0.44...","{'mean': 0.5481441558908369, 'error': 0.260162...","{'mean': 0.0013923632994608738, 'error': -0.80...","{'mean': -0.8686524574634664, 'error': -0.0994..."
2,"{'mean': 1.5798881149312178, 'error': 1.228675...","{'mean': 0.4561869517641946, 'error': -0.78008...","{'mean': 1.5665031298586416, 'error': 0.850928...","{'mean': 1.5588836327586924, 'error': 1.181336...","{'mean': 0.9422104400684553, 'error': -0.29700...","{'mean': 1.05292554434161, 'error': 0.81497350...","{'mean': 1.3634784515699176, 'error': 0.213076...","{'mean': 2.0372307557008114, 'error': 1.424827...","{'mean': 0.939684816618985, 'error': 0.2370355...","{'mean': -0.3980079103689868, 'error': 0.29355..."
3,"{'mean': -0.7689092872596208, 'error': 0.32637...","{'mean': 0.25373211176219296, 'error': -0.1104...","{'mean': -0.5926871666544732, 'error': 0.28659...","{'mean': -0.7644637923250287, 'error': -0.2883...","{'mean': 3.283553480279431, 'error': 0.6897016...","{'mean': 3.402908991274548, 'error': 2.7442804...","{'mean': 1.9158971800569968, 'error': 0.819518...","{'mean': 1.451707356849496, 'error': 1.1150070...","{'mean': 2.867382930831859, 'error': 4.7326803...","{'mean': 4.9109192850190375, 'error': 2.047510..."
4,"{'mean': 1.7502966326234184, 'error': 1.270542...","{'mean': -1.1518164326195182, 'error': -0.7902...","{'mean': 1.7765731510760563, 'error': 1.273189...","{'mean': 1.826229278440991, 'error': 1.1903567...","{'mean': 0.2803718299176319, 'error': 1.483067...","{'mean': 0.5393404523102987, 'error': -0.04851...","{'mean': 1.3710114342311053, 'error': 0.828470...","{'mean': 1.4284927727540695, 'error': 1.144204...","{'mean': -0.009560466894930265, 'error': -0.36...","{'mean': -0.562449981040552, 'error': 0.499328..."
