> **Principal Component Analysis (PCA) applied to the Breast Cancer data set.
I only want to visualize the features that really matter in predicting breast cancer.**

In [None]:
import numpy as np 
import pandas as pd 

# Load the data set from the CSV file into a DataFrame.
df = pd.read_csv("../input/data.csv")

**I focus on the features and the labels only and drop the other columns.**

In [None]:
# Drop the columns that are neither features nor the label. The result is
# assigned back to `df`, so this cell can be re-run after the columns are
# already gone; errors='ignore' keeps it idempotent instead of raising KeyError.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [None]:
# Preview the first rows of the DataFrame.
df.head()

**For principal component analysis we use only the features, since PCA is an unsupervised machine learning method.**

In [None]:
# Feature matrix: every column of df except the 'diagnosis' label.
df_features = df.drop(columns=['diagnosis'])

**Standardize the data of the features**

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Create the scaler object; it is fitted on the feature columns in the next cell.
standardized = StandardScaler()

In [None]:
# Fit the scaler on the feature columns (learns the per-column scaling statistics).
standardized.fit(df_features)

In [None]:
# Apply the fitted scaling to the features; scaled_data is the input to PCA below.
scaled_data = standardized.transform(df_features)

**I decompose the 30 features into 3 principal components.**

In [None]:
#PCA
from sklearn.decomposition import PCA

In [None]:
# Keep the first three principal components. random_state is pinned because
# scikit-learn's default svd_solver='auto' may select the randomized solver
# (depending on library version and data shape), which would otherwise make
# the projection differ slightly between runs.
pca = PCA(n_components=3, random_state=0)

In [None]:
# Fit the PCA model on the standardized features.
pca.fit(scaled_data)

In [None]:
# Project the standardized features onto the fitted principal components.
x_pca = pca.transform(scaled_data)

In [None]:
# Shape of the standardized feature matrix, before the PCA projection.
scaled_data.shape

**Each column is a principal component**

In [None]:
# Shape after the projection: same number of rows, one column per component
# requested from PCA (n_components=3).
x_pca.shape

**I encode the diagnosis labels as a numeric Series (1 for 'M', 0 otherwise).**

In [None]:
def diag(x):
    """Encode a diagnosis label as an integer.

    Returns 1 when ``x`` equals the string 'M' and 0 for any other value.
    """
    return 1 if x == 'M' else 0
# Apply diag element-wise to the 'diagnosis' column to get a 0/1 Series
# (1 for 'M', 0 otherwise) that shares df's index.
df_diag= df['diagnosis'].apply(diag)

**I plot the principal components to see the relations between them.**

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Peek at the first projected sample (first row of x_pca).
x_pca[:1]

In [None]:
# 3D scatter of the three principal components, coloured by the 0/1 diagnosis
# encoding in df_diag.
fig = plt.figure(figsize=(15, 8))
ax = fig.add_subplot(111, projection='3d')
points = ax.scatter(x_pca[:, 0], x_pca[:, 1], x_pca[:, 2], c=df_diag, s=60)
# A single scatter call yields one artist, so ax.legend(['Malign']) would label
# every point (both classes) as malign. legend_elements() instead returns one
# handle per distinct colour value in ascending order: 0 first, then 1.
# NOTE(review): 'Benign' assumes every non-'M' label means benign and that both
# classes are present in the data -- confirm against the data set.
handles, _ = points.legend_elements()
ax.legend(handles, ['Benign', 'Malignant'])
ax.set_xlabel('First Principal Component')
ax.set_ylabel('Second Principal Component')
ax.set_zlabel('Third Principal Component')
# Elevation 30 degrees, azimuth 120 degrees.
ax.view_init(30, 120)

In [None]:
# First vs. third principal component, coloured by the diagnosis label.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments. The axes are created explicitly so that `ax` really
# is an Axes (plt.figure() returns a Figure).
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(x=x_pca[:, 0], y=x_pca[:, 2], hue=df['diagnosis'], palette='Set1', ax=ax)
ax.set_xlabel('First Principal Component')
ax.set_ylabel('Third Principal Component')

In [None]:
# Second vs. third principal component, coloured by the diagnosis label.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments. The axes are created explicitly so that `ax` really
# is an Axes (plt.figure() returns a Figure).
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(x=x_pca[:, 1], y=x_pca[:, 2], hue=df['diagnosis'], palette='Set1', ax=ax)
ax.set_xlabel('Second Principal Component')
ax.set_ylabel('Third Principal Component')

In [None]:
# First vs. second principal component, coloured by the diagnosis label.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments. The axes are created explicitly so that `ax` really
# is an Axes (plt.figure() returns a Figure).
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(x=x_pca[:, 0], y=x_pca[:, 1], hue=df['diagnosis'], palette='Set1', ax=ax)
ax.set_xlabel('First Principal Component')
ax.set_ylabel('Second Principal Component')

In [None]:
# Tabulate the fitted component weights: one row per principal component,
# one column per original feature (column names taken from df_features).
df_pc = pd.DataFrame(pca.components_, columns = df_features.columns)

In [None]:
# Display the table of component weights.
df_pc

In [None]:
# Heatmap of the weight each original feature has in each principal component.
# The values come from pca.components_, which are the component direction
# vectors rather than correlation coefficients, so the title says "weights".
# Rows are labelled PC1, PC2, ... instead of the bare integer index.
fig, ax = plt.subplots(figsize=(15, 8))
sns.heatmap(
    df_pc,
    cmap='viridis',
    yticklabels=['PC{}'.format(i + 1) for i in range(len(df_pc))],
    ax=ax,
)
ax.set_title('Principal component weights for each feature')
ax.set_xlabel('Features')
ax.set_ylabel('Principal Components')