## Import dependencies

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
# %matplotlib inline 

## Load data

In [None]:
# Use the breast-cancer dataset that is bundled with scikit-learn.
from sklearn.datasets import load_breast_cancer

# The loader returns a dict-like bunch; the feature matrix, feature names and
# target labels are read from it in this notebook.
cancer = load_breast_cancer()

# Wrap the feature matrix in a DataFrame with one named column per feature.
df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)

# Preview the first few rows.
df.head()

## Computing PCA

In [None]:
# Standardize every feature (zero mean, unit variance) so that no feature
# dominates the principal components merely because of its scale.
from sklearn.preprocessing import StandardScaler

scalar = StandardScaler()
scaled_data = scalar.fit_transform(df)

# Project the standardized features onto the leading principal components.
from sklearn.decomposition import PCA

# Three components are kept; the scatter plot and the reduced-data model
# further down only use the first two columns of x_pca.
# random_state pins the result in case scikit-learn selects its randomized
# SVD solver; 55 is the same seed the train/test splits in this notebook use.
pca = PCA(n_components=3, random_state=55)
x_pca = pca.fit_transform(scaled_data)

# (number of samples, 3)
x_pca.shape


## Visualization of reduced dataset

In [None]:
# Scatter the samples in the plane of the first two principal components,
# coloured by their target label, on an 8x6 inch figure.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x_pca[:, 0], x_pca[:, 1], c=cancer['target'], cmap='plasma')

# Axis labels name the component plotted on each axis.
ax.set_xlabel('First Principal Component')
ax.set_ylabel('Second Principal Component')


## Training with reduced dataset

In [None]:
# Features: the first two principal-component columns of x_pca.
df_x = pd.DataFrame(data=x_pca[:, :2])
# Labels: the dataset's target vector as a one-column frame.
df_y = pd.DataFrame(data=cancer['target'])

# (rows, columns) of the reduced feature frame.
df_x.shape

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
model1 = LogisticRegression(solver='lbfgs', max_iter=10000)

x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size = 0.25, random_state = 55)

%time
model1 = model1.fit(x_train, y_train.values.ravel())
y_predict = model1.predict(x_test)

In [None]:
# Mean accuracy of model1 on the held-out rows.
test_accuracy = model1.score(x_test, y_test)
print("model score: ", test_accuracy)

# Per-class report on the held-out rows, using the predictions stored in y_predict.
print('### testing on test data:')
print(classification_report(y_test, y_predict))

# Same report on the rows the model was fitted on.
train_predictions = model1.predict(x_train)
print('### testing on training data:')
print(classification_report(y_train, train_predictions))

## Training with original data

In [None]:
# Features: every column of the original frame df (no standardization, no PCA).
df_x = pd.DataFrame(data=df)
# Labels: the dataset's target vector as a one-column frame.
df_y = pd.DataFrame(data=cancer['target'])

# (rows, columns) of the full feature frame.
df_x.shape

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
model2 = LogisticRegression(solver='lbfgs', max_iter=10000)

x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size = 0.25, random_state = 55)

%time
model2.fit(x_train, y_train.values.ravel())
y_predict = model2.predict(x_test)

## Test Report

In [None]:
# Mean accuracy of model2 on the held-out rows.
test_accuracy = model2.score(x_test, y_test)
print("model score: ", test_accuracy)

# Per-class report on the held-out rows, using the predictions stored in y_predict.
print('### testing on test data:')
print(classification_report(y_test, y_predict))

# Same report on the rows the model was fitted on.
train_predictions = model2.predict(x_train)
print('### testing on training data:')
print(classification_report(y_train, train_predictions))