<a href="https://colab.research.google.com/github/simha13/Prostate-Cancer-Detection/blob/main/Prostate_Cancer_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

Data Collection & Processing

In [None]:
# Read the prostate cancer CSV into a DataFrame; the path points into Colab's /content directory.
dataset_csv_path = '/content/Prostate_Cancer.csv'
prostate_cancer_dataset = pd.read_csv(dataset_csv_path)

In [None]:
# Plain-text dump of the loaded table; pandas elides the middle rows with "..".
print(prostate_cancer_dataset)

     id diagnosis_result  radius  texture  perimeter  area  smoothness  \
0     1                M      23       12        151   954       0.143   
1     2                B       9       13        133  1326       0.143   
2     3                M      21       27        130  1203       0.125   
3     4                M      14       16         78   386       0.070   
4     5                M       9       19        135  1297       0.141   
..  ...              ...     ...      ...        ...   ...         ...   
95   96                M      23       16        132  1264       0.091   
96   97                B      22       14         78   451       0.105   
97   98                B      19       27         62   295       0.102   
98   99                B      21       24         74   413       0.090   
99  100                M      16       27         94   643       0.098   

    compactness  symmetry  fractal_dimension  
0         0.278     0.242              0.079  
1         0.079  

In [None]:
# Preview the first rows of the table to check column names and value formats.
prostate_cancer_dataset.head()

Unnamed: 0,id,diagnosis_result,radius,texture,perimeter,area,smoothness,compactness,symmetry,fractal_dimension
0,1,M,23,12,151,954,0.143,0.278,0.242,0.079
1,2,B,9,13,133,1326,0.143,0.079,0.181,0.057
2,3,M,21,27,130,1203,0.125,0.16,0.207,0.06
3,4,M,14,16,78,386,0.07,0.284,0.26,0.097
4,5,M,9,19,135,1297,0.141,0.133,0.181,0.059


In [None]:
# (number of rows, number of columns) of the loaded table.
prostate_cancer_dataset.shape

(100, 10)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# describe is a method: without the call parentheses the cell only echoes the
# bound method's repr instead of computing the statistics.
prostate_cancer_dataset.describe()

<bound method NDFrame.describe of      id diagnosis_result  radius  texture  perimeter  area  smoothness  \
0     1                M      23       12        151   954       0.143   
1     2                B       9       13        133  1326       0.143   
2     3                M      21       27        130  1203       0.125   
3     4                M      14       16         78   386       0.070   
4     5                M       9       19        135  1297       0.141   
..  ...              ...     ...      ...        ...   ...         ...   
95   96                M      23       16        132  1264       0.091   
96   97                B      22       14         78   451       0.105   
97   98                B      19       27         62   295       0.102   
98   99                B      21       24         74   413       0.090   
99  100                M      16       27         94   643       0.098   

    compactness  symmetry  fractal_dimension  
0         0.278     0.242     

In [None]:
# Class balance: number of samples per value of the diagnosis_result label column.
prostate_cancer_dataset['diagnosis_result'].value_counts()

M    62
B    38
Name: diagnosis_result, dtype: int64

In [None]:
# Per-class mean of every other column, to compare the two diagnosis groups.
# NOTE(review): `id` looks like a plain row identifier, so its mean is presumably not meaningful.
prostate_cancer_dataset.groupby('diagnosis_result').mean()

Unnamed: 0_level_0,id,radius,texture,perimeter,area,smoothness,compactness,symmetry,fractal_dimension
diagnosis_result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
B,62.184211,17.947368,17.763158,78.5,474.342105,0.099053,0.086895,0.184053,0.064605
M,43.33871,16.177419,18.516129,107.983871,842.951613,0.104984,0.151097,0.198758,0.064742


In [None]:
# Split the table into the label vector Y (diagnosis_result) and the feature
# table X (every column except the identifier and the label).
Y = prostate_cancer_dataset['diagnosis_result']
X = prostate_cancer_dataset.drop(columns=['id', 'diagnosis_result'])

In [None]:
# Inspect the feature table after dropping the id and label columns.
print(X)

    radius  texture  perimeter  area  smoothness  compactness  symmetry  \
0       23       12        151   954       0.143        0.278     0.242   
1        9       13        133  1326       0.143        0.079     0.181   
2       21       27        130  1203       0.125        0.160     0.207   
3       14       16         78   386       0.070        0.284     0.260   
4        9       19        135  1297       0.141        0.133     0.181   
..     ...      ...        ...   ...         ...          ...       ...   
95      23       16        132  1264       0.091        0.131     0.210   
96      22       14         78   451       0.105        0.071     0.190   
97      19       27         62   295       0.102        0.053     0.135   
98      21       24         74   413       0.090        0.075     0.162   
99      16       27         94   643       0.098        0.114     0.188   

    fractal_dimension  
0               0.079  
1               0.057  
2               0.060  
3  

In [None]:
# Inspect the label vector (one diagnosis_result value per row).
print(Y)

0     M
1     B
2     M
3     M
4     M
     ..
95    M
96    B
97    B
98    B
99    M
Name: diagnosis_result, Length: 100, dtype: object


Standardizing the features

In [None]:
# Scaler that standardizes each feature (zero mean, unit variance) with default settings.
scaler = StandardScaler()

In [None]:
# Learn the per-feature mean and standard deviation from every row of X
# (this runs before the train/test split, so all rows contribute to the statistics).
scaler.fit(X)

In [None]:
# Apply the fitted scaling to X; the result is a NumPy array, so column names are no longer attached.
standardized_data = scaler.transform(X)

In [None]:
# Inspect the standardized feature array.
print(standardized_data)

[[ 1.26683005e+00 -1.20574648e+00  2.30161118e+00  7.89416627e-01
   2.76420965e+00  2.48697022e+00  1.59415115e+00  1.76446414e+00]
 [-1.61701072e+00 -1.01220772e+00  1.53752042e+00  1.95882960e+00
   2.76420965e+00 -7.84061332e-01 -3.97313527e-01 -9.48199109e-01]
 [ 8.54852801e-01  1.69733493e+00  1.41017196e+00  1.57216886e+00
   1.52865530e+00  5.47363571e-01  4.51507484e-01 -5.78290484e-01]
 [-5.87067586e-01 -4.31591436e-01 -7.97201366e-01 -9.96138662e-01
  -2.24664966e+00  2.58559429e+00  2.18179647e+00  3.98391589e+00]
 [-1.61701072e+00  1.49024846e-01  1.62241939e+00  1.86766568e+00
   2.62692583e+00  1.03555270e-01 -3.97313527e-01 -7.01593359e-01]
 [ 1.67880731e+00  1.31025741e+00 -5.84953931e-01 -7.10072586e-01
   1.73458102e+00  7.11737016e-01  5.16801408e-01  1.39455552e+00]
 [-1.75090333e-01  1.50379617e+00  9.85677088e-01  1.05976479e+00
  -5.30601951e-01 -2.90940997e-01 -4.62607451e-01 -9.48199109e-01]
 [-3.81078959e-01 -4.45139150e-02 -2.87807522e-01 -3.92570677e-01
   

In [None]:
# From here on X refers to the standardized feature array rather than the raw
# feature table; Y is read again from the label column of the original table.
Y = prostate_cancer_dataset['diagnosis_result']
X = standardized_data

In [None]:
# Inspect the final model inputs: standardized features followed by the labels.
print(X)
print(Y)

[[ 1.26683005e+00 -1.20574648e+00  2.30161118e+00  7.89416627e-01
   2.76420965e+00  2.48697022e+00  1.59415115e+00  1.76446414e+00]
 [-1.61701072e+00 -1.01220772e+00  1.53752042e+00  1.95882960e+00
   2.76420965e+00 -7.84061332e-01 -3.97313527e-01 -9.48199109e-01]
 [ 8.54852801e-01  1.69733493e+00  1.41017196e+00  1.57216886e+00
   1.52865530e+00  5.47363571e-01  4.51507484e-01 -5.78290484e-01]
 [-5.87067586e-01 -4.31591436e-01 -7.97201366e-01 -9.96138662e-01
  -2.24664966e+00  2.58559429e+00  2.18179647e+00  3.98391589e+00]
 [-1.61701072e+00  1.49024846e-01  1.62241939e+00  1.86766568e+00
   2.62692583e+00  1.03555270e-01 -3.97313527e-01 -7.01593359e-01]
 [ 1.67880731e+00  1.31025741e+00 -5.84953931e-01 -7.10072586e-01
   1.73458102e+00  7.11737016e-01  5.16801408e-01  1.39455552e+00]
 [-1.75090333e-01  1.50379617e+00  9.85677088e-01  1.05976479e+00
  -5.30601951e-01 -2.90940997e-01 -4.62607451e-01 -9.48199109e-01]
 [-3.81078959e-01 -4.45139150e-02 -2.87807522e-01 -3.92570677e-01
   

Splitting the data into training data & Testing data

In [None]:
# Hold out 20% of the samples for testing; random_state pins the shuffle so the
# split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

# The scaler earlier in the notebook was fitted on all rows, so the held-out rows
# influenced the scaling statistics (data leakage). Re-standardize using statistics
# learned from the training rows only. Standardization cancels any earlier
# per-feature shift/scale, so this is equivalent to fitting the scaler on the raw
# training features and applying it to the raw test features.
train_scaler = StandardScaler().fit(X_train)
X_train = train_scaler.transform(X_train)
X_test = train_scaler.transform(X_test)

In [None]:
# Sanity check: shapes of the full, training, and test feature arrays.
print(X.shape, X_train.shape, X_test.shape)

(100, 8) (80, 8) (20, 8)


Model Training

In [None]:
# Support vector classifier with scikit-learn's default hyperparameters.
model = SVC()

In [None]:
# training the SVC (support vector classifier) model using the training data

model.fit(X_train, Y_train)

Model Evaluation

Accuracy Score

In [None]:
# Accuracy on the training data: predict labels for the rows the model was
# fitted on and compare them with the true training labels.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Report the fraction of training samples classified correctly.
print('Accuracy on training data = ', training_data_accuracy)

Accuracy on training data =  0.8875


In [None]:
# Accuracy on the test data: predict labels for the held-out rows and compare
# them with the true test labels.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Report the fraction of held-out samples classified correctly.
print('Accuracy on test data = ', test_data_accuracy)

Accuracy on test data =  0.95
