# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# Loading and Exploring the Dataset

In [2]:
# Read the cell-sample records; the relative path is resolved against the
# current working directory.
csv_path = 'cell_samples.csv'
data = pd.read_csv(csv_path)

In [3]:
# Preview the first five rows to check that the columns loaded as expected.
data.head(n=5)

Unnamed: 0,ID,Clump,UnifSize,UnifShape,MargAdh,SingEpiSize,BareNuc,BlandChrom,NormNucl,Mit,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


In [4]:
# Dimensions of the loaded table as (rows, columns).
n_rows, n_cols = data.shape
(n_rows, n_cols)

(699, 11)

In [5]:
# Count how many rows carry each Class value to see how balanced the target is.
class_counts = data.Class.value_counts()
class_counts

2    458
4    241
Name: Class, dtype: int64

# Feature Transformation

In [6]:
# Summarise each column's non-null count and dtype to spot columns that
# need converting before modelling.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   ID           699 non-null    int64 
 1   Clump        699 non-null    int64 
 2   UnifSize     699 non-null    int64 
 3   UnifShape    699 non-null    int64 
 4   MargAdh      699 non-null    int64 
 5   SingEpiSize  699 non-null    int64 
 6   BareNuc      699 non-null    object
 7   BlandChrom   699 non-null    int64 
 8   NormNucl     699 non-null    int64 
 9   Mit          699 non-null    int64 
 10  Class        699 non-null    int64 
dtypes: int64(10), object(1)
memory usage: 60.2+ KB


In [7]:
# Keep only the rows whose BareNuc entry parses as a number: to_numeric with
# errors='coerce' turns anything unparseable into NaN, and those rows are
# filtered out here.
# The explicit .copy() makes the result an independent frame, so later column
# assignments modify it directly instead of writing into a slice of the
# original table (which pandas flags with SettingWithCopyWarning).
bare_nuc_is_numeric = pd.to_numeric(data['BareNuc'], errors='coerce').notna()
data = data[bare_nuc_is_numeric].copy()

In [8]:
# Look at the leading five rows again after the non-numeric rows were removed.
data.head(n=5)

Unnamed: 0,ID,Clump,UnifSize,UnifShape,MargAdh,SingEpiSize,BareNuc,BlandChrom,NormNucl,Mit,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


In [9]:
# Cast BareNuc from object to int64 now that only numeric entries remain.
# astype with a column -> dtype mapping returns a new frame, so nothing is
# assigned into a filtered slice of the original table.
data = data.astype({'BareNuc': 'int64'})

In [10]:
# List every column's dtype to confirm the result of the preceding cast.
data.dtypes

ID             int64
Clump          int64
UnifSize       int64
UnifShape      int64
MargAdh        int64
SingEpiSize    int64
BareNuc        int64
BlandChrom     int64
NormNucl       int64
Mit            int64
Class          int64
dtype: object

# Feature Matrix X and Target Vector y

In [11]:
# Feature matrix: every column except the record ID and the target label.
X = data.drop(columns=['ID', 'Class'])
# Target vector: the Class value of each row.
y = data['Class']

In [12]:
# Report the dimensions of the feature matrix and the target vector.
for label, obj in (("X Shape:", X), ("y Shape:", y)):
    print(label, obj.shape)

X Shape: (683, 9)
y Shape: (683,)


# Splitting into train and test datasets

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
test_fraction = 0.2
split_seed = 43
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_fraction,
    random_state=split_seed,
)

# Support Vector Classification Model

### Instantiating and fitting the model

In [15]:
from sklearn.svm import SVC
# Build the classifier with no arguments, i.e. with the library's default settings.
svc = SVC()

In [16]:
# Train the classifier on the training split.
svc.fit(X=X_train, y=y_train)

### Evaluating the model

In [17]:
from sklearn.metrics import accuracy_score, f1_score

# Predict labels for the held-out split and report the fraction that match
# the true labels, expressed as a percentage.
y_pred = svc.predict(X_test)
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print("Accuracy Score:{:.2f}%".format(accuracy_pct))

Accuracy Score:97.08%


In [19]:
# F1 averaged across the classes, each weighted by its support in y_test.
weighted_f1 = f1_score(y_test, y_pred, average='weighted')
print("F1 Score:", weighted_f1)

F1 Score: 0.9708029197080292


# Predicting the result

In [23]:
# Take the row at position 2 and keep only the model's input columns.
# Dropping 'ID' and 'Class' by label (the same columns removed when X was
# built) replaces the hard-coded positional slice 1:10, so the sample stays
# aligned with the training features even if the column order changes.
test = data.iloc[2].drop(['ID', 'Class'])
test

Clump          3
UnifSize       1
UnifShape      1
MargAdh        1
SingEpiSize    2
BareNuc        2
BlandChrom     3
NormNucl       1
Mit            1
Name: 2, dtype: int64

In [24]:
# Reshape the single-sample Series into a one-row DataFrame so the column
# names travel with the values; the model was fitted on a DataFrame, and a
# bare list of values would drop the feature names it checks at predict time.
svc.predict(test.to_frame().T)

array([2], dtype=int64)