In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score


In [12]:
# Load the dermatology dataset from CSV into a pandas DataFrame.
dermatology_raw = pd.read_csv('dermatology.csv')

# A notebook cell only renders its LAST bare expression, so every inspection
# step is printed explicitly; otherwise the results are silently discarded.

# First 5 rows of the raw frame.
print(dermatology_raw.head())

# Number of rows and columns.
print(dermatology_raw.shape)

# Column dtypes and non-null counts (info() writes to stdout itself).
dermatology_raw.info()

# Summary statistics of the numeric columns.
print(dermatology_raw.describe())

# Distribution of the target variable.
print(dermatology_raw['class'].value_counts())

# Turn the '?' placeholder into NaN BEFORE counting missing values; counting
# on the raw frame cannot see placeholders stored as the string '?'.
dermatology_marked = dermatology_raw.replace('?', np.nan)

# Missing values per column, now including the former '?' entries.
print(dermatology_marked.isnull().sum())

# Convert 'age' to a numeric dtype and drop every row that still has a
# missing value. Built as a new frame (no inplace mutation) so the raw data
# stays available and the cell can be re-run safely.
dermatology_data = (
    dermatology_marked
    .assign(age=lambda frame: pd.to_numeric(frame['age']))
    .dropna()
)

# Confirm that no missing values remain after cleaning.
print(dermatology_data.isnull().sum())



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 35 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   erythema                                  366 non-null    int64  
 1   scaling                                   366 non-null    int64  
 2   definite_borders                          366 non-null    int64  
 3   itching                                   366 non-null    int64  
 4   koebner_phenomenon                        366 non-null    int64  
 5   polygonal_papules                         366 non-null    int64  
 6   follicular_papules                        366 non-null    int64  
 7   oral_mucosal_involvement                  366 non-null    int64  
 8   knee_and_elbow_involvement                366 non-null    int64  
 9   scalp_involvement                         366 non-null    int64  
 10  family_history                        

In [13]:
# Feature matrix: every column except the target label.
X = dermatology_data.drop(columns='class')

# Target vector: the 'class' column on its own.
Y = dermatology_data.loc[:, 'class']

print(X)
print(Y)


     erythema  scaling  definite_borders  itching  koebner_phenomenon  \
0           2        2                 0        3                   0   
1           3        3                 3        2                   1   
2           2        1                 2        3                   1   
3           2        2                 2        0                   0   
4           2        3                 2        2                   2   
..        ...      ...               ...      ...                 ...   
361         2        1                 1        0                   1   
362         3        2                 1        0                   1   
363         3        2                 2        2                   3   
364         2        1                 3        1                   2   
365         3        2                 2        0                   0   

     polygonal_papules  follicular_papules  oral_mucosal_involvement  \
0                    0                   0         

In [14]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=2, test_size=0.2)

# Shapes of the full, training and test feature matrices.
print(X.shape, X_train.shape, X_test.shape)


(358, 34) (286, 34) (72, 34)


In [15]:
# Support-vector classifier with a linear kernel.
model = svm.SVC(kernel='linear')

# Fit the classifier on the training split.
model.fit(X=X_train, y=Y_train)


In [16]:
# Predict on both splits first, then score and report each one.
X_train_prediction = model.predict(X_train)
X_test_prediction = model.predict(X_test)

# Fraction of correctly classified rows in the training split.
training_data_accuracy = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)
# Fraction of correctly classified rows in the held-out test split.
test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)

print('Accuracy score of training data : ', training_data_accuracy)
print('Accuracy score of test data : ', test_data_accuracy)


Accuracy score of training data :  0.9965034965034965
Accuracy score of test data :  0.9722222222222222


In [18]:
# One hand-entered sample: a value for each feature, listed in the same order
# as the columns of X.
input_data = (
    2,1,0,3,2,1,1,0,2,2,
    1,0,1,2,3,1,0,1,2,0,
    1,1,2,0,1,2,1,0,1,2,
    0, # perifollicular_parakeratosis
    0, # inflammatory_monoluclear_inflitrate
    0, # band-like_infiltrate
    45  # age
)

# Numeric class label -> diagnosis name.
CLASS_NAMES = {
    1: "Psoriasis",
    2: "Seborrheic Dermatitis",
    3: "Lichen Planus",
    4: "Pityriasis Rosea",
    5: "Chronic Dermatitis",
    6: "Pityriasis Rubra Pilaris",
}

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array to a single row (1 sample, n features)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so give it a DataFrame with the same
# column names. This avoids scikit-learn's feature-name mismatch warning, and
# the constructor raises ValueError if the sample does not have exactly one
# value per training column.
input_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_frame)
print(prediction)

# Look the label up explicitly instead of letting a catch-all branch report
# every unexpected label as "Pityriasis Rubra Pilaris".
predicted_label = int(prediction[0])
if predicted_label in CLASS_NAMES:
    print(CLASS_NAMES[predicted_label])
else:
    print(f"Unknown class label: {predicted_label}")

[1]
Psoriasis




In [19]:
import pickle

filename = 'dermatology_model.sav'

# Serialise the trained model; the context manager guarantees the file is
# flushed and closed before it is read back below.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# loading the saved model
# NOTE: pickle.load can execute arbitrary code - only load files you created
# yourself or otherwise trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)


In [20]:
# List the feature names, one per line, in the order the model expects them.
for column in X.columns.tolist():
    print(column)


erythema
scaling
definite_borders
itching
koebner_phenomenon
polygonal_papules
follicular_papules
oral_mucosal_involvement
knee_and_elbow_involvement
scalp_involvement
family_history
melanin_incontinence
eosinophils_in_the_infiltrate
pnl_infiltrate
fibrosis_of_the_papillary_dermis
exocytosis
acanthosis
hyperkeratosis
parakeratosis
clubbing_of_the_rete_ridges
elongation_of_the_rete_ridges
thinning_of_the_suprapapillary_epidermis
spongiform_pustule
munro_microabcess
focal_hypergranulosis
disappearance_of_the_granular_layer
vacuolisation_and_damage_of_basal_layer
spongiosis
saw-tooth_appearance_of_retes
follicular_horn_plug
perifollicular_parakeratosis
inflammatory_monoluclear_inflitrate
band-like_infiltrate
age
