In [1]:
#Importing needed python modules
import numpy as np
import pandas as pd
import warnings as wr
#Ignoring warnings
from sklearn.exceptions import UndefinedMetricWarning
wr.filterwarnings("ignore", category=UndefinedMetricWarning)

In [3]:
df=pd.read_csv('Brain_tumor_data.csv')

In [4]:
print(df.head(10))#Print all data of top 10 rows
print(df.shape)#Print the row and clumn count of the data
print(df.isna().sum())#Print all columns with empty data along with sum of empty data

   Target       Mean     Variance  Standard Deviation   Entropy   Skewness  \
0       1  23.448517  2538.985627           50.388348  0.651174   1.984202   
1       1   4.398331   834.853030           28.893823  0.953532   6.495203   
2       1   3.244263   642.059166           25.338886  0.966065   7.772860   
3       0   8.511353  1126.214187           33.559115  0.868765   3.763142   
4       0  21.000793  2235.316978           47.279139  0.684724   1.936029   
5       0  11.350555   998.972243           31.606522  0.761106   2.533920   
6       1   0.405136    68.378718            8.269143  0.994724  20.388025   
7       1   5.955872   937.438650           30.617620  0.926931   5.015434   
8       1   6.184021   895.196827           29.919840  0.917259   4.707172   
9       1   0.260590    52.284893            7.230829  0.997061  27.722763   

     Kurtosis    Contrast    Energy       ASM  Homogeneity  Dissimilarity  \
0    5.421042  181.467713  0.781557  0.610831     0.847033      

In [5]:
df=df.dropna(axis=1)#Drop the column with empty data
#Encoding first column
from sklearn.preprocessing import LabelEncoder
labelencoder_X=LabelEncoder()
df.iloc[:,0]=labelencoder_X.fit_transform(df.iloc[:,0].values)
#Splitting data for dependence
X=df.iloc[:,1:].values
Y=df.iloc[:,0].values

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,random_state=1)

In [7]:
#Standard scaling
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.fit_transform(X_test)

In [8]:
#Importing algorithm libraries
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [9]:
#Function for  different models
def models(X_train,Y_train):

    #Logistic regression
    log=LogisticRegression(random_state=0)
    log.fit(X_train,Y_train)

    #Decision tree
    tree=DecisionTreeClassifier(criterion='entropy',random_state=0)
    tree.fit(X_train,Y_train)

    #Random forest classifier
    forest=RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
    forest.fit(X_train,Y_train)

    #GaussianNB
    gnb = GaussianNB()
    gnb.fit(X_train,Y_train)

    #Printing accuracy
    print("Logistic regression:",log.score(X_train,Y_train))
    print("Decision Tree:",tree.score(X_train,Y_train))
    print("Random Forest:",forest.score(X_train,Y_train))
    print("GaussianNB:",gnb.score(X_train,Y_train))
    return log,tree,forest,gnb

In [10]:
#Testing Function for all models
model=models(X_train,Y_train)

Logistic regression: 0.9738493723849372
Decision Tree: 1.0
Random Forest: 1.0
GaussianNB: 0.9801255230125523


In [11]:
#Metrics of the models
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
for i in range(len(model)):
    print("\nModel:",i+1)
    print("Classification Report")
    print(classification_report(Y_test,model[i].predict(X_test)))
    print("Accuracy Score:",accuracy_score(Y_test,model[i].predict(X_test)))


Model: 1
Classification Report
              precision    recall  f1-score   support

           0       0.48      0.86      0.62        14
           1       0.99      0.96      0.97       305

    accuracy                           0.95       319
   macro avg       0.74      0.91      0.80       319
weighted avg       0.97      0.95      0.96       319

Accuracy Score: 0.9529780564263323

Model: 2
Classification Report
              precision    recall  f1-score   support

           0       0.43      0.93      0.59        14
           1       1.00      0.94      0.97       305

    accuracy                           0.94       319
   macro avg       0.71      0.94      0.78       319
weighted avg       0.97      0.94      0.95       319

Accuracy Score: 0.9435736677115988

Model: 3
Classification Report
              precision    recall  f1-score   support

           0       0.47      1.00      0.64        14
           1       1.00      0.95      0.97       305

    accuracy    

In [12]:
from sklearn.metrics import confusion_matrix

for i in range( len(model)):
  print('Model', i)
  cm = confusion_matrix(Y_test, model[i].predict(X_test))

  TP = cm[0][0]
  TN = cm[1][1]
  FN = cm[1][0]
  FP = cm[0][1]

  print(cm)
  print('Testing Accuracy = ',(TP + TN)/(TP + TN + FN + FP))
  print()

Model 0
[[ 12   2]
 [ 13 292]]
Testing Accuracy =  0.9529780564263323

Model 1
[[ 13   1]
 [ 17 288]]
Testing Accuracy =  0.9435736677115988

Model 2
[[ 14   0]
 [ 16 289]]
Testing Accuracy =  0.9498432601880877

Model 3
[[  0  14]
 [  0 305]]
Testing Accuracy =  0.9561128526645768

