In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.ensemble import RandomForestClassifier as rfc
from sklearn import metrics as mtx
from timeit import default_timer as timer

#enter testset size 
testsize=20
testsize=testsize/100
print("Training set :",(1-testsize)*100,"%")
print("Test set     :",testsize*100,"%","\n")


#importing dataset
df=pd.read_csv('cars_dataset.csv')


#dictionary for encoding attribute values
encode_dictionary = {"buying":{"vhigh":1,"high":2,"med":3,"low":4},
                     "maint":{"vhigh":1,"high":2,"med":3,"low":4},
                     "doors":{"two":1,"three":2,"four":3,"5more":4},
                     "persons":{"two":1,"four":2,"more":3},
                     "lug_boot":{"small":1,"med":2,"big":3},
                     "safety":{"low":1,"med":2,"high":3},
                     "car":{"unacc":1,"acc":2,"good":3,"vgood":4} }


#encoding using dictionary    
df.replace(encode_dictionary,inplace=True)  


#X is the dataset without class label
X=df.drop(columns='car')



#y contains all the class labels only
y=df['car'].values

#shuffled split
X_train,X_test,y_train,y_test=tts(X,y,test_size=testsize,random_state=None,stratify=y)

#serial split
#X_train,X_test,y_train,y_test=tts(X,y,test_size=testsize,random_state=0,shuffle=False)


start=timer()
#building model
obj=rfc(criterion="gini",random_state=1,n_estimators=100)
obj.fit(X_train,y_train)
end=timer()

train_time=end-start
start=timer()
#prediction
y_pred=obj.predict(X_test)
end=timer()
test_time=end-start

print("Total number of touples: ",df.shape[0])
print("Touples in training set: ",y_train.shape[0])
print("Touples in testing set : ",y_test.shape[0],"\n")


#confusion matrix
cm = mtx.confusion_matrix(y_test,y_pred,labels=[1,2,3,4])


labels=['unacc','acc','vgood','good']
print("Confusion Matrix:","\n","Predicted","\n",labels,"\n",cm,"\n")
print("Accuracy: ",mtx.accuracy_score(y_test,y_pred)*100,'%',"\n")

#count number of row in training set
testing=y_test.shape[0]

for x in range (4):
    recaldiv=0     #divisor for recall
    precisiondiv=0 #divisor for precision
    FPup=0         #numerator for false positive
    FPdown= 0      #denominator for fp
    
    
    for y in range (4) :
        # add all element of the row of current class,actual number of element in class 
        # denoted by row x
        recaldiv=recaldiv+cm[x][y]                  
        precisiondiv=precisiondiv+cm[y][x] #add all element of column,all that has been predicted as current class
        if x!=y :   #incorrect prediction,not in x,x position of a column
            FPup=FPup+cm[y][x] 
            
            
        
    FPdown=testing-recaldiv #those that are actually false,all in cm except actually in current class (recaldiv)
                             
    
    if cm[x][x] == 0 : #cm[x][x] is TP
        recall=0
        if precisiondiv == 0: 
            prec= "?"
        else:
            prec=0
            
        f1="?"
          
    else: 
        
        recall=cm[x][x]/recaldiv
        fprate=FPup/FPdown
        prec=cm[x][x]/precisiondiv
        f1=2*(recall*prec)/(recall+prec)
    
    print("Recall for class : ",labels[x],"= ",recall)
    print("FP rate for class : ",labels[x],"= ",fprate)
    print("Precision for class : ",labels[x],"= ",prec)
    print("F1 for class : ",labels[x],"= ",f1)
    print("\n")
print("Training Time ",train_time)
print("Testing Time ",test_time)
#print(mtx.classification_report(y_test,y_pred))

Training set : 80.0 %
Test set     : 20.0 % 

Total number of touples:  1728
Touples in training set:  1382
Touples in testing set :  346 

Confusion Matrix: 
 Predicted 
 ['unacc', 'acc', 'vgood', 'good'] 
 [[241   1   0   0]
 [  1  75   1   0]
 [  0   0  14   0]
 [  0   0   1  12]] 

Accuracy:  98.84393063583815 % 

Recall for class :  unacc =  0.9958677685950413
FP rate for class :  unacc =  0.009615384615384616
Precision for class :  unacc =  0.9958677685950413
F1 for class :  unacc =  0.9958677685950413


Recall for class :  acc =  0.974025974025974
FP rate for class :  acc =  0.0037174721189591076
Precision for class :  acc =  0.9868421052631579
F1 for class :  acc =  0.9803921568627451


Recall for class :  vgood =  1.0
FP rate for class :  vgood =  0.006024096385542169
Precision for class :  vgood =  0.875
F1 for class :  vgood =  0.9333333333333333


Recall for class :  good =  0.9230769230769231
FP rate for class :  good =  0.0
Precision for class :  good =  1.0
F1 for class 