In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import sklearn
from sklearn.preprocessing import LabelEncoder
from IPython.display import display
from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix
from sklearn import tree
import tensorflow as tf
import pickle


class ConfidenceEstimationModel:
    """Train a decision-tree classifier on the confidence dataset.

    The methods build on each other's attributes and are meant to be called
    in this order: ``load_dataset``, optionally ``eda``, ``pre_process``,
    ``splitting``, ``decisionTrees``. After training, ``classification`` and
    ``save_model`` can be used.
    """

    __dataset_name = "Dataset.xlsx"

    def load_dataset(self):
        """Read the Excel workbook into ``self.dataset``."""
        self.dataset = pd.read_excel(self.__dataset_name)

    def _show_value_counts(self):
        """Display the value counts of every column of ``self.dataset``."""
        for col in self.dataset.columns:
            print(f"Column Name: {col}")
            display(self.dataset[col].value_counts())
            print("\n\n")

    def eda(self):
        """Print a quick overview of the raw dataset.

        Shows the first rows, the frame summary, the ``Label`` distribution,
        the row count, the number of nulls per column, and the value counts
        of every column.
        """
        display(self.dataset.head())
        # DataFrame.info() prints its report itself and returns None, so
        # wrapping it in display() only adds a stray "None" to the output.
        self.dataset.info()
        display(self.dataset["Label"].value_counts())
        print("Length of Dataset : ", len(self.dataset))

        # NULL values in each column
        print("NULL Values : ")
        display(self.dataset.isnull().sum())
        print("\n\n")

        # value count of each column
        self._show_value_counts()

    def pre_process(self):
        """Drop incomplete rows and label-encode every feature column.

        Each non-``Label`` column gets its own fitted ``LabelEncoder``,
        stored in ``self.encoders`` under the column name, so the
        category-to-integer mapping of every feature stays available after
        preprocessing. ``self.encoder`` is still set (to the encoder of the
        last feature column), matching what a single refitted encoder would
        have held. Finally the whole frame is cast to ``float``.
        """
        self.dataset.dropna(inplace=True)
        self.dataset.info()

        self.encoders = {}
        self.encoder = LabelEncoder()
        for col in self.dataset.columns:
            if col == "Label":
                continue
            # A fresh encoder per column: refitting one shared encoder would
            # discard the mapping of every column but the last.
            column_encoder = LabelEncoder()
            self.dataset[col] = column_encoder.fit_transform(self.dataset[col])
            self.encoders[col] = column_encoder
            self.encoder = column_encoder

        self._show_value_counts()

        self.dataset = self.dataset.astype(float)
        print("datatype: \n", self.dataset.dtypes)

    def splitting(self):
        """Split the dataset into train and test parts (80% / 20%).

        The first three columns (by position) are used as features and the
        fourth column as the target. The split is seeded with
        ``random_state=42``.
        """
        self.y = self.dataset.iloc[:, 3]
        self.x = self.dataset.iloc[:, [0, 1, 2]]
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            self.x, self.y, test_size=0.20, random_state=42
        )

    def decisionTrees(self):
        """Fit a decision tree on the training split and report test metrics.

        Stores the fitted model in ``self.classifier`` and the test
        predictions in ``self.y_pred``, then prints the confusion matrix and
        the accuracy (as a percentage) on the test split.
        """
        self.classifier = tree.DecisionTreeClassifier()
        self.classifier.fit(self.x_train, self.y_train)
        self.y_pred = self.classifier.predict(self.x_test)
        print("----------Decision Trees----------")
        cm = confusion_matrix(self.y_test, self.y_pred)
        display(cm)
        accuracy = sklearn.metrics.accuracy_score(self.y_test, self.y_pred) * 100
        print("Accuracy: ", accuracy)
        print("\n\n")

    def classification(self, userinput):
        """Predict labels for ``userinput`` with the trained classifier.

        Args:
            userinput: Rows of feature values in the same encoded form the
                classifier was trained on (one value per feature column).

        Returns:
            The predictions, which are also stored in ``self.result`` and
            printed.
        """
        self.result = self.classifier.predict(userinput)
        print(self.result)
        return self.result

    def save_model(self):
        """Pickle the trained classifier to ``Confidence_Classifier.sav``."""
        filename = "Confidence_Classifier.sav"
        # The context manager closes the handle so the file is flushed.
        with open(filename, "wb") as model_file:
            pickle.dump(self.classifier, model_file)
    
        
# Run the whole pipeline in order: load, explore, encode, split, train.
obj = ConfidenceEstimationModel()
for stage in (
    obj.load_dataset,
    obj.eda,
    obj.pre_process,
    obj.splitting,
    obj.decisionTrees,
):
    stage()

# Two sample rows of already-encoded feature values, one value per feature
# column, in the same column order the classifier was trained on.
uinp = [[4, 0, 0], [2, 0, 0]]
obj.classification(uinp)

# Persist the trained classifier to disk.
obj.save_model()


Unnamed: 0,Expression,Direction,Eye Movement,Label
0,neutral,Looking Up,center,1.0
1,neutral,Forward,right,1.0
2,neutral,Looking Up,left,1.0
3,happy,Forward,left,1.0
4,sad,Looking Up,center,1.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4068 entries, 0 to 4067
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Expression    2961 non-null   object 
 1   Direction     3529 non-null   object 
 2   Eye Movement  3061 non-null   object 
 3   Label         3116 non-null   float64
dtypes: float64(1), object(3)
memory usage: 127.2+ KB


None

1.0    1926
0.0    1190
Name: Label, dtype: int64

Length of Dataset :  4068
NULL Values : 


Expression      1107
Direction        539
Eye Movement    1007
Label            952
dtype: int64




Column Name: Expression


neutral     2011
sad          429
happy        273
fear         139
angry        101
surprise       8
Name: Expression, dtype: int64




Column Name: Direction


Forward          1833
Looking Up        717
Looking Right     590
Looking Left      253
Looking Down      136
Name: Direction, dtype: int64




Column Name: Eye Movement


center    1036
down       758
closed     662
left       230
right      211
up         164
Name: Eye Movement, dtype: int64




Column Name: Label


1.0    1926
0.0    1190
Name: Label, dtype: int64




<class 'pandas.core.frame.DataFrame'>
Int64Index: 2848 entries, 0 to 4066
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Expression    2848 non-null   object 
 1   Direction     2848 non-null   object 
 2   Eye Movement  2848 non-null   object 
 3   Label         2848 non-null   float64
dtypes: float64(1), object(3)
memory usage: 111.2+ KB


None

Column Name: Expression


3    1993
4     337
2     272
1     137
0     101
5       8
Name: Expression, dtype: int64




Column Name: Direction


0    1513
4     532
3     508
2     171
1     124
Name: Direction, dtype: int64




Column Name: Eye Movement


0    1004
2     638
1     631
3     222
4     203
5     150
Name: Eye Movement, dtype: int64




Column Name: Label


1.0    1863
0.0     985
Name: Label, dtype: int64




datatype: 
 Expression      float64
Direction       float64
Eye Movement    float64
Label           float64
dtype: object
----------Decision Trees----------


array([[185,  17],
       [  7, 361]])

Accuracy:  95.78947368421052



[0. 1.]


In [None]:
# Reload the classifier pickled by the training cell and check that it
# reproduces the predictions for the same two sample rows.
filename = "Confidence_Classifier.sav"
inp = [[4, 0, 0],
       [2, 0, 0]]
# NOTE: unpickling executes arbitrary code from the file; only load model
# files that were produced locally or come from a trusted source.
with open(filename, "rb") as model_file:  # closes the handle after loading
    load_model = pickle.load(model_file)
res = load_model.predict(inp)
print(res)

[0. 1.]
