# # Import Required Libraries

In [1]:
# libraries for pre processing
import copy
import warnings
from glob import glob

import numpy as np
import cv2

# scikit learn libraries for machine learning models
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import VotingClassifier

# pytorch libraries for deep learning models
import torch
from torchvision import models,transforms
from torchvision.datasets import ImageFolder
import torch.utils.data as data_utils

from torch.autograd import Variable

import numpy
import matplotlib.pyplot as graph




# # Data Preprocessing


In [2]:
def get_train_eval_data(base_dataset_path,t_normal_images_path,t_affected_images_path,e_normal_images_path,e_affected_images_path,flag):
    """Load the training and evaluation images together with their class labels.

    Each of the four directories is searched for ``.jpg`` files using the
    pattern ``base_dataset_path + <sub path> + <backslash> + '*.jpg'``. Every
    match is read with ``cv2.imread(path, 0)`` and labelled ``0`` when it comes
    from a "normal" directory and ``1`` when it comes from an "affected" one.

    Parameters
    ----------
    base_dataset_path : str
        Common prefix of the four image directories.
    t_normal_images_path, t_affected_images_path : str
        Suffixes (appended to ``base_dataset_path``) of the training directories.
    e_normal_images_path, e_affected_images_path : str
        Suffixes (appended to ``base_dataset_path``) of the evaluation directories.
    flag : int
        ``1`` reshapes every image to ``(1, 40, 40)``; any other value flattens
        the image into a 1-D vector.

    Returns
    -------
    tuple
        ``(train_features, train_labels, eval_features, eval_labels)`` as numpy arrays.

    Raises
    ------
    ValueError
        If an image cannot be decoded, or if ``flag == 1`` and an image does
        not contain exactly 40 * 40 pixels.
    """
    # (sub directory, label, belongs to the training split), in processing order.
    # The split and label are carried explicitly: looking them up afterwards with
    # list.index() returns the first *equal* list, which sends evaluation images
    # into the training set whenever two directories yield the same file list.
    sources = (
        (t_normal_images_path, 0, True),
        (t_affected_images_path, 1, True),
        (e_normal_images_path, 0, False),
        (e_affected_images_path, 1, False),
    )

    train_features = []
    train_labels = []
    eval_features = []
    eval_labels = []

    for sub_path, label, is_training in sources:
        if is_training:
            features, labels = train_features, train_labels
        else:
            features, labels = eval_features, eval_labels

        # Raw string: '\*' is not a valid escape sequence in a normal literal.
        for image_path in glob(base_dataset_path + sub_path + r'\*.jpg'):
            img = cv2.imread(image_path, 0)
            if img is None:
                # cv2.imread signals an unreadable file by returning None
                # instead of raising; stop here rather than train on garbage.
                raise ValueError("Could not read image: %s" % image_path)

            img_2d = np.array(img)
            if flag == 1:
                img_2d = img_2d.reshape(1, 40, 40)
            else:
                img_2d = img_2d.ravel()

            features.append(img_2d)
            labels.append(label)

    return np.array(train_features),np.array(train_labels),np.array(eval_features),np.array(eval_labels)


# # Machine learning models

In [3]:
#Function for executing logistic regression
def logistic_regression(train_features,train_labels,eval_features,eval_labels):
    """Fit a logistic regression classifier and print its accuracy on both splits.

    The model uses the ``saga`` solver with ``tol=1e-3`` and at most 1000
    iterations. It is fitted on the training data and then scored on the
    training data and on the evaluation data, in that order. Nothing is
    returned; progress and scores are printed.
    """
    print("Logistic Regression:")
    print("Training started...")

    settings = {'solver': 'saga', 'verbose': 0, 'tol': 1e-3, 'max_iter': 1000}
    model = LogisticRegression(**settings)
    model.fit(train_features, train_labels)
    print("Training finished...")

    train_accuracy = model.score(train_features, train_labels)
    print("Accuracy on training dataset is %f" % train_accuracy)
    eval_accuracy = model.score(eval_features, eval_labels)
    print("Accuracy on evaluation dataset is %f" % eval_accuracy)
    

#Function for executing support_vector_machine
def support_vector_machine(train_features,train_labels,eval_features,eval_labels):
    """Fit a linear support vector classifier and print its accuracy on both splits.

    The model is ``svm.LinearSVC`` with ``C=100`` and ``tol=1e-3``. It is fitted
    on the training data and then scored on the training data and on the
    evaluation data, in that order. Nothing is returned; progress and scores
    are printed.
    """
    print("Support Vector Machine:")
    print("Training started...")

    settings = {'C': 100, 'verbose': 0, 'tol': 1e-3}
    model = svm.LinearSVC(**settings)
    model.fit(train_features, train_labels)
    print("Training finished...")

    train_accuracy = model.score(train_features, train_labels)
    print("Accuracy on training dataset is %f" % train_accuracy)
    eval_accuracy = model.score(eval_features, eval_labels)
    print("Accuracy on evaluation dataset is %f" % eval_accuracy)
    

#Function for executing LR + SVM 
def ensemble_logreg_svm(train_features,train_labels,eval_features,eval_labels):
    """Fit a hard-voting ensemble of logistic regression and a linear SVM.

    Both base estimators are created with their default settings and combined
    in a ``VotingClassifier`` with ``voting='hard'``. The ensemble is fitted on
    the training data and then scored on the training data and on the
    evaluation data, in that order. Nothing is returned; progress and scores
    are printed.
    """
    print("Logistic Regression + Support Vector Machine:")
    print("Training started...")

    voters = [('lr', LogisticRegression()), ('svm', svm.LinearSVC())]
    ensemble = VotingClassifier(estimators=voters, voting='hard')
    ensemble.fit(train_features, train_labels)
    print("Training finished...")

    train_accuracy = ensemble.score(train_features, train_labels)
    print("Accuracy on training dataset is %f" % train_accuracy)
    eval_accuracy = ensemble.score(eval_features, eval_labels)
    print("Accuracy on evaluation dataset is %f" % eval_accuracy)

#Function for executing resnet
def resnet(flag,skip,max_epochs=1000):
    """Train (or reload) a 2-class ResNet head and print its evaluation accuracy.

    A pretrained torchvision ResNet is created, all of its parameters are
    frozen and its final ``fc`` layer is replaced by a fresh 2-output linear
    layer, which is the only part handed to the optimizer.

    When ``skip`` is false the head is trained with SGD (lr 0.001, momentum
    0.9, batch size 1) until the running training accuracy of an epoch reaches
    1.0 or ``max_epochs`` epochs have run. The weights of the best epoch are
    restored and saved to ``./resnet152.pth`` or ``./resnet50.pth``. When
    ``skip`` is true that checkpoint is loaded instead. In both cases the
    accuracy on the evaluation set is printed at the end.

    Parameters
    ----------
    flag : int
        Network depth: ``50`` or ``152``.
    skip : bool
        ``True`` loads the saved checkpoint; ``False`` trains and saves one.
    max_epochs : int or None, optional
        Upper bound on the number of training epochs, so that training also
        terminates when 100% training accuracy is never reached. ``None``
        removes the bound.

    Raises
    ------
    ValueError
        If ``flag`` is neither 50 nor 152.
    """
    if flag not in (50, 152):
        # Fail early with a clear message instead of an UnboundLocalError below.
        raise ValueError("flag must be 50 or 152, got %r" % (flag,))

    # Create model. The local is called `net` so it does not shadow this function.
    # NOTE(review): newer torchvision releases prefer `weights=` over
    # `pretrained=True` - confirm against the installed version.
    if flag == 152:
        net = models.resnet152(pretrained=True)
        print("Resnet152")
        checkpoint_path = './resnet152.pth'
    else:
        print("Resnet50")
        net = models.resnet50(pretrained=True)
        checkpoint_path = './resnet50.pth'

    # Freeze the pretrained layers; the replacement head below stays trainable.
    for param in net.parameters():
        param.requires_grad = False
    net.fc = torch.nn.Linear(net.fc.in_features,2)

    # Prepare data
    transform = transforms.Compose([transforms.ToTensor()])
    train_data = ImageFolder(root=r'data\train',transform=transform)
    eval_data = ImageFolder(root=r'data\test',transform=transform)

    # Hyper parameters
    batch_size = 1
    learning_rate = 0.001

    optimizer = torch.optim.SGD(net.fc.parameters(),lr=learning_rate,momentum=0.9)
    criterion = torch.nn.CrossEntropyLoss()

    # Create data loaders
    train_data_loader = data_utils.DataLoader(dataset=train_data,shuffle=True,batch_size=batch_size)
    eval_data_loader = data_utils.DataLoader(dataset=eval_data,shuffle=True,batch_size=batch_size)

    if not skip:
        print("Training started...")
        best_accuracy = 0.0
        # state_dict() returns references to the live tensors, so a snapshot
        # has to be deep-copied or it silently follows later updates.
        best_state = copy.deepcopy(net.state_dict())
        # Per-epoch metrics, collected for inspection or plotting. They are not
        # returned so that the function keeps returning None.
        history = {'epoch': [], 'train_loss': [], 'train_acc': [], 'eval_loss': [], 'eval_acc': []}

        epoch = 0
        while best_accuracy != 1.0 and (max_epochs is None or epoch < max_epochs):
            train_loss, train_accuracy = _train_resnet_epoch(net, train_data_loader, criterion, optimizer)
            eval_loss, eval_accuracy = _evaluate_resnet(net, eval_data_loader, criterion)

            history['epoch'].append(epoch + 1)
            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_accuracy)
            history['eval_loss'].append(eval_loss)
            history['eval_acc'].append(eval_accuracy)

            if train_accuracy > best_accuracy:
                best_accuracy = train_accuracy
                best_state = copy.deepcopy(net.state_dict())

            epoch += 1

        print("Training finished...")
        print("Accuracy on training dataset is %f" % best_accuracy)

        # Restore the best weights before the final evaluation and persist them.
        net.load_state_dict(best_state)
        torch.save(best_state, checkpoint_path)
    else:
        net.load_state_dict(torch.load(checkpoint_path))

    _, final_accuracy = _evaluate_resnet(net, eval_data_loader, criterion)
    print("The accuracy on evaluation dataset is %f" % final_accuracy)


def _train_resnet_epoch(net, data_loader, criterion, optimizer):
    """Run one optimisation pass over ``data_loader``; return (summed loss, running accuracy)."""
    # NOTE(review): train() also puts the frozen layers' BatchNorm modules in
    # training mode, so their running statistics keep changing - confirm intended.
    net.train()
    total_loss = 0.0
    total_seen = 0
    total_correct = 0
    for features, labels in data_loader:
        optimizer.zero_grad()
        output = net(features)
        _, predicted = torch.max(output.detach(), 1)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_seen += len(labels)
        # .sum() makes the count correct for any batch size, not only 1.
        total_correct += (predicted == labels).sum().item()
    return total_loss, float(total_correct) / total_seen


def _evaluate_resnet(net, data_loader, criterion):
    """Score ``net`` on ``data_loader`` without gradients; return (summed loss, accuracy)."""
    net.eval()
    total_loss = 0.0
    total_seen = 0
    total_correct = 0
    with torch.no_grad():
        for features, labels in data_loader:
            output = net(features)
            _, predicted = torch.max(output, 1)
            # Loss and sample count use this batch's own labels.
            total_loss += criterion(output, labels).item()
            total_seen += len(labels)
            total_correct += (predicted == labels).sum().item()
    return total_loss, float(total_correct) / total_seen
    
    



# # Specify the dataset path

In [4]:
# Root folder of the resized images, relative to the notebook (Windows-style separators).
base_dataset_path = r'..\Data_Set\resized_Images'

# Sub folders appended to the root: training split first, then evaluation split.
train_normal_images_path = r'\train_normal'
train_affected_images_path = r'\train_affected'
eval_normal_images_path = r'\test_normal'
eval_affected_images_path = r'\test_affected'

# # Train and evaluate the model

In [6]:
# Silence deprecation warnings raised while the models run.
warnings.simplefilter('ignore', DeprecationWarning)

# flag=0: every image is flattened into a 1-D feature vector.
train_features, train_labels, eval_features, eval_labels = get_train_eval_data(
    base_dataset_path,
    train_normal_images_path,
    train_affected_images_path,
    eval_normal_images_path,
    eval_affected_images_path,
    0,
)
print("Total Images for training :%d"%(len(train_features)))
print("Total Images for evaluation :%d"%(len(eval_labels)))

# Classical models: each run is preceded by a blank separator.
for run_classifier in (logistic_regression, support_vector_machine, ensemble_logreg_svm):
    print("\n")
    run_classifier(train_features, train_labels, eval_features, eval_labels)

# Deep models: skip=True loads the saved checkpoint, skip=False trains from the image folders.
for depth in (50, 152):
    print("\n")
    resnet(depth, skip=True)


Total Images for training :54
Total Images for evaluation :18


Logistic Regression:
Training started...
Training finished...
Accuracy on training dataset is 1.000000
Accuracy on evaluation dataset is 1.000000


Support Vector Machine:
Training started...
Training finished...
Accuracy on training dataset is 1.000000
Accuracy on evaluation dataset is 1.000000


Logistic Regression + Support Vector Machine:
Training started...
Training finished...
Accuracy on training dataset is 1.000000
Accuracy on evaluation dataset is 1.000000


Resnet50
The accuracy on evaluation dataset is 1.000000


Resnet152
The accuracy on evaluation dataset is 0.666667
