<a href="https://colab.research.google.com/github/ml2-picme/PicMe/blob/master/Image%20Download%20and%20Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Preparation

### 1.1 Needed libraries for this notebook

In [0]:
import sys
import hashlib
from urllib.request import urlopen
from keras.applications import *

### 1.2 Install needed libraries

In [0]:
# Install the MySQL connector package into the notebook runtime.
# No version is pinned, so pip picks whatever release is current.
# NOTE(review): presumably required by the project's db_connector script - confirm.
!pip install mysql-connector-python-rf

### 1.3 Clone the GitHub project so that the functions defined in its ./scripts sub-directory can be imported

In [0]:
# Remove Path if already existing locally
!rm -r PicMe

# Clone Git repository
!git clone https://github.com/ml2-picme/PicMe.git

# Add the relevant paths of the repo to system path
sys.path.append("/content/PicMe")
sys.path.append("/content/PicMe/scripts")

# Add the functions, defined in the script files
from db_connector import *
from file_processing import *
from image_classification import *
from imagenet_tree_search import *
from text_processing import *

## 2. Logic

In [0]:
# Parameters
# Root directory for the local directory tree and the downloaded image files.
path = "/tmp/image_classification"
# Number of hash buckets: the directory-creation loop iterates over
# range(hashrange) and the download step reduces the SHA-1 of each file name
# modulo this value to choose the target sub-directory.
hashrange = 20

In [0]:
# Preparation: delete local files below `path` before the directory tree is rebuilt.
# NOTE(review): presumably tolerates a missing directory on the first run -
# confirm against deleteLocalDirectory in the project scripts.
deleteLocalDirectory(path)

In [0]:
# Preparation: build the two-level local directory tree
#   <path>/<bucket // 10>/<bucket, zero-padded to two digits>
for bucket in range(hashrange):
  group, offset = divmod(bucket, 10)
  groupDir = "{}/{}".format(path, group)
  if offset == 0:
    # First bucket of a group of ten: create the shared parent directory first
    createLocalDirectory(groupDir)
  createLocalDirectory("{}/{:02d}".format(groupDir, bucket))

In [0]:
# Maps the local path of every downloaded file to the URL it came from.
filesDict = {}

# Fetch the input list (one "<url>;<label>" entry per line) and close the HTTP
# response right away instead of keeping it open during all image downloads.
# splitlines() removes LF as well as CRLF line endings, so no stray '\r' can
# end up in a label (and thus in a file name).
with urlopen("https://raw.githubusercontent.com/ml2-picme/PicMe/master/input/images_subset.txt") as data:
  rawLines = data.read().splitlines()

for rawLine in rawLines:
  if rawLine.startswith(b'#'):  # Ignore lines that begin with a comment (#)
    continue

  line = rawLine.decode("utf-8")
  fields = line.split(";")
  if len(fields) < 2:
    # Blank lines are skipped silently; anything else without a ';' separator
    # is reported instead of aborting the whole run with an IndexError.
    if line.strip():
      print("Skipping malformed line:", line)
    continue

  url = fields[0]
  label = fields[1]

  filename = getFileNameFromPath(url)

  # Hash the file name into one of `hashrange` buckets; the bucket selects the
  # two-level target directory <bucket // 10>/<zero-padded bucket>.
  bucket = int(hashlib.sha1(filename.encode('utf-8')).hexdigest(), 16) % hashrange
  parent_dir = bucket // 10
  hashvalue = '%02d' % bucket  # Normalization, pad zeroes

  # Keep the original extension (text after the last '.') and name the local
  # file after its label.
  filetype = filename.rsplit(".", 1)[-1]
  newFilename = label + "." + filetype

  print(url, " -> ", hashvalue, " -> ", label, " -> ", parent_dir, " -> ", filename)

  localPath = path + "/" + str(parent_dir) + "/" + hashvalue + "/" + newFilename

  downloadFileFromUrl(url, localPath)

  filesDict[localPath] = url

# Summary of all local path -> source URL pairs
for x, y in filesDict.items():
  print(x, "->", y)

In [0]:
# File extensions that identify the image files to pick up below `path`.
extensionsToCheck = [".jpg", ".png", ".bmp"]
foundFiles = findFilesInPathByFileExtension(path, extensionsToCheck)

# List every file that was found
for foundFile in foundFiles:
  print(foundFile)

# Prepare the same set of files twice, once per input size: 224x224 for the
# VGG-style models and 299x299 for the Inception/Xception-style models used in
# the classification cell.
preparedImages224x224 = prepareImagesForClassification(foundFiles, 224, 224)
preparedImages299x299 = prepareImagesForClassification(foundFiles, 299, 299)

In [0]:
# Note: functions are passed as arguments here:
# 1) the preprocess_input function
# 2) the decode_predictions function
# => This allows the whole classification to be factored out and invoked dynamically!
# The fourth argument is the instantiated Keras model itself.

predictedClassesVGG16 = classifyImages(preparedImages224x224, vgg16.preprocess_input, vgg16.decode_predictions, vgg16.VGG16(input_shape=(224, 224, 3)))
predictedClassesVGG19 = classifyImages(preparedImages224x224, vgg19.preprocess_input, vgg19.decode_predictions, vgg19.VGG19(input_shape=(224, 224, 3)))
# Further models, currently disabled (re-enable together with the matching
# entries in resultsList / modelList):
#predictedClassesMobileNetV2 = classifyImages(preparedImages224x224, mobilenet_v2.preprocess_input, mobilenet_v2.decode_predictions, mobilenet_v2.MobileNetV2(input_shape=(224, 224, 3)))
#predictedClassesResNet50 = classifyImages(preparedImages224x224, resnet50.preprocess_input, resnet50.decode_predictions, resnet50.ResNet50(input_shape=(224, 224, 3)))
#predictedClassesDenseNet201 = classifyImages(preparedImages224x224, densenet.preprocess_input, densenet.decode_predictions, densenet.DenseNet201(input_shape=(224, 224, 3)))
#predictedClassesInceptionV3 = classifyImages(preparedImages299x299, inception_v3.preprocess_input, inception_v3.decode_predictions, inception_v3.InceptionV3(input_shape=(299, 299, 3)))
#predictedClassesXception = classifyImages(preparedImages299x299, xception.preprocess_input, xception.decode_predictions, xception.Xception(input_shape=(299, 299, 3)))
#predictedClassesInceptionResNet = classifyImages(preparedImages299x299, inception_resnet_v2.preprocess_input, inception_resnet_v2.decode_predictions,inception_resnet_v2.InceptionResNetV2(input_shape=(299, 299, 3)))

In [0]:
# Results and display names of the active models; both lists must stay in the
# same order because they are later indexed in parallel.
resultsList = [predictedClassesVGG16, predictedClassesVGG19]#, predictedClassesMobileNetV2, predictedClassesResNet50, predictedClassesDenseNet201, predictedClassesInceptionV3, predictedClassesXception, predictedClassesInceptionResNet]
modelList = ['VGG16', 'VGG19']#, 'MobileNetV2', 'ResNet50', 'DenseNet201', 'InceptionV3', 'Xception', 'InceptionResNet']

# compareResults is called with the following arguments:
# 1. The list of file names
# 2. The results of the individual models, combined into one list
# 3. The names of the models, as a plain list of strings
# 4. Threshold (minimum confidence of the model prediction) => set to 0.0 after the meeting on 17.04. (= no filter)
# 5. The images prepared at 299x299
#    NOTE(review): the original comment only listed four parameters; presumably
#    the images are used for display - confirm against compareResults.
compareResults(foundFiles, resultsList, modelList, 0.00, preparedImages299x299)

In [0]:
# Build the model-comparison CSV. Besides the files, results and model names,
# the local-path -> URL mapping (filesDict) and the getFileNameFromPath helper
# are handed over; the helper is passed as a function object, not called here.
allResultsCsv = generateCsvForModelComparison(foundFiles, resultsList, modelList, filesDict, getFileNameFromPath)

# Print every entry of the returned collection
# (presumably one CSV row per entry - TODO confirm).
for result in allResultsCsv:
  print(result)

In [0]:
# Open the database connection that the following cells use for storing and
# querying classification results.
# NOTE(review): connection parameters are resolved inside createConnection - confirm where they are configured.
dbConnection = createConnection()

In [0]:
# DB stuff

# Lookup tables handed to getWords for the ImageNet tree search
parentToChildrenDictionary = getParentToChildrenDictionary()
childToParentsDictionary = getChildToParentsDictionary()

# Maximum number of top predictions per image that are written to the DB
topN = 5

# Iterating the classification results: for every model and every file, store
# the top predictions plus the words gained from the ImageNet tree search.
for k, modelName in enumerate(modelList):
  print("==== other model =====")
  for i, fileName in enumerate(foundFiles):
    print("==== other file =====")
    predictions = resultsList[k][i]
    # Never index past the predictions actually available for this image
    # (the fixed range(5) raised an IndexError for shorter result lists).
    topCount = min(topN, len(predictions))
    for j in range(topCount):
      print("Counter:")
      print("Model", (k+1), "of", len(modelList))
      print("File", (i+1), "of", len(foundFiles))
      print("Top", (j+1), "of", topCount)

      # Each prediction entry is indexed as (synset id, class name, probability)
      predictedClassSynsetId = predictions[j][0]
      predictedClass = predictions[j][1]
      predictedProbability = predictions[j][2]

      # Store the original class to DB
      storeImageClassificationResultToDB(dbConnection, fileName, modelName, predictedClass, predictedProbability)

      # Expand ImageNet classes by ImageNet tree search
      newWords = getWords(predictedClassSynsetId, parentToChildrenDictionary, childToParentsDictionary)

      # Also save these new results to DB, with the probability of the
      # prediction they were derived from
      for newWord in newWords:
        storeImageClassificationResultToDB(dbConnection, fileName, modelName, newWord, predictedProbability)

In [0]:
# Search for the word "tutti-frutti" over the open DB connection and print the
# matches; prepareImagesForClassification is passed as a function object.
# NOTE(review): presumably used by the helper to load the matching images for display - confirm.
resultCursor = querySearchWordAndPrintResults(dbConnection, "tutti-frutti", prepareImagesForClassification)

In [0]:
# Same search for the word "indigo bunting"; this overwrites resultCursor from
# the previous query cell.
resultCursor = querySearchWordAndPrintResults(dbConnection, "indigo bunting", prepareImagesForClassification)