<a href="https://colab.research.google.com/github/ml2-picme/PicMe/blob/master/ImageTextMatching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
'''

This is the entry notebook for the Image-Text Matching project, done by the ML² course at Offenburg University of Applied Sciences
The project is organized in 3 parts:
1.) image classification
2.) text stemming
3.) bringing both parts above together

The goals of the project are:
a) Given a term, find matching images and text files
b) Automatically search the directories and show image-text matching results

'''

### Preparation

In [0]:
# Imports used for this notebook

import hashlib
import os
import sys
from urllib.request import urlopen

from keras.applications import *

# PIP installations

!pip install mysql-connector-python-rf

In [0]:
# Clone the actual project into the Colab instance, for enabling the usage of the self-written scripts

# Remove Path if already existing locally
!rm -r PicMe

# Clone Git repository
!git clone https://github.com/ml2-picme/PicMe.git

# Add the relevant paths of the repo to system path
sys.path.append("/content/PicMe")
sys.path.append("/content/PicMe/scripts")

# Add the functions, defined in the script files

import script_hub

import db_connector
import file_processing
import image_classification
import imagenet_tree_search
import text_processing
import email_processing

### Parameters

In [0]:
# Parameters

path = "/tmp/image_classification"
hashrange = 20

imageExtensions = [".jpg", ".png", ".bmp"]
emailExtensions = [".eml"]
documentExtensions = [".docx"]

# Database settings. Each value can be overridden through an environment variable,
# so credentials do not have to live in the notebook; the literals are only fallbacks.
# NOTE(review): the fallback password is committed in plain text - consider rotating it
# and dropping the fallback once the environment variables are in place.
dbUser = os.environ.get("PICME_DB_USER", "ml2")
dbPassword = os.environ.get("PICME_DB_PASSWORD", "ml2@hsOg#2019!")
dbHost = os.environ.get("PICME_DB_HOST", "192.52.33.218")
dbDatabase = os.environ.get("PICME_DB_NAME", "ml2")
dbAutoCommit = True

searchTerm = "fire"

### Entry methods

In [0]:
# Open the database connection once; it is handed to the entry methods below that take a dbConnection
dbConnection = script_hub.createDbConnection(
  dbUser,
  dbPassword,
  dbHost,
  dbDatabase,
  dbAutoCommit,
)

# Entry method for directory preparation: sets up the local directories on the Colab instance and downloads the input files into them
def simulateDirectoryStructure(path, hashrange):
  """Run the three preparation steps of script_hub, one after the other.

  Calls script_hub.createDirectoryStructure, script_hub.downloadPictures and
  script_hub.downloadEmails in that order, each with (path, hashrange).
  Nothing is returned.
  """
  stepArgs = (path, hashrange)
  script_hub.createDirectoryStructure(*stepArgs)
  script_hub.downloadPictures(*stepArgs)
  script_hub.downloadEmails(*stepArgs)

# Entry method for examining the local directory structure: hands the image and email examination over to script_hub together with the DB connection
def examineDirectoryStructure(path, imageExtensions, emailExtensions, documentExtensions, dbConnection):
  """Examine the images and emails found under path.

  Calls script_hub.examineImages with imageExtensions, then
  script_hub.examineEmails with emailExtensions; both also receive path and
  dbConnection. Nothing is returned.

  NOTE(review): documentExtensions is accepted but not used - no document
  examination is triggered here; confirm whether that step is still missing.
  """
  script_hub.examineImages(path, imageExtensions, dbConnection)
  script_hub.examineEmails(path, emailExtensions, dbConnection)

# This is the entry method for finding results based on a fixed search term
def searchByTerm(searchTerm, dbConnection):
  """Search images and emails for searchTerm and hand both results back.

  Parameters:
    searchTerm: term forwarded to both script_hub searches.
    dbConnection: database connection forwarded to both script_hub searches.

  Returns:
    A tuple (imageResults, emailResults) holding whatever
    script_hub.searchImagesBasedOnTerm and script_hub.searchEmailsBasedOnTerm
    returned, so a caller that stores the result gets the search output.
  """
  imageResults = script_hub.searchImagesBasedOnTerm(searchTerm, dbConnection)
  emailResults = script_hub.searchEmailsBasedOnTerm(searchTerm, dbConnection)
  return imageResults, emailResults

# Entry method for the automatic search for image-text mappings
def searchImageTextMappings(dbConnection):
  """Forward dbConnection to script_hub.searchDbAutomaticallyForImageTextMappings.

  The helper's return value is discarded; nothing is returned.
  """
  findMappings = script_hub.searchDbAutomaticallyForImageTextMappings
  findMappings(dbConnection)

### Logic

In [0]:
# Step 1: simulate a directory structure by running the preparation entry method with the configured path and hashrange

simulateDirectoryStructure(path=path, hashrange=hashrange)

In [0]:
# Step 2: examine the files in the simulated directory structure

examineDirectoryStructure(
  path=path,
  imageExtensions=imageExtensions,
  emailExtensions=emailExtensions,
  documentExtensions=documentExtensions,
  dbConnection=dbConnection,
)

In [0]:
# Goal a) Search by the configured term and keep whatever searchByTerm hands back

resultCursorWithImages = searchByTerm(searchTerm=searchTerm, dbConnection=dbConnection)

In [0]:
# Goal b) Run the automatic search for image-text mappings

searchImageTextMappings(dbConnection=dbConnection)