# Classifier für Wohnungsbilder

## Data Preparation

### Load Data

In [None]:
import PIL.Image
from PIL.ExifTags import TAGS

import os, sys
from shutil import copyfile

# load images
image_files = os.listdir("data/raw")

In [7]:
# extract image title from EXIF
def extract_labels(image_files, field_name = "XPSubject"):
    def get_field(exif, field):
        for (k,v) in exif.items():
            if TAGS.get(k) == field:
                return v.decode("utf-16")

    labels = {}

    for i in image_files:
        img = PIL.Image.open("data/raw/" + i)
        exif = img._getexif()
        title = get_field(exif, field_name)
        labels[i] = title
        img.close()

    return labels

labels = extract_labels(image_files)

In [11]:
# consolidate labels (e.g. different languages, Special Characters)
def consolidate_labels(labels):
    recodes = {
        "Plan d'ensemble": "Grundriss",
        "Vue extérieure": "Aussenansicht",
        "Varie": "Diverse",
        "Cuisine": "Küche",
        "Salle de bains":"Bad",
        "Vue intérieure":"Innenansicht",
        "Cucina":"Küche",
        "Bagno":"Bad",
        "immagine  LIA":"Bild  LIA",
        "Balkon/Terrasse/Sitzplatz":"Balkon_Terrasse_Sitzplatz",
        "Balcone/terrazza/posto a sedere":"Balkon_Terrasse_Sitzplatz",
        "immagine  LIA":"Bild  LIA",
        "Veduta esterna":"Aussenansicht", 
        "Stanza":"Zimmer",
        "Vue":"Aussicht",
        "Prospettiva":"Aussicht"
    }
    
    for l in labels.keys():
        if labels[l] in recodes.keys():
            labels[l] = recodes[labels[l]]
            
    return(labels)

labels_clean = consolidate_labels(labels)
            
print(set(labels_clean.values()))


{'Pianta', 'Balkon_Terrasse_Sitzplatz', 'Grundriss', 'Divers', 'Innenansicht', 'Veduta interna', 'Aussenansicht', 'corridoio', 'Soggiorno', 'Korridor', 'Diverse', 'Séjour', 'Zimmer', 'Bad', 'Wohnzimmer', 'couloir', 'Aussicht', 'Bild  LIA', 'Küche', 'Pièce'}


In [12]:
# organize files in one folder per class
def organize_files(input_dir, output_dir, labels):
    
    classes = set(labels.values())

    for c in classes:
        if not os.path.isdir(output_dir+c):
            os.mkdir(output_dir+c)

    for f in image_files:
        src = input_dir + f
        dst = output_dir + labels[f] + "/" + f
        copyfile(src, dst)
    
organize_files("data/raw/", "data/clean/", labels)