From 01e5f6322606c3f90a05052afd50df5e628f7da8 Mon Sep 17 00:00:00 2001 From: Omkar Prabhu Date: Wed, 22 Sep 2021 22:13:28 +0530 Subject: [PATCH] Metadata extraction from image files (#22) * Tested locally metadata extraction with diff types of files * Added Dockerfile for frontend * Handled metadata nulls and checked for proper metadata updation * Fixed linter issues --- api/internal/graph/generated/generated.go | 54 +++++++---------------- api/internal/models/mediaitem.go | 12 ++--- api/schema.graphql | 6 +-- frontend/Dockerfile | 10 +++++ ml/.pylintrc | 3 +- ml/pipeline/component.py | 14 +++++- ml/pipeline/metadata.py | 26 +++++++++-- ml/pipeline/people.py | 4 +- ml/pipeline/places.py | 4 +- ml/pipeline/things.py | 4 +- ml/requirements.txt | 4 +- ml/worker.py | 5 ++- 12 files changed, 84 insertions(+), 62 deletions(-) create mode 100644 frontend/Dockerfile diff --git a/api/internal/graph/generated/generated.go b/api/internal/graph/generated/generated.go index 5796b35..400614d 100644 --- a/api/internal/graph/generated/generated.go +++ b/api/internal/graph/generated/generated.go @@ -513,7 +513,7 @@ type MediaItem { mimeType: String! fileName: String! fileSize: Int! - mediaMetadata: MediaMetaData! + mediaMetadata: MediaMetaData createdAt: Time! updatedAt: Time! 
} @@ -528,8 +528,8 @@ type MediaMetaData { type Photo { cameraMake: String cameraModel: String - focalLength: Int - apertureFNumber: Float + focalLength: String + apertureFNumber: String isoEquivalent: Int exposureTime: String } @@ -1346,14 +1346,11 @@ func (ec *executionContext) _MediaItem_mediaMetadata(ctx context.Context, field return graphql.Null } if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } return graphql.Null } res := resTmp.(*models.MediaMetaData) fc.Result = res - return ec.marshalNMediaMetaData2ᚖirisᚋapiᚋinternalᚋmodelsᚐMediaMetaData(ctx, field.Selections, res) + return ec.marshalOMediaMetaData2ᚖirisᚋapiᚋinternalᚋmodelsᚐMediaMetaData(ctx, field.Selections, res) } func (ec *executionContext) _MediaItem_createdAt(ctx context.Context, field graphql.CollectedField, obj *models.MediaItem) (ret graphql.Marshaler) { @@ -1805,9 +1802,9 @@ func (ec *executionContext) _Photo_focalLength(ctx context.Context, field graphq if resTmp == nil { return graphql.Null } - res := resTmp.(*int) + res := resTmp.(*string) fc.Result = res - return ec.marshalOInt2ᚖint(ctx, field.Selections, res) + return ec.marshalOString2ᚖstring(ctx, field.Selections, res) } func (ec *executionContext) _Photo_apertureFNumber(ctx context.Context, field graphql.CollectedField, obj *models.Photo) (ret graphql.Marshaler) { @@ -1837,9 +1834,9 @@ func (ec *executionContext) _Photo_apertureFNumber(ctx context.Context, field gr if resTmp == nil { return graphql.Null } - res := resTmp.(*float64) + res := resTmp.(*string) fc.Result = res - return ec.marshalOFloat2ᚖfloat64(ctx, field.Selections, res) + return ec.marshalOString2ᚖstring(ctx, field.Selections, res) } func (ec *executionContext) _Photo_isoEquivalent(ctx context.Context, field graphql.CollectedField, obj *models.Photo) (ret graphql.Marshaler) { @@ -3459,9 +3456,6 @@ func (ec *executionContext) _MediaItem(ctx context.Context, sel ast.SelectionSet } case "mediaMetadata": out.Values[i] = 
ec._MediaItem_mediaMetadata(ctx, field, obj) - if out.Values[i] == graphql.Null { - invalids++ - } case "createdAt": out.Values[i] = ec._MediaItem_createdAt(ctx, field, obj) if out.Values[i] == graphql.Null { @@ -4091,16 +4085,6 @@ func (ec *executionContext) marshalNMediaItemConnection2ᚖirisᚋapiᚋinternal return ec._MediaItemConnection(ctx, sel, v) } -func (ec *executionContext) marshalNMediaMetaData2ᚖirisᚋapiᚋinternalᚋmodelsᚐMediaMetaData(ctx context.Context, sel ast.SelectionSet, v *models.MediaMetaData) graphql.Marshaler { - if v == nil { - if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) { - ec.Errorf(ctx, "must not be null") - } - return graphql.Null - } - return ec._MediaMetaData(ctx, sel, v) -} - func (ec *executionContext) unmarshalNString2string(ctx context.Context, v interface{}) (string, error) { res, err := graphql.UnmarshalString(v) return res, graphql.ErrorOnPath(ctx, err) @@ -4439,21 +4423,6 @@ func (ec *executionContext) marshalOEntity2ᚕᚖirisᚋapiᚋinternalᚋmodels return ret } -func (ec *executionContext) unmarshalOFloat2ᚖfloat64(ctx context.Context, v interface{}) (*float64, error) { - if v == nil { - return nil, nil - } - res, err := graphql.UnmarshalFloat(v) - return &res, graphql.ErrorOnPath(ctx, err) -} - -func (ec *executionContext) marshalOFloat2ᚖfloat64(ctx context.Context, sel ast.SelectionSet, v *float64) graphql.Marshaler { - if v == nil { - return graphql.Null - } - return graphql.MarshalFloat(*v) -} - func (ec *executionContext) unmarshalOInt2ᚖint(ctx context.Context, v interface{}) (*int, error) { if v == nil { return nil, nil @@ -4509,6 +4478,13 @@ func (ec *executionContext) marshalOMediaItem2ᚕᚖirisᚋapiᚋinternalᚋmode return ret } +func (ec *executionContext) marshalOMediaMetaData2ᚖirisᚋapiᚋinternalᚋmodelsᚐMediaMetaData(ctx context.Context, sel ast.SelectionSet, v *models.MediaMetaData) graphql.Marshaler { + if v == nil { + return graphql.Null + } + return ec._MediaMetaData(ctx, sel, v) +} + func (ec *executionContext) 
marshalOPhoto2ᚖirisᚋapiᚋinternalᚋmodelsᚐPhoto(ctx context.Context, sel ast.SelectionSet, v *models.Photo) graphql.Marshaler { if v == nil { return graphql.Null diff --git a/api/internal/models/mediaitem.go b/api/internal/models/mediaitem.go index 87093f5..3544470 100644 --- a/api/internal/models/mediaitem.go +++ b/api/internal/models/mediaitem.go @@ -25,11 +25,11 @@ type ( } Photo struct { - CameraMake *string `json:"cameraMake"` - CameraModel *string `json:"cameraModel"` - FocalLength *int `json:"focalLength"` - ApertureFNumber *float64 `json:"apertureFNumber"` - IsoEquivalent *int `json:"isoEquivalent"` - ExposureTime *string `json:"exposureTime"` + CameraMake *string `json:"cameraMake"` + CameraModel *string `json:"cameraModel"` + FocalLength *string `json:"focalLength"` + ApertureFNumber *string `json:"apertureFNumber"` + IsoEquivalent *int `json:"isoEquivalent"` + ExposureTime *string `json:"exposureTime"` } ) diff --git a/api/schema.graphql b/api/schema.graphql index d6a7318..25b5302 100644 --- a/api/schema.graphql +++ b/api/schema.graphql @@ -8,7 +8,7 @@ type MediaItem { mimeType: String! fileName: String! fileSize: Int! - mediaMetadata: MediaMetaData! + mediaMetadata: MediaMetaData createdAt: Time! updatedAt: Time! } @@ -23,8 +23,8 @@ type MediaMetaData { type Photo { cameraMake: String cameraModel: String - focalLength: Int - apertureFNumber: Float + focalLength: String + apertureFNumber: String isoEquivalent: Int exposureTime: String } diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..c236090 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,10 @@ +FROM node:14-alpine as build-step +RUN mkdir /app +WORKDIR /app +COPY package.json /app +RUN npm install +COPY . 
/app +RUN npm run build + +FROM nginx:1.21.3-alpine +COPY --from=build-step /app/build /usr/share/nginx/html diff --git a/ml/.pylintrc b/ml/.pylintrc index deec985..8d2997f 100644 --- a/ml/.pylintrc +++ b/ml/.pylintrc @@ -4,4 +4,5 @@ ignore=CVS, .svn, .git [MESSAGES CONTROL] disable=missing-module-docstring,invalid-name -indent-string=' ' \ No newline at end of file +indent-string=' ' +max-line-length=160 \ No newline at end of file diff --git a/ml/pipeline/component.py b/ml/pipeline/component.py index 1b46bc7..b997a99 100644 --- a/ml/pipeline/component.py +++ b/ml/pipeline/component.py @@ -1,12 +1,15 @@ """Component""" import json +import urllib.request +from bson.objectid import ObjectId class Component: """Component""" - def __init__(self, name, queue): + def __init__(self, name, queue, db): self.name = name self.queue = queue + self.db = db def callback(self, _, __, ___, body): """RabbitMQ Callback""" @@ -14,6 +17,15 @@ def callback(self, _, __, ___, body): print(f'[{self.name}]: {data}') self.process(data['id'], data['imageUrl']) + def download(self, oid, image_url): # pylint: disable=no-self-use + """Downloads the file for processing""" + urllib.request.urlretrieve(image_url, f'image-{oid}') + print(f'downloaded file image-{oid}') + + def update(self, oid, data): + """Updates database with the pipeline result""" + self.db['mediaitems'].update_one({'_id': ObjectId(oid)}, {'$set': data}) + def process(self, oid, image_url): """Component Process""" raise NotImplementedError diff --git a/ml/pipeline/metadata.py b/ml/pipeline/metadata.py index 9729e5e..4b63b98 100644 --- a/ml/pipeline/metadata.py +++ b/ml/pipeline/metadata.py @@ -1,12 +1,30 @@ """Metadata""" +import exifread from .component import Component class Metadata(Component): """Metadata Component""" - def __init__(self): - super().__init__('metadata', 'pipeline.metadata') + def __init__(self, db): + super().__init__('metadata', 'pipeline.metadata', db) def process(self, oid, image_url): - print(oid) - 
print(image_url) + self.download(oid, image_url) + with open(f'image-{oid}', 'rb') as f: + tags = exifread.process_file(f) + if len(tags.keys()) > 0: + self.update(oid, { + 'mediaMetadata': { + 'creationTime': str(tags['EXIF DateTimeOriginal']) if 'EXIF DateTimeOriginal' in tags else None, + 'width': tags['EXIF ExifImageWidth'].values[0] if 'EXIF ExifImageWidth' in tags else None, + 'height': tags['EXIF ExifImageLength'].values[0] if 'EXIF ExifImageLength' in tags else None, + 'photo': { + 'cameraMake': str(tags['Image Make']) if 'Image Make' in tags else None, + 'cameraModel': str(tags['Image Model']) if 'Image Model' in tags else None, + 'focalLength': str(tags['EXIF FocalLength'].values[0].decimal()) if 'EXIF FocalLength' in tags else None, + 'apertureFNumber': str(tags['EXIF FNumber'].values[0].decimal()) if 'EXIF FNumber' in tags else None, + 'isoEquivalent': tags['EXIF ISOSpeedRatings'].values[0] if 'EXIF ISOSpeedRatings' in tags else None, + 'exposureTime': str(tags['EXIF ExposureTime']) if 'EXIF ExposureTime' in tags else None, + }, + } + }) diff --git a/ml/pipeline/people.py b/ml/pipeline/people.py index ecf7eda..c8edcdc 100644 --- a/ml/pipeline/people.py +++ b/ml/pipeline/people.py @@ -4,8 +4,8 @@ class People(Component): """People Component""" - def __init__(self): - super().__init__('people', 'pipeline.people') + def __init__(self, db): + super().__init__('people', 'pipeline.people', db) def process(self, oid, image_url): print(oid) diff --git a/ml/pipeline/places.py b/ml/pipeline/places.py index 3ffa940..9e16b90 100644 --- a/ml/pipeline/places.py +++ b/ml/pipeline/places.py @@ -4,8 +4,8 @@ class Places(Component): """Places Component""" - def __init__(self): - super().__init__('places', 'pipeline.places') + def __init__(self, db): + super().__init__('places', 'pipeline.places', db) def process(self, oid, image_url): print(oid) diff --git a/ml/pipeline/things.py b/ml/pipeline/things.py index e818574..b564c69 100644 --- a/ml/pipeline/things.py +++ 
b/ml/pipeline/things.py @@ -4,8 +4,8 @@ class Things(Component): """Things Component""" - def __init__(self): - super().__init__('things', 'pipeline.things') + def __init__(self, db): + super().__init__('things', 'pipeline.things', db) def process(self, oid, image_url): print(oid) diff --git a/ml/requirements.txt b/ml/requirements.txt index 6adb182..ce55d00 100644 --- a/ml/requirements.txt +++ b/ml/requirements.txt @@ -1 +1,3 @@ -pika==1.2.0 \ No newline at end of file +pika==1.2.0 +ExifRead==2.3.2 +pymongo==3.11.3 diff --git a/ml/worker.py b/ml/worker.py index 8a89d7f..e950790 100644 --- a/ml/worker.py +++ b/ml/worker.py @@ -1,9 +1,12 @@ """ML Worker""" import pika +from pymongo import MongoClient from pipeline import Metadata, People, Places, Things +client = MongoClient('mongodb://root:root@database:5010/iris?authSource=admin') +db = client['iris'] -pipeline = [Metadata(), People(), Places(), Things()] +pipeline = [Metadata(db), People(db), Places(db), Things(db)] def start_consumers(): """Init rabbitmq connection and start consumers"""