Skip to content
This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

Commit

Permalink
Metadata extraction from image files (#22)
Browse files Browse the repository at this point in the history
* Tested metadata extraction locally with different types of files

* Added Dockerfile for frontend

* Handled null metadata values and verified that metadata is updated correctly

* Fixed linter issues
  • Loading branch information
prabhuomkar committed Sep 22, 2021
1 parent a1d96a1 commit 01e5f63
Show file tree
Hide file tree
Showing 12 changed files with 84 additions and 62 deletions.
54 changes: 15 additions & 39 deletions api/internal/graph/generated/generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions api/internal/models/mediaitem.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ type (
}

Photo struct {
CameraMake *string `json:"cameraMake"`
CameraModel *string `json:"cameraModel"`
FocalLength *int `json:"focalLength"`
ApertureFNumber *float64 `json:"apertureFNumber"`
IsoEquivalent *int `json:"isoEquivalent"`
ExposureTime *string `json:"exposureTime"`
CameraMake *string `json:"cameraMake"`
CameraModel *string `json:"cameraModel"`
FocalLength *string `json:"focalLength"`
ApertureFNumber *string `json:"apertureFNumber"`
IsoEquivalent *int `json:"isoEquivalent"`
ExposureTime *string `json:"exposureTime"`
}
)
6 changes: 3 additions & 3 deletions api/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ type MediaItem {
mimeType: String!
fileName: String!
fileSize: Int!
mediaMetadata: MediaMetaData!
mediaMetadata: MediaMetaData
createdAt: Time!
updatedAt: Time!
}
Expand All @@ -23,8 +23,8 @@ type MediaMetaData {
type Photo {
cameraMake: String
cameraModel: String
focalLength: Int
apertureFNumber: Float
focalLength: String
apertureFNumber: String
isoEquivalent: Int
exposureTime: String
}
Expand Down
10 changes: 10 additions & 0 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Stage 1: build the production bundle with Node.
FROM node:14-alpine AS build-step
# WORKDIR creates the directory when it does not exist, so no mkdir is needed.
WORKDIR /app
# Copy the manifest (and the lockfile, when one exists) before the sources so
# the dependency layer stays cached until the dependencies themselves change
# and installs honour the locked versions.
COPY package*.json ./
RUN npm install
COPY . /app
RUN npm run build

# Stage 2: serve the static build output with nginx.
FROM nginx:1.21.3-alpine
COPY --from=build-step /app/build /usr/share/nginx/html
3 changes: 2 additions & 1 deletion ml/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ ignore=CVS, .svn, .git
[MESSAGES CONTROL]
disable=missing-module-docstring,invalid-name

indent-string=' '
indent-string=' '
max-line-length=160
14 changes: 13 additions & 1 deletion ml/pipeline/component.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
"""Component"""
import json
import urllib.request
from bson.objectid import ObjectId


class Component:
    """Base class for a pipeline stage fed by a RabbitMQ queue.

    Subclasses implement :meth:`process`; the helpers here cover the shared
    steps of fetching the image and persisting the result.
    """

    def __init__(self, name, queue, db):
        """Store the component name, its queue name and the database handle."""
        self.name = name
        self.queue = queue
        self.db = db

    def callback(self, _, __, ___, body):
        """RabbitMQ callback: decode the JSON message and run the component.

        The decoded message must carry the ``id`` and ``imageUrl`` keys; the
        three leading positional arguments are ignored.
        """
        data = json.loads(body)
        print(f'[{self.name}]: {data}')
        self.process(data['id'], data['imageUrl'])

    def download(self, oid, image_url):  # pylint: disable=no-self-use
        """Download ``image_url`` to ``image-<oid>`` in the working directory.

        Returns:
            The local file name, so callers do not have to rebuild it.
        """
        path = f'image-{oid}'
        urllib.request.urlretrieve(image_url, path)
        print(f'downloaded file {path}')
        return path

    def update(self, oid, data):
        """Updates database with the pipeline result.

        Applies ``data`` as a ``$set`` on the ``mediaitems`` document whose
        ``_id`` is ``ObjectId(oid)``.
        """
        self.db['mediaitems'].update_one({'_id': ObjectId(oid)}, {'$set': data})

    def process(self, oid, image_url):
        """Run the component for one media item; subclasses must override."""
        raise NotImplementedError
26 changes: 22 additions & 4 deletions ml/pipeline/metadata.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
"""Metadata"""
import exifread
from .component import Component


class Metadata(Component):
    """Metadata Component: extracts EXIF data and stores it on the media item."""

    def __init__(self, db):
        super().__init__('metadata', 'pipeline.metadata', db)

    def process(self, oid, image_url):
        """Download the image, read its EXIF tags and persist ``mediaMetadata``.

        Nothing is written when the file carries no EXIF tags. Each field is
        stored as ``None`` when its tag is absent.
        """
        self.download(oid, image_url)
        with open(f'image-{oid}', 'rb') as f:
            tags = exifread.process_file(f)
        # The file is closed before the database write; only the parsed tags are needed.
        if not tags:
            return

        def text(key):
            """String form of the tag, or None when the tag is absent."""
            return str(tags[key]) if key in tags else None

        def first(key):
            """First raw value of the tag, or None when the tag is absent."""
            return tags[key].values[0] if key in tags else None

        def decimal(key):
            """First value of the tag as a decimal string, or None when absent."""
            return str(tags[key].values[0].decimal()) if key in tags else None

        self.update(oid, {
            'mediaMetadata': {
                'creationTime': text('EXIF DateTimeOriginal'),
                # ExifImageWidth is the horizontal pixel dimension and
                # ExifImageLength the vertical one; they were previously swapped.
                'width': first('EXIF ExifImageWidth'),
                'height': first('EXIF ExifImageLength'),
                'photo': {
                    'cameraMake': text('Image Make'),
                    'cameraModel': text('Image Model'),
                    'focalLength': decimal('EXIF FocalLength'),
                    'apertureFNumber': decimal('EXIF FNumber'),
                    'isoEquivalent': first('EXIF ISOSpeedRatings'),
                    'exposureTime': text('EXIF ExposureTime'),
                },
            }
        })
4 changes: 2 additions & 2 deletions ml/pipeline/people.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

class People(Component):
"""People Component"""
def __init__(self):
super().__init__('people', 'pipeline.people')
def __init__(self, db):
super().__init__('people', 'pipeline.people', db)

def process(self, oid, image_url):
print(oid)
Expand Down
4 changes: 2 additions & 2 deletions ml/pipeline/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

class Places(Component):
"""Places Component"""
def __init__(self):
super().__init__('places', 'pipeline.places')
def __init__(self, db):
super().__init__('places', 'pipeline.places', db)

def process(self, oid, image_url):
print(oid)
Expand Down
4 changes: 2 additions & 2 deletions ml/pipeline/things.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

class Things(Component):
"""Things Component"""
def __init__(self):
super().__init__('things', 'pipeline.things')
def __init__(self, db):
super().__init__('things', 'pipeline.things', db)

def process(self, oid, image_url):
print(oid)
Expand Down
4 changes: 3 additions & 1 deletion ml/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
pika==1.2.0
pika==1.2.0
ExifRead==2.3.2
pymongo==3.11.3
5 changes: 4 additions & 1 deletion ml/worker.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""ML Worker"""
import os

import pika
from pymongo import MongoClient
from pipeline import Metadata, People, Places, Things

# The connection string can be overridden through the MONGO_URI environment
# variable so real credentials need not live in source control; the default
# keeps the previous hard-coded value.
MONGO_URI = os.environ.get('MONGO_URI', 'mongodb://root:root@database:5010/iris?authSource=admin')
client = MongoClient(MONGO_URI)
db = client['iris']

# Every pipeline component shares the same database handle.
pipeline = [Metadata(db), People(db), Places(db), Things(db)]

def start_consumers():
"""Init rabbitmq connection and start consumers"""
Expand Down

0 comments on commit 01e5f63

Please sign in to comment.