support of img category within meta_documents.csv

medialab · Dec 20, 2012 · 265ff10 · 265ff10
1 parent 102d2ba
commit 265ff10
Show file tree

Hide file tree

Showing 7 changed files with 43 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -37,6 +37,10 @@ To understand how the reanalyse project works, you'll need to read carefully:
 	$ hg clone https://bitbucket.org/cogtree/python-solr
 	$ sudo python python-solr/setup.py install
 
+### python-magic (used to detect file mimetype)
+
+	$ sudo easy_install python-magic
+
 ### apache > django conf
 
 	$ vi /apache/reanalyse.conf
@@ -50,8 +54,8 @@ To understand how the reanalyse project works, you'll need to read carefully:
 * `./solrdataindex/`
 * `./upload/`
 
+
 	$ vi settingsprivate.py
-
 	$ mkdir logs upload download solrdataindex
 	$ sudo chown -R www-data:www-data solr log upload download
 

diff --git a/reanalyseapp/globalvars.py b/reanalyseapp/globalvars.py
@@ -54,23 +54,25 @@
 DOC_CAT_2['transcr'] 	= 'Transcription'
 
 
-# C) meta_documents.csv : COLUMN *mimetype . 
-DOCUMENT_MIMETYPES 	=  ['ese','tei']			# special files (ese is saved as json, tei is parsed)
-DOCUMENT_MIMETYPES 	+= ['link','ref']			# doc without local file (only title/text/description, or link, ...)
-DOCUMENT_MIMETYPES	+= ['pdf','htm','csv']		# normaly displayed docs
+# C) meta_documents.csv : COLUMN *mimetype .
+# be careful! here "mimetype" is NOT the real MIME type of the file: it is the document category used by the Texte model (to be renamed Document)
+DOCUMENT_MIMETYPES 	=  ['ese','tei']				# special files (ese is saved as json, tei is parsed)
+DOCUMENT_MIMETYPES 	+= ['link','ref']				# doc without local file (only title/text/description, or link, ...)
+DOCUMENT_MIMETYPES	+= ['img','pdf','htm','csv']	# normaly displayed docs
 
 # documents are parsed only if they are in A) & B) & C)
 # note that ese is also processed, but in a different way. see importexport.py
 
-# types of document from the django models point of view (ie texte.doctype)
-# NB: this Texte attribute is made using *mimetype.upper()
+# when a document is processed, its texte.doctype is set to the upper-cased *mimetype value (one of DOCUMENT_MIMETYPES)
+# ... these are the types of document from the django models point of view (ie texte.doctype)
 DOCUMENT_TYPE_CHOICES = (
 	('TEI', 'XML TEI'),
 	('LINK', 'External link'),
 	('REF', 'Only reference'),
 	('PDF', 'PDF'),
 	('HTM', 'HTML File'),
 	('CSV', 'CSV Table'),
+	('IMG', 'Image'),
 	#('TXT', 'Text File'),
 	#('RTF', 'RTF'),			# rather use htm for the moment, it's simpler
 	#('ATL', 'XML Atlas.Ti'),	# ...forget about it for the moment (data too much unstructured)

diff --git a/reanalyseapp/imexport.py b/reanalyseapp/imexport.py
@@ -267,7 +267,7 @@ def importEnqueteUsingMeta(upPath,folderPath):
 							if doc_mimetype=='tei':
 								newDocument.status	= '5' # 'waiting' status
 								newDocument.save()
-							elif doc_mimetype=='pdf' or doc_mimetype=='csv':
+							elif doc_mimetype=='pdf' or doc_mimetype=='csv' or doc_mimetype=='img':
 								newDocument.status	= '0'
 								newDocument.save()
 							elif doc_mimetype=='htm':

diff --git a/reanalyseapp/views.py b/reanalyseapp/views.py
@@ -24,6 +24,9 @@
 import glob
 import os
 
+# to detect file mimetype when serving files (gexf, pdf, img, ...)
+import magic
+
 # solr process
 import subprocess
 
@@ -1274,7 +1277,9 @@ def edShow(request,eid,did):
 				thevals.append([cssClass,row[k]])
 			values.append(thevals)
 		ctx.update({'csvTable':{'columns':columns,'values':values}})
-	#########################################
+
+	######################################### PDF, IMG
+	# for pdf, img: nothing to do, everything is in the model (filelocation, name, etc...)
 
 	updateCtxWithPerm(ctx,request,e)
 	updateCtxWithSearchForm(ctx)
@@ -1409,6 +1414,14 @@ def servePdf(request,did):
 	fsock = open(d.locationpath,"rb").read()
 	response = HttpResponse(fsock, mimetype="application/pdf")
 	return response
+# serve img
+def serveImg(request,did):
+	d = Texte.objects.get(id=did)
+	mime = magic.Magic(mime=True)
+	filemime = mime.from_file(d.locationpath)
+	fsock = open(d.locationpath,"rb").read()
+	response = HttpResponse(fsock, mimetype=filemime)
+	return response
 ###########################################################################
 
 

diff --git a/templates/bq_ed_show.html b/templates/bq_ed_show.html
@@ -276,6 +276,10 @@ <h1>{{texte.name}}</h1>
 	</object>
 {% endif %}
 <!-- ===================================================================================================================================== -->
+{% if texte.doctype == 'IMG' %}
+	<img src="{% url reanalyse.reanalyseapp.views.serveImg texte.id %}">
+{% endif %}
+<!-- ===================================================================================================================================== -->
 {% if texte.doctype == 'HTM' %}
 	<div class="textdiv">
 		{% autoescape off %}

diff --git a/templates/content/method_content_fr.html b/templates/content/method_content_fr.html
@@ -44,7 +44,7 @@ <h2>Description complète de la normalisation d'une enquête</h2>
 		<tr><td class="content_norm_orig">doc, docx</td><td>.txt</td><td class="content_norm_orig">pour les verbatims (fichiers intermédiaires). ces fichiers .txt doivent être ensuite convertis au format TEI .xml (voir plus bas)</td><td>oui</td></tr>
 		<tr><td class="content_norm_orig">rtf</td><td>.htm</td><td class="content_norm_orig">limite la mise en page et le formattage à quelques éléments simples (titres, sous-titres, paragraphes) mais permet l'indexation (recherche dans le texte)</td><td>oui</td></tr>
 		<tr><td class="content_norm_orig">ppt</td><td>.pdf</td><td class="content_norm_orig">pas d'indexation du texte, mais conservation de la mise en page</td><td>oui</td></tr>
-		<tr><td class="content_norm_orig">image</td><td>.jpg</td><td class="content_norm_orig">...</td><td>non</td></tr>
+		<tr><td class="content_norm_orig">image</td><td>.jpg .png .gif</td><td class="content_norm_orig">affichage simple</td><td>oui</td></tr>
 		<tr><td class="content_norm_orig">audio</td><td>.mp3</td><td class="content_norm_orig">...</td><td>non</td></tr>
 	</table>
 	Lorsque le choix de l'encodage est proposé, toujours préférer "UTF-8"<br/>
@@ -161,7 +161,7 @@ <h2>Description complète de la normalisation d'une enquête</h2>
 			<tr class="content_norm_expl">
 				<td>*descr</td>
 				<td>emplacement du fichier dans le dossier enquête, ou lien externe</td>
-				<td>/xml /htm /csv /pdf /link /ref</td>
+				<td>/xml /htm /csv /pdf /img /link /ref</td>
 				<td>nom dans le site enquête</td>
 				<td>/preparatory /analyse /verbatim /publication /ese</td>
 				<td>apparaît en infobulle à coté de chaque document</td>
@@ -201,6 +201,12 @@ <h2>Description complète de la normalisation d'une enquête</h2>
 				<td class="content_norm_file content_norm_orig">Rapport d'enquête.doc</td>
 				<td class="content_norm_orig">export pdf</td></tr>
 
+			<tr><td>d234</td><td class="content_norm_file">_data/ image.png</td><td>img</td>
+				<td>Mon image</td><td>analyse</td>
+				<td>Photo des chiens de chasse</td><td>00/10/12</td><td></td>
+				<td class="content_norm_file content_norm_orig">image.raw</td>
+				<td class="content_norm_orig">save as png</td></tr>
+
 			<tr><td>d034</td><td class="content_norm_file">[NP]</td><td>ref</td>
 				<td>Publication journal TEI</td><td>publication</td>
 				<td>Article publié p.34-35 du journal TEI</td><td>15/10/12</td><td></td>

diff --git a/urls.py b/urls.py
@@ -19,8 +19,9 @@
 	(r'^stream/(?P<eid>\d+)/(?P<aid>\d+)$', 'reanalyseapp.views.stream'),
 	(r'^getesereport/(?P<eid>\d+)', 'reanalyseapp.views.getEseReport'),		# ESE report download
 	(r'^graph/download/(?P<gid>\d+)', 'reanalyseapp.views.downloadGraph'),
-	(r'^graph/serve/(?P<gid>\d+).gexf', 'reanalyseapp.views.serveGraph'),	# for gexf display (sigma?)
-	(r'^graph/serve/(?P<did>\d+).pdf', 'reanalyseapp.views.servePdf'),		# for simple pdf display
+	(r'^e/serve/gexf/(?P<gid>\d+).gexf', 'reanalyseapp.views.serveGraph'),	# for gexf display (sigma?)
+	(r'^e/serve/pdf/(?P<did>\d+).pdf', 'reanalyseapp.views.servePdf'),		# for simple pdf display
+	(r'^e/serve/img/(?P<did>\d+)', 'reanalyseapp.views.serveImg'),		# for simple img display
 
 	######################################################################################################
 	########## ENQUETES