Skip to content

Commit

Permalink
support of img category within meta_documents.csv
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrejdlf committed Dec 20, 2012
1 parent 102d2ba commit 265ff10
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 13 deletions.
6 changes: 5 additions & 1 deletion README.md
Expand Up @@ -37,6 +37,10 @@ To understand how the reanalyse project works, you'll need to read carefully:
$ hg clone https://bitbucket.org/cogtree/python-solr
$ sudo python python-solr/setup.py install

### python-magic (used to detect file mimetype)

$ sudo easy_install python-magic

### apache > django conf

$ vi /apache/reanalyse.conf
Expand All @@ -50,8 +54,8 @@ To understand how the reanalyse project works, you'll need to read carefully:
* `./solrdataindex/`
* `./upload/`


$ vi settingsprivate.py

$ mkdir logs upload download solrdataindex
$ sudo chown -R www-data:www-data solr log upload download

Expand Down
14 changes: 8 additions & 6 deletions reanalyseapp/globalvars.py
Expand Up @@ -54,23 +54,25 @@
DOC_CAT_2['transcr'] = 'Transcription'


# C) meta_documents.csv : COLUMN *mimetype .
DOCUMENT_MIMETYPES = ['ese','tei'] # special files (ese is saved as json, tei is parsed)
DOCUMENT_MIMETYPES += ['link','ref'] # doc without local file (only title/text/description, or link, ...)
DOCUMENT_MIMETYPES += ['pdf','htm','csv'] # normaly displayed docs
# C) meta_documents.csv : COLUMN *mimetype .
# be careful ! "mimetype" doesn't mean real mimetype of the file, rather the category for the Texte model (to be renamed in Document)
DOCUMENT_MIMETYPES = ['ese','tei'] # special files (ese is saved as json, tei is parsed)
DOCUMENT_MIMETYPES += ['link','ref'] # doc without local file (only title/text/description, or link, ...)
DOCUMENT_MIMETYPES += ['img','pdf','htm','csv'] # normally displayed docs

# documents are parsed only if they are in A) & B) & C)
# note that ese is also processed, but in a different way. see importexport.py

# types of document from the django models point of view (ie texte.doctype)
# NB: this Texte attribute is made using *mimetype.upper()
# when a Document is processed, the model is made using DOCUMENT_MIMETYPES.upper()
# ... types of document used by the django models point of view (ie texte.doctype)
DOCUMENT_TYPE_CHOICES = (
('TEI', 'XML TEI'),
('LINK', 'External link'),
('REF', 'Only reference'),
('PDF', 'PDF'),
('HTM', 'HTML File'),
('CSV', 'CSV Table'),
('IMG', 'Image'),
#('TXT', 'Text File'),
#('RTF', 'RTF'), # rather use htm for the moment, it's simpler
#('ATL', 'XML Atlas.Ti'), # ...forget about it for the moment (data too much unstructured)
Expand Down
2 changes: 1 addition & 1 deletion reanalyseapp/imexport.py
Expand Up @@ -267,7 +267,7 @@ def importEnqueteUsingMeta(upPath,folderPath):
if doc_mimetype=='tei':
newDocument.status = '5' # 'waiting' status
newDocument.save()
elif doc_mimetype=='pdf' or doc_mimetype=='csv':
elif doc_mimetype=='pdf' or doc_mimetype=='csv' or doc_mimetype=='img':
newDocument.status = '0'
newDocument.save()
elif doc_mimetype=='htm':
Expand Down
15 changes: 14 additions & 1 deletion reanalyseapp/views.py
Expand Up @@ -24,6 +24,9 @@
import glob
import os

# to detect file mimetype when serving files (gexf, pdf, img, ...)
import magic

# solr process
import subprocess

Expand Down Expand Up @@ -1274,7 +1277,9 @@ def edShow(request,eid,did):
thevals.append([cssClass,row[k]])
values.append(thevals)
ctx.update({'csvTable':{'columns':columns,'values':values}})
#########################################

######################################### PDF, IMG
# for pdf, img: nothing to do, everything is in the model (filelocation, name, etc...)

updateCtxWithPerm(ctx,request,e)
updateCtxWithSearchForm(ctx)
Expand Down Expand Up @@ -1409,6 +1414,14 @@ def servePdf(request,did):
fsock = open(d.locationpath,"rb").read()
response = HttpResponse(fsock, mimetype="application/pdf")
return response
# serve img
def serveImg(request,did):
    """Serve the file attached to the Texte ``did`` with its detected MIME type.

    request -- the incoming Django request (not inspected here)
    did     -- primary key of the Texte whose ``locationpath`` file is served

    Returns an HttpResponse whose body is the raw file content and whose
    mimetype is the one python-magic reports for that file.
    Raises whatever ``Texte.objects.get`` raises when no row matches ``did``,
    and the usual I/O error if the file cannot be opened.
    """
    # NOTE(review): no permission check is performed in this view -- confirm
    # whether access to the document should be restricted before serving it.
    d = Texte.objects.get(id=did)
    # Detect the MIME type from the file itself rather than hard-coding one.
    filemime = magic.Magic(mime=True).from_file(d.locationpath)
    # Read inside a context manager so the file handle is always closed,
    # instead of leaving an anonymous open() for the garbage collector.
    with open(d.locationpath,"rb") as imgfile:
        content = imgfile.read()
    return HttpResponse(content, mimetype=filemime)
###########################################################################


Expand Down
4 changes: 4 additions & 0 deletions templates/bq_ed_show.html
Expand Up @@ -276,6 +276,10 @@ <h1>{{texte.name}}</h1>
</object>
{% endif %}
<!-- ===================================================================================================================================== -->
{% if texte.doctype == 'IMG' %}
<img src="{% url reanalyse.reanalyseapp.views.serveImg texte.id %}">
{% endif %}
<!-- ===================================================================================================================================== -->
{% if texte.doctype == 'HTM' %}
<div class="textdiv">
{% autoescape off %}
Expand Down
10 changes: 8 additions & 2 deletions templates/content/method_content_fr.html
Expand Up @@ -44,7 +44,7 @@ <h2>Description complète de la normalisation d'une enquête</h2>
<tr><td class="content_norm_orig">doc, docx</td><td>.txt</td><td class="content_norm_orig">pour les verbatims (fichiers intermédiaires). ces fichiers .txt doivent être ensuite convertis au format TEI .xml (voir plus bas)</td><td>oui</td></tr>
<tr><td class="content_norm_orig">rtf</td><td>.htm</td><td class="content_norm_orig">limite la mise en page et le formatage à quelques éléments simples (titres, sous-titres, paragraphes) mais permet l'indexation (recherche dans le texte)</td><td>oui</td></tr>
<tr><td class="content_norm_orig">ppt</td><td>.pdf</td><td class="content_norm_orig">pas d'indexation du texte, mais conservation de la mise en page</td><td>oui</td></tr>
<tr><td class="content_norm_orig">image</td><td>.jpg</td><td class="content_norm_orig">...</td><td>non</td></tr>
<tr><td class="content_norm_orig">image</td><td>.jpg .png .gif</td><td class="content_norm_orig">affichage simple</td><td>oui</td></tr>
<tr><td class="content_norm_orig">audio</td><td>.mp3</td><td class="content_norm_orig">...</td><td>non</td></tr>
</table>
Lorsque le choix de l'encodage est proposé, toujours préférer "UTF-8"<br/>
Expand Down Expand Up @@ -161,7 +161,7 @@ <h2>Description complète de la normalisation d'une enquête</h2>
<tr class="content_norm_expl">
<td>*descr</td>
<td>emplacement du fichier dans le dossier enquête, ou lien externe</td>
<td>/xml /htm /csv /pdf /link /ref</td>
<td>/xml /htm /csv /pdf /img /link /ref</td>
<td>nom dans le site enquête</td>
<td>/preparatory /analyse /verbatim /publication /ese</td>
<td>apparaît en infobulle à coté de chaque document</td>
Expand Down Expand Up @@ -201,6 +201,12 @@ <h2>Description complète de la normalisation d'une enquête</h2>
<td class="content_norm_file content_norm_orig">Rapport d'enquête.doc</td>
<td class="content_norm_orig">export pdf</td></tr>

<tr><td>d234</td><td class="content_norm_file">_data/ image.png</td><td>img</td>
<td>Mon image</td><td>analyse</td>
<td>Photo des chiens de chasse</td><td>00/10/12</td><td></td>
<td class="content_norm_file content_norm_orig">image.raw</td>
<td class="content_norm_orig">save as png</td></tr>

<tr><td>d034</td><td class="content_norm_file">[NP]</td><td>ref</td>
<td>Publication journal TEI</td><td>publication</td>
<td>Article publié p.34-35 du journal TEI</td><td>15/10/12</td><td></td>
Expand Down
5 changes: 3 additions & 2 deletions urls.py
Expand Up @@ -19,8 +19,9 @@
(r'^stream/(?P<eid>\d+)/(?P<aid>\d+)$', 'reanalyseapp.views.stream'),
(r'^getesereport/(?P<eid>\d+)', 'reanalyseapp.views.getEseReport'), # ESE report download
(r'^graph/download/(?P<gid>\d+)', 'reanalyseapp.views.downloadGraph'),
(r'^graph/serve/(?P<gid>\d+).gexf', 'reanalyseapp.views.serveGraph'), # for gexf display (sigma?)
(r'^graph/serve/(?P<did>\d+).pdf', 'reanalyseapp.views.servePdf'), # for simple pdf display
(r'^e/serve/gexf/(?P<gid>\d+).gexf', 'reanalyseapp.views.serveGraph'), # for gexf display (sigma?)
(r'^e/serve/pdf/(?P<did>\d+).pdf', 'reanalyseapp.views.servePdf'), # for simple pdf display
(r'^e/serve/img/(?P<did>\d+)', 'reanalyseapp.views.serveImg'), # for simple img display

######################################################################################################
########## ENQUETES
Expand Down

0 comments on commit 265ff10

Please sign in to comment.