Skip to content

Commit

Permalink
Merge pull request #453 from cspurk/master
Browse files (browse the repository at this point in the history)
make our string length restrictions explicit in the XML Schema

This fixes #447.
  • Loading branch information
cspurk committed Aug 17, 2012
2 parents 4c5158c + 9715e0a commit 18aa057
Show file tree
Hide file tree
Showing 43 changed files with 8,648 additions and 38 deletions.
2 changes: 1 addition & 1 deletion metashare/repository/fixtures/ILSP10.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<resourceInfo xmlns="http://www.ilsp.gr/META-XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v2.1/META-SHARE-Resource.xsd">
<resourceInfo xmlns="http://www.ilsp.gr/META-XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v3.0/META-SHARE-Resource.xsd">
<identificationInfo>
<resourceName lang="en">REVEAL-THIS Corpus</resourceName>
<description lang="en">Multimodal (video recordings), multilingual, parallel, European Parliament Plenary Sessions and Press Conferences – recorded between Feb2005-May2005; 75h:28min:24sec</description>
Expand Down
2 changes: 1 addition & 1 deletion metashare/repository/fixtures/roundtrip.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<resourceInfo xmlns="http://www.ilsp.gr/META-XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v2.1/META-SHARE-Resource.xsd">
<resourceInfo xmlns="http://www.ilsp.gr/META-XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v3.0/META-SHARE-Resource.xsd">
<identificationInfo>
<resourceName lang="en">morphdb.hu</resourceName>
<description lang="en">Hungarian lexical database and morphological grammar</description>
Expand Down
39 changes: 19 additions & 20 deletions metashare/repository/models.py
Expand Up @@ -35,7 +35,7 @@
# namespace of the META-SHARE metadata XML Schema
SCHEMA_NAMESPACE = 'http://www.ilsp.gr/META-XMLSchema'
# version of the META-SHARE metadata XML Schema
SCHEMA_VERSION = '2.1'
SCHEMA_VERSION = '3.0'

def _compute_documentationInfoType_key():
'''
Expand Down Expand Up @@ -3773,7 +3773,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="audio", editable=False, max_length=10, )
default="audio", editable=False, max_length=1000, )

lingualityInfo = models.OneToOneField("lingualityInfoType_model",
verbose_name='Linguality',
Expand Down Expand Up @@ -4160,9 +4160,8 @@ class Meta:
verbose_name='Number of tracks',
help_text='Specifies the number of audio channels',

max_length=30,
choices=sorted(AUDIOFORMATINFOTYPE_NUMBEROFTRACKS_CHOICES['choices'],
key=lambda choice: choice[1]),
max_length=AUDIOFORMATINFOTYPE_NUMBEROFTRACKS_CHOICES['max_length'],
choices=AUDIOFORMATINFOTYPE_NUMBEROFTRACKS_CHOICES['choices'],
blank=True, null=True, )

recordingQuality = models.CharField(
Expand Down Expand Up @@ -4336,7 +4335,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="text", editable=False, max_length=10, )
default="text", editable=False, max_length=1000, )

lingualityInfo = models.OneToOneField("lingualityInfoType_model",
verbose_name='Linguality',
Expand Down Expand Up @@ -4564,7 +4563,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="video", editable=False, max_length=5, )
default="video", editable=False, max_length=1000, )

lingualityInfo = models.OneToOneField("lingualityInfoType_model",
verbose_name='Linguality',
Expand Down Expand Up @@ -4905,7 +4904,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="image", editable=False, max_length=10, )
default="image", editable=False, max_length=1000, )

# OneToMany field: modalityInfo

Expand Down Expand Up @@ -5238,7 +5237,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="textNumerical", editable=False, max_length=20, )
default="textNumerical", editable=False, max_length=1000, )

# OneToMany field: modalityInfo

Expand Down Expand Up @@ -5404,7 +5403,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="textNgram", editable=False, max_length=10, )
default="textNgram", editable=False, max_length=1000, )

ngramInfo = models.OneToOneField("ngramInfoType_model",
verbose_name='Ngram', )
Expand Down Expand Up @@ -5820,7 +5819,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="text", editable=False, max_length=10, )
default="text", editable=False, max_length=1000, )

creationInfo = models.OneToOneField("creationInfoType_model",
verbose_name='Creation',
Expand Down Expand Up @@ -5911,7 +5910,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="video", editable=False, max_length=10, )
default="video", editable=False, max_length=1000, )

creationInfo = models.OneToOneField("creationInfoType_model",
verbose_name='Creation',
Expand Down Expand Up @@ -6002,7 +6001,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="image", editable=False, max_length=10, )
default="image", editable=False, max_length=1000, )

lingualityInfo = models.OneToOneField("lingualityInfoType_model",
verbose_name='Linguality',
Expand Down Expand Up @@ -6220,7 +6219,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="audio", editable=False, max_length=10, )
default="audio", editable=False, max_length=1000, )

lingualityInfo = models.OneToOneField("lingualityInfoType_model",
verbose_name='Linguality',
Expand Down Expand Up @@ -6296,7 +6295,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="text", editable=False, max_length=10, )
default="text", editable=False, max_length=1000, )

lingualityInfo = models.OneToOneField("lingualityInfoType_model",
verbose_name='Linguality',
Expand Down Expand Up @@ -6368,7 +6367,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="video", editable=False, max_length=10, )
default="video", editable=False, max_length=1000, )

lingualityInfo = models.OneToOneField("lingualityInfoType_model",
verbose_name='Linguality',
Expand Down Expand Up @@ -6444,7 +6443,7 @@ class Meta:
'es. A resource may consist of parts attributed to different types' \
' of media. A tool/service may take as input/output more than one ' \
'different media types.',
default="image", editable=False, max_length=10, )
default="image", editable=False, max_length=1000, )

# OneToMany field: modalityInfo

Expand Down Expand Up @@ -7211,7 +7210,7 @@ class Meta:
resourceType = models.CharField(
verbose_name='Resource',
help_text='Specifies the type of the resource being described',
default="lexicalConceptualResource", editable=False, max_length=30, )
default="lexicalConceptualResource", editable=False, max_length=1000, )

lexicalConceptualResourceType = models.CharField(
verbose_name='Lexical conceptual resource type',
Expand Down Expand Up @@ -7367,7 +7366,7 @@ class Meta:
verbose_name='Resource',
help_text='The type of the resource that a tool or service takes a' \
's input or produces as output',
default="toolService", editable=False, max_length=30, )
default="toolService", editable=False, max_length=1000, )

toolServiceType = models.CharField(
verbose_name='Tool service type',
Expand Down Expand Up @@ -7447,7 +7446,7 @@ class Meta:
resourceType = models.CharField(
verbose_name='Resource',
help_text='Specifies the type of the resource being described',
default="corpus", editable=False, max_length=30, )
default="corpus", editable=False, max_length=1000, )

corpusMediaType = models.OneToOneField("corpusMediaTypeType_model",
verbose_name='Corpus media',
Expand Down
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v2.1/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v3.0/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<identificationInfo>
<resourceName lang="en">FIXTURE-12</resourceName>
<description lang="en">The Multi Modal Verification for Teleservices and Security applications project (M2VTS), running under the European ACTS programme, has produced a database designed to facilitate access control using multimodal identification of human faces. This technique improves recognition efficiency by combining individual modalities (i.e. face and voice). Its relative novelty means that new test material had to be created, since no existing database could offer all modalities needed.The M2VTS database comprises 37 different faces, with 5 shots of each being taken at one-week intervals, or when drastic face changes occurred in the mean time. During each shot, subjects were asked to count from 0 to 9 in their native language (generally French), and to move their heads from left to right, both with and without glasses. The data were then used to create three sequences, for voice, motion and "glasses off". The first sequence can be used for speech verification, 2-D dynamic face verification and speech/lips movement correlation, while the second and third provide information on 3-D face recognition, and may also be used to compare other recognition techniques.For more information: http://www.tele.ucl.ac.be/PROJECTS/M2VTS</description>
Expand Down
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v2.1/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v3.0/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<identificationInfo>
<resourceName lang="en">FIXTURE-6</resourceName>
<description lang="en">* Entries for English-Spanish: Scientific research &amp; mathematical sciences (906 entries), Geosciences (10,215), Computer science, electronics &amp; telecommunications (70,580), Industry (47,578), Transport &amp; Maintenance (12,291), Economy (145,572), Biological sciences (38,989), Communication &amp; media (8,143), Chemical &amp; physical sciences (27,467). * Entries for English-French-German-Spanish: Environment (36,658), Health (66,727), Agriculture &amp; food (25,975), Construction &amp; public works (8,429), Law &amp; policy (56,578), Sports &amp; Leisure (17,312) * Two specialized lexicons: Spanish-English and English-French-German without domain codes: electronics, telematics, law, taxes, customs, etc. (550,000 entries). * Two general lexicons: Spanish-English-French-German and Spanish-English-French-German-Portuguese-Italian (83,000 entries).This terminological database contains, for each domain, a sub-domain indication is given (from 2 sub-domains for Scientific research to 39 for Sports &amp; leisure). Each entry consists of a definition, phraseological unit, abbreviation, usage information, grammatical labels. Format: ASCIIMedium: floppy disk</description>
Expand Down
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v2.1/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v3.0/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<identificationInfo>
<resourceName lang="English">FIXTURE-1</resourceName>
<description lang="English">The CHIL 2007 Evaluation Package was produced within the CHIL Project (Computers in the Human Interaction Loop), in the framework of an Integrated Project (IP 506909) under the European Commission's Sixth Framework Programme. The objective of this project is to create environments in which computers serve humans who focus on interacting with other humans as opposed to having to attend to and being preoccupied with the machines themselves. Instead of computers operating in an isolated manner, and Humans [thrust] in the loop [of computers] we will put Computers in the Human Interaction Loop (CHIL).In this context, the CHIL project produced CHIL Seminars. The CHIL Seminars are scientific presentations given by students, faculty members or invited speakers in the field of multimodal interfaces and speech processing. During the talks, videos of the speaker and the audience from 4 fixed cameras, frontal close ups of the speaker, close talking and far-field microphone data of the speaker’s voice and ambient sounds were recorded. The CHIL 2007 Evaluation Package consists of the following contents:1) A set of audiovisual recordings of interactive seminars. The number of people present in the recording was fixed to be between 3 and 7. The recordings were done between June and September 2006 according to the “CHIL Room Setup” specification.2) Video annotations.3) Orthographic transcriptions.</description>
Expand Down
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v2.1/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<resourceInfo xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v3.0/META-SHARE-Resource.xsd" xmlns:xslt="http://xml.apache.org/xsltm" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ilsp.gr/META-XMLSchema">
<identificationInfo>
<resourceName lang="en">FIXTURE-5</resourceName>
<description lang="en">The Pronunciation lexicon of British place names, surnames and first names was produced by the University of Poitiers (France) through a funding from ELRA in the framework of the European Commission project LRsP&amp;P (Language Resources Production &amp; Packaging - LE4-8335). This lexicon is an SGML-encoded database of British proper names. All entries belong to one or several of the following categories: place-names (a quasi-exhaustive list of toponyms from England, Scotland and Wales), and surnames or first names (a selection of names based on an extensive survey of bibliographic sources in the field of British onomastics combined with lists compiled by the author of this lexicon). The database is composed of 160,000 entries, breaking down as follows:Place-names Number of entries Number of transcriptions England 31,635 44,969 Wales 5,085 9,941 Scotland 15,363 20,397 Total 1 52,083 75,307 Surnames 92,456 115,934 First names 15,461 20,803 Total 2 107,917 136,737 Total 1+2 160,000 212,044 All phonemic transcriptions in the database are based on the SAMPA phonetic alphabet</description>
Expand Down
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<resourceInfo xmlns="http://www.ilsp.gr/META-XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v2.1/META-SHARE-Resource.xsd">
<resourceInfo xmlns="http://www.ilsp.gr/META-XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ilsp.gr/META-XMLSchema http://metashare.ilsp.gr/META-XMLSchema/v3.0/META-SHARE-Resource.xsd">
<identificationInfo>
<resourceName lang="en-us">Italian TTS Speech Corpus (Appen)</resourceName>
<description lang="en-us">The Italian TTS Speech Corpus contains the recordings of 1 native Italian speaker recorded in a studio over 1 channel. The Italian TTS Speech Corpus contains the recordings of 1 native Italian speaker (male, 50 years old) recorded in a studio over 1 channel (Shure SM15 unidirectional professional head-word condenser microphone). The data collection and transcription were performed by Appen (Australia). Speech samples are stored as sequences of 16-bit 22.05 kHz PCM in uncompressed WAV files. The speaker read 3,300 prompted sentences covering all legal triphones and diphones. The database is provided with orthographic transcriptions in SAMPA, including canonical and alternative pronunciation, and syllable, stress and acoustic events markings. All transcriptions were segmented at the utterance (sentence/command word) level, annotated at the word level and checked manually. A pronunciation lexicon including 7,300 headwords (plus variants) is also available. This database is aimed to be used within text-to-speech and speech synthesis applications.</description>
Expand Down

0 comments on commit 18aa057

Please sign in to comment.