diff --git a/kingfisher_scrapy/exceptions.py b/kingfisher_scrapy/exceptions.py
index f1f0e1910..080eded2b 100644
--- a/kingfisher_scrapy/exceptions.py
+++ b/kingfisher_scrapy/exceptions.py
@@ -10,9 +10,5 @@ class SpiderArgumentError(KingfisherScrapyError):
     """Raised when a spider argument's value is invalid"""
 
 
-class MissingRequiredFieldError(KingfisherScrapyError, KeyError):
-    """Raised when an item is missing a required field"""
-
-
 class MissingNextLinkError(KingfisherScrapyError):
     """Raised when a next link is not found on the first page of results"""
diff --git a/kingfisher_scrapy/item_schema/File.json b/kingfisher_scrapy/item_schema/File.json
new file mode 100644
index 000000000..c726798e4
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/File.json
@@ -0,0 +1,20 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "allOf": [
+    {
+      "$ref": "item.json#/definitions/KingfisherFileItem"
+    }
+  ],
+  "type": "object",
+  "properties": {
+    "post_to_api": {
+      "type": "boolean"
+    },
+    "path": {
+      "type": "string"
+    },
+    "files_store": {
+      "type": "string"
+    }
+  }
+}
diff --git a/kingfisher_scrapy/item_schema/FileError.json b/kingfisher_scrapy/item_schema/FileError.json
new file mode 100644
index 000000000..8f1b935d0
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/FileError.json
@@ -0,0 +1,18 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "allOf": [
+    {
+      "$ref": "item.json#/definitions/KingfisherItem"
+    }
+  ],
+  "type": "object",
+  "properties": {
+    "errors": {
+      "type": "string",
+      "minLength": 1
+    }
+  },
+  "required": [
+    "errors"
+  ]
+}
diff --git a/kingfisher_scrapy/item_schema/FileItem.json b/kingfisher_scrapy/item_schema/FileItem.json
new file mode 100644
index 000000000..3e49413fb
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/FileItem.json
@@ -0,0 +1,18 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "allOf": [
+    {
+      "$ref": "item.json#/definitions/KingfisherFileItem"
+    }
+  ],
+  "type": "object",
+  "properties": {
+    "number": {
+      "type": "integer",
+      "minimum": 1
+    }
+  },
+  "required": [
+    "number"
+  ]
+}
diff --git a/kingfisher_scrapy/item_schema/item.json b/kingfisher_scrapy/item_schema/item.json
new file mode 100644
index 000000000..7ded5a014
--- /dev/null
+++ b/kingfisher_scrapy/item_schema/item.json
@@ -0,0 +1,62 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "definitions": {
+    "KingfisherItem": {
+      "type": "object",
+      "properties": {
+        "file_name": {
+          "type": "string",
+          "pattern": "^[^/]+$"
+        },
+        "url": {
+          "type": "string",
+          "format": "uri"
+        }
+      },
+      "required": [
+        "file_name",
+        "url"
+      ]
+    },
+    "KingfisherFileItem": {
+      "allOf": [
+        {
+          "$ref": "#/definitions/KingfisherItem"
+        }
+      ],
+      "type": "object",
+      "properties": {
+        "data_type": {
+          "type": "string",
+          "enum": [
+            "record",
+            "release",
+            "record_list",
+            "release_list",
+            "compiled_release",
+            "record_package",
+            "release_package",
+            "record_package_list",
+            "release_package_list",
+            "record_package_list_in_results",
+            "release_package_list_in_results",
+            "release_package_json_lines",
+            "record_package_json_lines",
+            "release_package_in_ocdsReleasePackage_in_list_in_results",
+            "release_in_Release"
+          ]
+        },
+        "encoding": {
+          "type": "string"
+        },
+        "data": {
+          "minLength": 1
+        }
+      },
+      "required": [
+        "data",
+        "data_type"
+      ]
+    }
+  }
+}
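
The three schemas above do not stand alone: each pulls the shared definitions out of item.json through allOf/$ref, so a validator only ever sees one resolved schema per item type. A minimal sketch of how they can be loaded and exercised outside Scrapy, assuming a checkout where the files sit under kingfisher_scrapy/item_schema/ (the file name and item values here are made up; rfc3987, added to the requirements below, is what lets FormatChecker actually enforce "format": "uri"):

    import pathlib

    import jsonref
    from jsonschema import FormatChecker
    from jsonschema.validators import Draft4Validator

    schema_dir = pathlib.Path('kingfisher_scrapy/item_schema')  # assumed checkout layout
    with open(schema_dir / 'File.json') as f:
        # base_uri makes the relative "item.json#/definitions/..." reference resolvable
        schema = jsonref.load(f, base_uri=schema_dir.resolve().as_uri() + '/')

    validator = Draft4Validator(schema, format_checker=FormatChecker())
    validator.validate({
        'file_name': 'example.json',
        'url': 'http://example.com/example.json',
        'data': '{"releases": []}',
        'data_type': 'release_package',
    })  # raises jsonschema.exceptions.ValidationError if any constraint fails

jsonref resolves the relative item.json reference against base_uri, which is why the pipeline change below passes the schema directory as a file:// URI.
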
diff --git a/kingfisher_scrapy/items.py b/kingfisher_scrapy/items.py
index a05d5c2bf..7ce0e95e5 100644
--- a/kingfisher_scrapy/items.py
+++ b/kingfisher_scrapy/items.py
@@ -1,23 +1,12 @@
 # https://docs.scrapy.org/en/latest/topics/items.html
 
-import scrapy
-from kingfisher_scrapy.exceptions import MissingRequiredFieldError
+import scrapy
 
 
 class KingfisherItem(scrapy.Item):
     file_name = scrapy.Field()
     url = scrapy.Field()
-
-    def validate(self):
-        """
-        Raises an error if any required field is missing.
-
-        :raises kingfisher_scrapy.extensions.MissingRequiredFieldError: if any required field is missing
-        """
-        if hasattr(self, 'required'):
-            for field in self.required:
-                if field not in self:
-                    raise MissingRequiredFieldError(field)
+    validate = True
 
 
 class File(KingfisherItem):
@@ -32,13 +21,6 @@ class File(KingfisherItem):
     path = scrapy.Field()
     files_store = scrapy.Field()
 
-    required = [
-        'file_name',
-        'url',
-        'data',
-        'data_type',
-    ]
-
 
 class FileItem(KingfisherItem):
     number = scrapy.Field()
@@ -46,20 +28,6 @@ class FileItem(KingfisherItem):
     data_type = scrapy.Field()
     encoding = scrapy.Field()
 
-    required = [
-        'number',
-        'file_name',
-        'url',
-        'data',
-        'data_type',
-    ]
-
 
 class FileError(KingfisherItem):
     errors = scrapy.Field()
-
-    required = [
-        'file_name',
-        'url',
-        'errors',
-    ]
diff --git a/kingfisher_scrapy/pipelines.py b/kingfisher_scrapy/pipelines.py
index 72255757e..68172e696 100644
--- a/kingfisher_scrapy/pipelines.py
+++ b/kingfisher_scrapy/pipelines.py
@@ -1,18 +1,31 @@
 # https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 # https://docs.scrapy.org/en/latest/topics/signals.html#item-signals
+
+import os
+import pathlib
+
+import jsonref as jsonref
+from jsonschema import FormatChecker
+from jsonschema.validators import Draft4Validator
+
 from kingfisher_scrapy.items import File, FileItem
 
 
 class Validate:
     def __init__(self):
+        self.validators = {}
         self.files = set()
         self.file_items = set()
+        schema_path = pathlib.Path(os.path.dirname(os.path.abspath(__file__)), 'item_schema')
+        for item in ('File', 'FileError', 'FileItem'):
+            filename = os.path.join(schema_path, f'{item}.json')
+            with open(filename) as f:
+                schema = jsonref.load(f, base_uri=schema_path.as_uri() + '/')
+            self.validators[item] = Draft4Validator(schema, format_checker=FormatChecker())
 
     def process_item(self, item, spider):
         if hasattr(item, 'validate'):
-            # We call this in the item pipeline to guarantee that all items are validated. However, its backtrace isn't
-            # as helpful for debugging, so we could also call it in ``BaseSpider`` if this becomes an issue.
-            item.validate()
+            self.validators.get(item.__class__.__name__).validate(dict(item))
 
         if isinstance(item, FileItem):
             key = (item['file_name'], item['number'])
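
A rough usage sketch of the new pipeline, with made-up values (it mirrors the tests at the end of this diff): an item whose class sets validate = True is checked against the Draft4Validator registered under its class name, and an invalid field now surfaces as jsonschema's ValidationError rather than the removed MissingRequiredFieldError.

    from jsonschema import ValidationError

    from kingfisher_scrapy.items import File
    from kingfisher_scrapy.pipelines import Validate

    pipeline = Validate()
    item = File({
        'file_name': 'example.json',
        'url': 'http://example.com/example.json',
        'data': '{"releases": []}',
        'data_type': 'release_package',
    })
    pipeline.process_item(item, None)  # returns the item unchanged when it is valid

    item['data_type'] = 'not a valid data type'
    try:
        pipeline.process_item(item, None)
    except ValidationError as e:
        print(e.message)  # e.g. "'not a valid data type' is not one of [...]"
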
+ """ name = 'afghanistan_releases' data_type = 'release' diff --git a/kingfisher_scrapy/spiders/argentina_buenos_aires.py b/kingfisher_scrapy/spiders/argentina_buenos_aires.py index 4b1461a65..d6d3bbf67 100644 --- a/kingfisher_scrapy/spiders/argentina_buenos_aires.py +++ b/kingfisher_scrapy/spiders/argentina_buenos_aires.py @@ -8,10 +8,10 @@ class ArgentinaBuenosAires(ZipSpider): """ - Bulk download documentation - https://data.buenosaires.gob.ar/dataset/buenos-aires-compras/archivo/2a3d077c-71b6-4ba7-8924-f3e38cf1b8fc API documentation https://data.buenosaires.gob.ar/acerca/ckan + Bulk download documentation + https://data.buenosaires.gob.ar/dataset/buenos-aires-compras/archivo/2a3d077c-71b6-4ba7-8924-f3e38cf1b8fc Spider arguments sample Downloads the zip file and sends 10 releases to kingfisher process. diff --git a/kingfisher_scrapy/spiders/argentina_vialidad.py b/kingfisher_scrapy/spiders/argentina_vialidad.py index 160ec7902..385ff6652 100644 --- a/kingfisher_scrapy/spiders/argentina_vialidad.py +++ b/kingfisher_scrapy/spiders/argentina_vialidad.py @@ -4,6 +4,13 @@ class ArgentinaVialidad(SimpleSpider): + """ + API documentation + https://datosabiertos.vialidad.gob.ar/ui/index.html#!/datos_abiertos + Spider arguments + sample + Ignored, data is downloaded from a single JSON file. + """ name = 'argentina_vialidad' data_type = 'release_package_list' diff --git a/kingfisher_scrapy/spiders/canada_buyandsell.py b/kingfisher_scrapy/spiders/canada_buyandsell.py index 546e7fcb8..416f78d61 100644 --- a/kingfisher_scrapy/spiders/canada_buyandsell.py +++ b/kingfisher_scrapy/spiders/canada_buyandsell.py @@ -3,6 +3,13 @@ class CanadaBuyAndSell(SimpleSpider): + """ + API documentation + https://buyandsell.gc.ca/procurement-data/open-contracting-data-standard-pilot/download-ocds-pilot-data + Spider arguments + sample + Downloads a release package with data for the oldest fiscal year available (2013-2014). + """ name = 'canada_buyandsell' data_type = 'release_package' diff --git a/kingfisher_scrapy/spiders/canada_montreal.py b/kingfisher_scrapy/spiders/canada_montreal.py index 25b5ea275..39778c882 100644 --- a/kingfisher_scrapy/spiders/canada_montreal.py +++ b/kingfisher_scrapy/spiders/canada_montreal.py @@ -7,6 +7,13 @@ class CanadaMontreal(SimpleSpider): + """ + API documentation + http://donnees.ville.montreal.qc.ca/dataset/contrats-et-subventions-api + Spider arguments + sample + Downloads the first page of releases returned by the main endpoint. + """ name = 'canada_montreal' data_type = 'release_package' step = 10000 diff --git a/kingfisher_scrapy/spiders/colombia.py b/kingfisher_scrapy/spiders/colombia.py index dbac11d97..e4eb357de 100644 --- a/kingfisher_scrapy/spiders/colombia.py +++ b/kingfisher_scrapy/spiders/colombia.py @@ -29,14 +29,22 @@ class Colombia(LinksSpider): If ``from_date`` is provided and ``until_date`` don't, defaults to today. 
""" name = 'colombia' - next_page_formatter = staticmethod(parameters('page')) + next_page_formatter = staticmethod(parameters('_id')) default_from_date = '2011-01-01' + @classmethod + def from_crawler(cls, crawler, *args, **kwargs): + spider = super().from_crawler(crawler, *args, **kwargs) + if (spider.from_date or spider.until_date) and hasattr(spider, 'year'): + raise scrapy.exceptions.CloseSpider('You cannot specify both a year spider argument and ' + 'from_date/until_date spider argument(s).') + return spider + def start_requests(self): base_url = 'https://apiocds.colombiacompra.gov.co:8443/apiCCE2.0/rest/releases' if hasattr(self, 'year'): base_url += f'/page/{int(self.year)}' - if self.from_date or self.until_date: + elif self.from_date or self.until_date: from_date = self.from_date.strftime(self.date_format) until_date = self.until_date.strftime(self.date_format) base_url += f'/dates/{from_date}/{until_date}' diff --git a/kingfisher_scrapy/spiders/dominican_republic.py b/kingfisher_scrapy/spiders/dominican_republic.py index 9d7121de7..85aa10b45 100644 --- a/kingfisher_scrapy/spiders/dominican_republic.py +++ b/kingfisher_scrapy/spiders/dominican_republic.py @@ -9,6 +9,13 @@ class DominicanRepublic(BaseSpider): + """ + Bulk download documentation + https://www.dgcp.gob.do/estandar-mundial-ocds/ + Spider arguments + sample + Downloads a release package for the oldest year (2018, first link in the downloads page). + """ name = 'dominican_republic' def start_requests(self): diff --git a/kingfisher_scrapy/spiders/france.py b/kingfisher_scrapy/spiders/france.py index 57423f6c9..bb3586589 100644 --- a/kingfisher_scrapy/spiders/france.py +++ b/kingfisher_scrapy/spiders/france.py @@ -7,6 +7,13 @@ class France(SimpleSpider): + """ + Swagger API documentation + https://doc.data.gouv.fr/api/reference/ + Spider arguments + sample + Downloads the first OCDS package found using the CKAN API. + """ name = 'france' data_type = 'release_package' diff --git a/kingfisher_scrapy/spiders/georgia_records.py b/kingfisher_scrapy/spiders/georgia_records.py index 66fe6d7a2..aa5d28c00 100644 --- a/kingfisher_scrapy/spiders/georgia_records.py +++ b/kingfisher_scrapy/spiders/georgia_records.py @@ -5,6 +5,13 @@ class GeorgiaRecords(LinksSpider): + """ + Swagger API documentation + https://odapi.spa.ge/api/swagger.ui + Spider arguments + sample + Downloads the first page of packages returned by the record list endpoint. + """ name = 'georgia_records' data_type = 'record_package' next_page_formatter = staticmethod(parameters('page')) diff --git a/kingfisher_scrapy/spiders/georgia_releases.py b/kingfisher_scrapy/spiders/georgia_releases.py index 51954dfbd..6f10324d8 100644 --- a/kingfisher_scrapy/spiders/georgia_releases.py +++ b/kingfisher_scrapy/spiders/georgia_releases.py @@ -5,6 +5,13 @@ class GeorgiaReleases(LinksSpider): + """ + Swagger API documentation + https://odapi.spa.ge/api/swagger.ui + Spider arguments + sample + Downloads the first page of packages returned by the release list endpoint. + """ name = 'georgia_releases' data_type = 'release_package' next_page_formatter = staticmethod(parameters('page')) diff --git a/kingfisher_scrapy/spiders/honduras_cost.py b/kingfisher_scrapy/spiders/honduras_cost.py index b0525e1f7..d58c9ad45 100644 --- a/kingfisher_scrapy/spiders/honduras_cost.py +++ b/kingfisher_scrapy/spiders/honduras_cost.py @@ -4,6 +4,11 @@ class HondurasCoST(SimpleSpider): + """ + Spider arguments + sample + Ignored, a single file is downloaded. 
+ """ name = 'honduras_cost' data_type = 'record_package' diff --git a/kingfisher_scrapy/spiders/honduras_oncae.py b/kingfisher_scrapy/spiders/honduras_oncae.py index 0d770660e..8ee680ac5 100644 --- a/kingfisher_scrapy/spiders/honduras_oncae.py +++ b/kingfisher_scrapy/spiders/honduras_oncae.py @@ -5,6 +5,13 @@ class HondurasONCAE(ZipSpider): + """ + Bulk download documentation + http://oncae.gob.hn/datosabiertos + Spider arguments + sample + Downloads the first package listed on the downloads page. + """ name = 'honduras_oncae' data_type = 'release_package' diff --git a/kingfisher_scrapy/spiders/honduras_portal_bulk_files.py b/kingfisher_scrapy/spiders/honduras_portal_bulk_files.py index f82b55236..2ba869669 100644 --- a/kingfisher_scrapy/spiders/honduras_portal_bulk_files.py +++ b/kingfisher_scrapy/spiders/honduras_portal_bulk_files.py @@ -7,6 +7,13 @@ class HondurasPortalBulkFiles(SimpleSpider): + """ + Bulk download documentation + http://www.contratacionesabiertas.gob.hn/descargas/ + Spider arguments + sample + Downloads the first package listed in http://www.contratacionesabiertas.gob.hn/api/v1/descargas/?format=json. + """ name = 'honduras_portal_bulk_files' data_type = 'release_package' diff --git a/kingfisher_scrapy/spiders/indonesia_bandung.py b/kingfisher_scrapy/spiders/indonesia_bandung.py index 535f4065d..32a42be8b 100644 --- a/kingfisher_scrapy/spiders/indonesia_bandung.py +++ b/kingfisher_scrapy/spiders/indonesia_bandung.py @@ -26,6 +26,9 @@ class IndonesiaBandung(BaseSpider): contract year number uniqid id number + Spider arguments + sample + Downloads the first release listed for 2013 """ name = 'indonesia_bandung' diff --git a/kingfisher_scrapy/spiders/mexico_administracion_publica_federal.py b/kingfisher_scrapy/spiders/mexico_administracion_publica_federal.py index 977990595..5183c5fe0 100644 --- a/kingfisher_scrapy/spiders/mexico_administracion_publica_federal.py +++ b/kingfisher_scrapy/spiders/mexico_administracion_publica_federal.py @@ -9,7 +9,11 @@ class MexicoAdministracionPublicaFederal(SimpleSpider): """ - Bulk downloads: https://datos.gob.mx/busca/dataset/concentrado-de-contrataciones-abiertas-de-la-apf + Bulk download documentation + https://datos.gob.mx/busca/dataset/concentrado-de-contrataciones-abiertas-de-la-apf + Spider arguments + sample + Downloads the records on the first page of the list endpoint. """ name = 'mexico_administracion_publica_federal' data_type = 'record_package_list_in_results' diff --git a/kingfisher_scrapy/spiders/moldova.py b/kingfisher_scrapy/spiders/moldova.py index fa9134a8a..04d9698a5 100644 --- a/kingfisher_scrapy/spiders/moldova.py +++ b/kingfisher_scrapy/spiders/moldova.py @@ -5,6 +5,11 @@ class Moldova(SimpleSpider): + """ + Spider arguments + sample + Downloads the first page of records for each available endpoint (budgets, tenders). + """ name = 'moldova' data_type = 'record_package' diff --git a/kingfisher_scrapy/spiders/moldova_old.py b/kingfisher_scrapy/spiders/moldova_old.py index db50f1d71..938b1113e 100644 --- a/kingfisher_scrapy/spiders/moldova_old.py +++ b/kingfisher_scrapy/spiders/moldova_old.py @@ -3,6 +3,13 @@ class MoldovaOld(SimpleSpider): + """ + Bulk download documentation + http://opencontracting.date.gov.md/downloads + Spider arguments + sample + Downloads a single JSON file containing data for 2017. 
+ """ name = 'moldova_old' data_type = 'release_package' diff --git a/kingfisher_scrapy/spiders/scotland.py b/kingfisher_scrapy/spiders/scotland.py index e8cb5973e..b35330105 100644 --- a/kingfisher_scrapy/spiders/scotland.py +++ b/kingfisher_scrapy/spiders/scotland.py @@ -5,6 +5,13 @@ class Scotland(SimpleSpider): + """ + API documentation + https://api.publiccontractsscotland.gov.uk/v1 + Spider arguments + sample + Downloads packages for releases dated one year ago, for each notice type available. + """ name = 'scotland' data_type = 'release_package' diff --git a/kingfisher_scrapy/spiders/uk_contracts_finder.py b/kingfisher_scrapy/spiders/uk_contracts_finder.py index 07cf63842..5fd8407c7 100644 --- a/kingfisher_scrapy/spiders/uk_contracts_finder.py +++ b/kingfisher_scrapy/spiders/uk_contracts_finder.py @@ -5,6 +5,11 @@ class UKContractsFinder(SimpleSpider): + """ + Spider arguments + sample + Downloads the first page of release packages returned by the main endpoint. + """ name = 'uk_contracts_finder' data_type = 'release_package_list_in_results' encoding = 'iso-8859-1' diff --git a/kingfisher_scrapy/spiders/uk_fts.py b/kingfisher_scrapy/spiders/uk_fts.py index ca15b7ebc..a26c32d49 100644 --- a/kingfisher_scrapy/spiders/uk_fts.py +++ b/kingfisher_scrapy/spiders/uk_fts.py @@ -5,6 +5,11 @@ class UKContractsFinder(LinksSpider): + """ + Spider arguments + sample + Downloads the first release package returned by the main endpoint. + """ name = 'uk_fts' data_type = 'release_package_in_ocdsReleasePackage_in_list_in_results' next_page_formatter = staticmethod(parameters('cursor')) diff --git a/requirements.in b/requirements.in index cb4eca0d0..41899567b 100644 --- a/requirements.in +++ b/requirements.in @@ -3,8 +3,11 @@ ijson>=3 jsonpointer +jsonref +jsonschema rarfile requests +rfc3987 Scrapy scrapyd-client sentry-sdk diff --git a/requirements.txt b/requirements.txt index 0bea0b9df..65ebc1554 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile # -attrs==19.3.0 # via automat, service-identity, twisted +attrs==19.3.0 # via automat, jsonschema, service-identity, twisted automat==0.8.0 # via twisted certifi==2019.11.28 # via requests, sentry-sdk cffi==1.13.2 # via cryptography @@ -15,8 +15,11 @@ cssselect==1.1.0 # via parsel, scrapy hyperlink==19.0.0 # via twisted idna==2.8 # via hyperlink, requests ijson==3.0.3 +importlib-metadata==1.6.1 # via jsonschema incremental==17.5.0 # via twisted jsonpointer==2.0 +jsonref==0.2 +jsonschema==3.2.0 lxml==4.4.2 # via parsel, scrapy parsel==1.5.2 # via scrapy protego==0.1.16 # via scrapy @@ -26,17 +29,20 @@ pycparser==2.19 # via cffi pydispatcher==2.0.5 # via scrapy pyhamcrest==1.9.0 # via twisted pyopenssl==19.1.0 # via scrapy +pyrsistent==0.16.0 # via jsonschema queuelib==1.5.0 # via scrapy rarfile==3.1 requests==2.22.0 +rfc3987==1.3.8 scrapy==1.8.0 scrapyd-client==1.1.0 sentry-sdk==0.14.4 service-identity==18.1.0 # via scrapy -six==1.13.0 # via automat, cryptography, parsel, protego, pyhamcrest, pyopenssl, scrapy, scrapyd-client, w3lib +six==1.13.0 # via automat, cryptography, jsonschema, parsel, protego, pyhamcrest, pyopenssl, pyrsistent, scrapy, scrapyd-client, w3lib twisted==20.3.0 # via scrapy urllib3==1.25.7 # via requests, sentry-sdk w3lib==1.21.0 # via parsel, scrapy +zipp==3.1.0 # via importlib-metadata zope.interface==4.7.1 # via scrapy, twisted # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements_dev.txt b/requirements_dev.txt index 9f58a35a3..e010b745f 100644 --- 
diff --git a/requirements.in b/requirements.in
index cb4eca0d0..41899567b 100644
--- a/requirements.in
+++ b/requirements.in
@@ -3,8 +3,11 @@
 ijson>=3
 jsonpointer
+jsonref
+jsonschema
 rarfile
 requests
+rfc3987
 Scrapy
 scrapyd-client
 sentry-sdk
diff --git a/requirements.txt b/requirements.txt
index 0bea0b9df..65ebc1554 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile
 #
-attrs==19.3.0             # via automat, service-identity, twisted
+attrs==19.3.0             # via automat, jsonschema, service-identity, twisted
 automat==0.8.0            # via twisted
 certifi==2019.11.28       # via requests, sentry-sdk
 cffi==1.13.2              # via cryptography
@@ -15,8 +15,11 @@ cssselect==1.1.0          # via parsel, scrapy
 hyperlink==19.0.0         # via twisted
 idna==2.8                 # via hyperlink, requests
 ijson==3.0.3
+importlib-metadata==1.6.1  # via jsonschema
 incremental==17.5.0       # via twisted
 jsonpointer==2.0
+jsonref==0.2
+jsonschema==3.2.0
 lxml==4.4.2               # via parsel, scrapy
 parsel==1.5.2             # via scrapy
 protego==0.1.16           # via scrapy
@@ -26,17 +29,20 @@ pycparser==2.19           # via cffi
 pydispatcher==2.0.5       # via scrapy
 pyhamcrest==1.9.0         # via twisted
 pyopenssl==19.1.0         # via scrapy
+pyrsistent==0.16.0        # via jsonschema
 queuelib==1.5.0           # via scrapy
 rarfile==3.1
 requests==2.22.0
+rfc3987==1.3.8
 scrapy==1.8.0
 scrapyd-client==1.1.0
 sentry-sdk==0.14.4
 service-identity==18.1.0  # via scrapy
-six==1.13.0               # via automat, cryptography, parsel, protego, pyhamcrest, pyopenssl, scrapy, scrapyd-client, w3lib
+six==1.13.0               # via automat, cryptography, jsonschema, parsel, protego, pyhamcrest, pyopenssl, pyrsistent, scrapy, scrapyd-client, w3lib
 twisted==20.3.0           # via scrapy
 urllib3==1.25.7           # via requests, sentry-sdk
 w3lib==1.21.0             # via parsel, scrapy
+zipp==3.1.0               # via importlib-metadata
 zope.interface==4.7.1     # via scrapy, twisted
 
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 9f58a35a3..e010b745f 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -21,13 +21,15 @@ flake8==3.7.9
 hyperlink==19.0.0
 idna==2.8
 ijson==3.0.3
-importlib-metadata==1.3.0  # via pluggy, pytest
+importlib-metadata==1.6.1
 incremental==17.5.0
 isort==4.3.21
 jsonpointer==2.0
+jsonref==0.2
+jsonschema==3.2.0
 lxml==4.4.2
 mccabe==0.6.1             # via flake8
-more-itertools==8.0.2     # via pytest, zipp
+more-itertools==8.0.2     # via pytest
 packaging==19.2           # via pytest
 parsel==1.5.2
 pip-tools==5.1.0
@@ -43,11 +45,13 @@ pyflakes==2.1.1           # via flake8
 pyhamcrest==1.9.0
 pyopenssl==19.1.0
 pyparsing==2.4.5          # via packaging
+pyrsistent==0.16.0
 pytest-cov==2.8.1
 pytest==5.3.2
 queuelib==1.5.0
 rarfile==3.1
 requests==2.22.0
+rfc3987==1.3.8
 scrapy==1.8.0
 scrapyd-client==1.1.0
 sentry-sdk==0.14.4
@@ -57,7 +61,7 @@ twisted==20.3.0
 urllib3==1.25.7
 w3lib==1.21.0
 wcwidth==0.1.7            # via pytest
-zipp==0.6.0               # via importlib-metadata
+zipp==3.1.0
 zope.interface==4.7.1
 
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/setup.py b/setup.py
index ad683d5b7..784a06960 100644
--- a/setup.py
+++ b/setup.py
@@ -8,6 +8,10 @@
         'kingfisher_scrapy',
         'kingfisher_scrapy.spiders',
     ],
+    package_data={
+        'kingfisher_scrapy': ['item_schema/*.json'],
+    },
+    include_package_data=True,
     entry_points={
         'scrapy': [
             'settings = kingfisher_scrapy.settings',
diff --git a/tests/test_validate.py b/tests/test_validate.py
index a89fa4ba3..6233a1799 100644
--- a/tests/test_validate.py
+++ b/tests/test_validate.py
@@ -1,7 +1,7 @@
 import pytest
+from jsonschema import ValidationError
 
-from kingfisher_scrapy.exceptions import MissingRequiredFieldError
-from kingfisher_scrapy.items import File, FileItem
+from kingfisher_scrapy.items import File, FileError, FileItem
 from kingfisher_scrapy.pipelines import Validate
 from tests import spider_with_crawler
 
@@ -9,20 +9,85 @@ def test_process_item():
     pipeline = Validate()
     item = File({
-        'file_name': '',
-        'data': '',
-        'data_type': '',
-        'url': '',
+        'file_name': 'test',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
     })
 
     assert pipeline.process_item(item, None) == item
 
+    item['data'] = item['data'].encode('ascii')
+    item['file_name'] = 'test2'
+
+    assert pipeline.process_item(item, None) == item
+
 
 def test_process_item_error():
     pipeline = Validate()
-    item = File()
+    item = File({
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
+    })
+
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+    item['file_name'] = 'test'
+    item['data_type'] = 'not a valid data type'
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+
+
+def test_process_file_item():
+    pipeline = Validate()
+    item = FileItem({
+        'file_name': 'test',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
+        'number': 1
+    })
+    assert pipeline.process_item(item, None) == item
+
 
-    with pytest.raises(MissingRequiredFieldError):
+def test_process_file_item_error():
+    pipeline = Validate()
+    item = FileItem({
+        'file_name': 'test',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://test.com',
+        'number': "2"
+    })
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+    item['number'] = None
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+
+
+def test_process_file_error():
+    pipeline = Validate()
+    item = FileError({
+        'file_name': 'test',
+        'url': 'http://test.com',
+        'errors': 'Error'
+    })
+    assert pipeline.process_item(item, None) == item
+
+
+def test_process_file_item_error_error():
+    pipeline = Validate()
+    item = FileError({
+        'file_name': 'test',
+        'url': 'http://test.com'
+    })
+    with pytest.raises(ValidationError):
+        pipeline.process_item(item, None)
+    item['errors'] = 'Error'
+    item['url'] = 'not an url'
+    with pytest.raises(ValidationError):
         pipeline.process_item(item, None)
 
 
@@ -31,9 +96,9 @@ def test_duplicate_file(caplog):
     spider = spider_with_crawler()
     item = File({
         'file_name': 'test1',
-        'data': '',
-        'data_type': '',
-        'url': '',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://example.com',
     })
 
     pipeline.process_item(item, spider)
@@ -51,9 +116,9 @@ def test_duplicate_file_item(caplog):
     spider = spider_with_crawler()
     item = FileItem({
         'file_name': 'test1',
-        'data': '',
-        'data_type': '',
-        'url': '',
+        'data': 'data',
+        'data_type': 'release_package',
+        'url': 'http://example.com',
         'number': 1
     })
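
For orientation only, a hypothetical spider callback that yields a File accepted by the Validate pipeline; the inline comments restate the constraints from File.json and item.json earlier in this diff (the file name and data are invented):

    from kingfisher_scrapy.items import File


    def parse(self, response):  # hypothetical callback on a spider
        yield File({
            'file_name': 'all.json',          # "pattern": "^[^/]+$" — no slashes allowed
            'url': response.request.url,      # "format": "uri", checked via rfc3987
            'data': response.text,            # "minLength": 1
            'data_type': 'release_package',   # must be one of the enumerated data types
        })
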