# JSON Schema Validation

Play with schema validation using the json-schema language. This notebook uses the jsonschema library, https://github.com/python-jsonschema/jsonschema. 

In [2]:
import sys

In [6]:
import json

In [3]:
import pandas as pd

In [14]:
import jsonschema

In [16]:
from jsonschema import validate

In [64]:
data=pd.read_csv('mock_genotype.tsv',sep='\t')

In [41]:
schema = {
    "type": "object",
    "properties": {
        "FID": {"type": "number"},
        "IID": {"type": "string"},
        "Miss_Pheno": {"type": "number"},
        "N_Miss": {"type": "number"},
        "N_Geno": {"type": "number"},
        "MAF": {"type": "string"},
    },
    "required": ["FID", "TID", "MAF"]
}

In [31]:
jsonschema.Draft7Validator.check_schema(schema)

In [60]:
data

Unnamed: 0,FID (string),IID (string),Miss_Pheno (boolean),N_Miss (int),N_GENO (int),MAF (float)
0,P554,P554,N,4096,97722,0.02191
1,P557,P557,N,4011,97722,0.02105
2,P558,P558,N,4327,97722,0.02428
3,P562,P562,N,4099,97722,0.02195
4,P564,P564,N,4100,97722,0.02391
5,P567,P567,N,4013,97722,0.02705
6,P568,P568,Y,4227,97722,0.02478
7,P572,P572,N,4199,97722,0.02096
8,P574,P574,N,4034,97722,0.02783
9,P577,P577,N,4245,97722,0.02209


In [65]:
data.reset_index().to_json(orient='records')

'[{"index":0,"FID":"P554","IID":"P554","Miss_Pheno":"N","N_Miss":4096,"N_Geno":97722,"MAF":0.02191},{"index":1,"FID":"P557","IID":"P557","Miss_Pheno":"N","N_Miss":4011,"N_Geno":97722,"MAF":0.02105},{"index":2,"FID":"P558","IID":"P558","Miss_Pheno":"N","N_Miss":4327,"N_Geno":97722,"MAF":0.02428},{"index":3,"FID":"P562","IID":"P562","Miss_Pheno":"N","N_Miss":4099,"N_Geno":97722,"MAF":0.02195},{"index":4,"FID":"P564","IID":"P564","Miss_Pheno":"N","N_Miss":4100,"N_Geno":97722,"MAF":0.02391},{"index":5,"FID":"P567","IID":"P567","Miss_Pheno":"N","N_Miss":4013,"N_Geno":97722,"MAF":0.02705},{"index":6,"FID":"P568","IID":"P568","Miss_Pheno":"Y","N_Miss":4227,"N_Geno":97722,"MAF":0.02478},{"index":7,"FID":"P572","IID":"P572","Miss_Pheno":"N","N_Miss":4199,"N_Geno":97722,"MAF":0.02096},{"index":8,"FID":"P574","IID":"P574","Miss_Pheno":"N","N_Miss":4034,"N_Geno":97722,"MAF":0.02783},{"index":9,"FID":"P577","IID":"P577","Miss_Pheno":"N","N_Miss":4245,"N_Geno":97722,"MAF":0.02209},{"index":10,"FID":

In [66]:
import json

In [70]:
f = open('alumni_salary.json')
jsondata = json.load(f)
f.close()

In [77]:
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "gpa": {"type": "number"},
        "age": {"type": "number"},
        "salary": {"type": "number"},
    },
}

In [78]:
validate(jsondata, schema=schema)

ValidationError: [{'name': 'John Smith', 'gpa': 3.5, 'age': 25, 'salary': 50000}, {'name': 'Jane Doe', 'gpa': 3.2, 'age': 22, 'salary': 45000}, {'name': 'Bob Johnson', 'gpa': 3.8, 'age': 30, 'salary': 55000}, {'name': 'Sara Lee', 'gpa': 3.0, 'age': 27, 'salary': 40000}, {'name': 'Tom Brown', 'gpa': 3.7, 'age': 32, 'salary': 60000}, {'name': 'Emily Davis', 'gpa': 3.4, 'age': 23, 'salary': 42000}, {'name': 'Michael Miller', 'gpa': 3.9, 'age': 28, 'salary': 65000}, {'name': 'Jessica Wilson', 'gpa': 3.1, 'age': 24, 'salary': 46000}, {'name': 'Matthew Anderson', 'gpa': 3.6, 'age': 29, 'salary': 58000}, {'name': 'Nicholas Thompson', 'gpa': 3.3, 'age': 26, 'salary': 49000}, {'name': 'Ashley Moore', 'gpa': 3.8, 'age': 35, 'salary': 62000}, {'name': 'David Taylor', 'gpa': 3.0, 'age': 31, 'salary': 52000}, {'name': 'Joseph Hernandez', 'gpa': 3.7, 'age': 33, 'salary': 57000}, {'name': 'Brian Martinez', 'gpa': 3.4, 'age': 30, 'salary': 50000}, {'name': 'Brandon Lee', 'gpa': 3.9, 'age': 25, 'salary': 60000}, {'name': 'Adam Clark', 'gpa': 3.1, 'age': 22, 'salary': 45000}, {'name': 'Natalie Rodriguez', 'gpa': 3.6, 'age': 27, 'salary': 48000}, {'name': 'Justin Green', 'gpa': 3.3, 'age': 32, 'salary': 55000}] is not of type 'object'

Failed validating 'type' in schema:
    {'properties': {'age': {'type': 'number'},
                    'gpa': {'type': 'number'},
                    'name': {'type': 'string'},
                    'salary': {'type': 'number'}},
     'type': 'object'}

On instance:
    [{'age': 25, 'gpa': 3.5, 'name': 'John Smith', 'salary': 50000},
     {'age': 22, 'gpa': 3.2, 'name': 'Jane Doe', 'salary': 45000},
     {'age': 30, 'gpa': 3.8, 'name': 'Bob Johnson', 'salary': 55000},
     {'age': 27, 'gpa': 3.0, 'name': 'Sara Lee', 'salary': 40000},
     {'age': 32, 'gpa': 3.7, 'name': 'Tom Brown', 'salary': 60000},
     {'age': 23, 'gpa': 3.4, 'name': 'Emily Davis', 'salary': 42000},
     {'age': 28, 'gpa': 3.9, 'name': 'Michael Miller', 'salary': 65000},
     {'age': 24, 'gpa': 3.1, 'name': 'Jessica Wilson', 'salary': 46000},
     {'age': 29, 'gpa': 3.6, 'name': 'Matthew Anderson', 'salary': 58000},
     {'age': 26, 'gpa': 3.3, 'name': 'Nicholas Thompson', 'salary': 49000},
     {'age': 35, 'gpa': 3.8, 'name': 'Ashley Moore', 'salary': 62000},
     {'age': 31, 'gpa': 3.0, 'name': 'David Taylor', 'salary': 52000},
     {'age': 33, 'gpa': 3.7, 'name': 'Joseph Hernandez', 'salary': 57000},
     {'age': 30, 'gpa': 3.4, 'name': 'Brian Martinez', 'salary': 50000},
     {'age': 25, 'gpa': 3.9, 'name': 'Brandon Lee', 'salary': 60000},
     {'age': 22, 'gpa': 3.1, 'name': 'Adam Clark', 'salary': 45000},
     {'age': 27, 'gpa': 3.6, 'name': 'Natalie Rodriguez', 'salary': 48000},
     {'age': 32, 'gpa': 3.3, 'name': 'Justin Green', 'salary': 55000}]

In [23]:
validate("127.0.0.1", {"format" : "ipv4"}, format_checker=jsonschema.Draft202012Validator.FORMAT_CHECKER)