### Format Testing

In [1]:
from pyvalidator.format_validator import SchemaWrapper
from pyvalidator.format_validator import GeneratedSchema
from ruamel.yaml import YAML
from ruamel.yaml.constructor import DuplicateKeyError

In [2]:
import yaml
import pydantic

file_path = "./assets/schema/movies.yml"
# file_path = "./assets/schema/movie.yml"
# file_path = "./assets/schema/test.yml"

yaml = YAML()

try:
    with open(file_path,'r') as f:
        yaml_file = yaml.load(f)  
except DuplicateKeyError as e:
    print(f"Duplicate Key found: {e}") 


Duplicate Key found: while constructing a mapping
  in "./assets/schema/movies.yml", line 7, column 5
found duplicate key "name" with value "{'name': 'content rating', 'type': 'varchar', 'column': 'rating', 'desc': 'Content rating (e.g., R, PG-13)', 'fetch': False}" (original value: "{'name': 'title of the movie', 'type': 'varchar', 'column': 'name', 'desc': 'Title of the movie', 'fetch': True}")
  in "./assets/schema/movies.yml", line 13, column 5

To suppress this check see:
    http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys



In [3]:
print(yaml_file)

NameError: name 'yaml_file' is not defined

In [None]:
schema_key = list(yaml_file.keys())[0]
        
schema = yaml_file[schema_key]
print(schema)
        

schema = GeneratedSchema(**schema)
print(schema)

{'subject_area': 'movies', 'table_info': [{'table': 'MOVIES', 'joins': []}], 'columns': {'name': {'name': 'content rating', 'type': 'varchar', 'column': 'rating', 'desc': 'Content rating (e.g., R, PG-13)', 'fetch': False}, 'genre': {'name': 'genre of the movie', 'type': 'varchar', 'column': 'genre', 'desc': 'Genre of the movie', 'fetch': True}, 'year': {'name': 'production or release year', 'type': 'int', 'column': 'year', 'desc': 'Original production/release year in home country', 'fetch': True}, 'released_date': {'name': 'release date', 'type': 'date', 'column': 'released_date', 'desc': 'release date of a movie', 'fetch': True}, 'released_country': {'name': 'release country', 'type': 'varchar', 'column': 'released_country', 'desc': 'Country where the movie was released', 'fetch': False}, 'country': {'name': 'production country', 'type': 'varchar', 'column': 'country', 'desc': 'Production country (e.g., Denmark, United States)', 'fetch': False}, 'score': {'name': 'average rating', 'ty

In [None]:
columns = schema.columns

In [None]:
from pprint import pprint

pprint(columns)

{'budget': Column(name='production budget', type='number', column='budget', desc='Production budget', primary_key=None, foreign_key=None, table=None, fetch=False),
 'company': Column(name='production company', type='varchar', column='company', desc='Production company of the movie', primary_key=None, foreign_key=None, table=None, fetch=False),
 'country': Column(name='production country', type='varchar', column='country', desc='Production country (e.g., Denmark, United States)', primary_key=None, foreign_key=None, table=None, fetch=False),
 'director': Column(name='director of the movie', type='varchar', column='director', desc='Director of the movie', primary_key=None, foreign_key=None, table=None, fetch=False),
 'genre': Column(name='genre of the movie', type='varchar', column='genre', desc='Genre of the movie', primary_key=None, foreign_key=None, table=None, fetch=True),
 'gross': Column(name='box office gross', type='number', column='gross', desc='Box office gross', primary_key=Non

In [None]:
for column in columns.values():
    print(column.column)

rating
genre
year
released_date
released_country
country
score
votes
budget
gross
director
writer
star
company
runtime


In [None]:
try:
    # validator = SchemaWrapper[GeneratedSchema](root={f"{generated_schema}": {**data}})
    validator = SchemaWrapper[GeneratedSchema](**{"root":yaml_file})
except pydantic.ValidationError as exc:
    print(f"Validation error\n {exc}")

['name', 'genre', 'year', 'released_date', 'released_country', 'country', 'score', 'votes', 'budget', 'gross', 'director', 'writer', 'star', 'company', 'runtime']


### DDL Testing

In [None]:
from pyvalidator.schema_validator import SchemaValidator

In [None]:
file_path = "./assets/DDL/movies.sql"

with open(file_path, 'r') as f:
    ddl = f.read() 

ddl_validator = SchemaValidator(ddl)


In [None]:
ddl_validator.print_ddl()

[{'alter': {},
  'checks': [],
  'collection_items_terminated_by': None,
  'columns': [{'check': None,
               'default': None,
               'name': 'name',
               'nullable': True,
               'references': None,
               'size': None,
               'type': 'VARCHAR',
               'unique': False},
              {'check': None,
               'default': None,
               'name': 'genre',
               'nullable': True,
               'references': None,
               'size': None,
               'type': 'VARCHAR',
               'unique': False},
              {'check': None,
               'default': None,
               'name': 'year',
               'nullable': True,
               'references': None,
               'size': None,
               'type': 'INT',
               'unique': False},
              {'check': None,
               'default': None,
               'name': 'released_date',
               'nullable': True,
               'referenc

In [None]:
yaml_file

{'movies': {'subject_area': 'movies',
  'table_info': [{'table': 'MOVIES', 'joins': []}],
  'columns': {'name': {'name': 'content rating',
    'type': 'varchar',
    'column': 'rating',
    'desc': 'Content rating (e.g., R, PG-13)',
    'fetch': False},
   'genre': {'name': 'genre of the movie',
    'type': 'varchar',
    'column': 'genre',
    'desc': 'Genre of the movie',
    'fetch': True},
   'year': {'name': 'production or release year',
    'type': 'int',
    'column': 'year',
    'desc': 'Original production/release year in home country',
    'fetch': True},
   'released_date': {'name': 'release date',
    'type': 'date',
    'column': 'released_date',
    'desc': 'release date of a movie',
    'fetch': True},
   'released_country': {'name': 'release country',
    'type': 'varchar',
    'column': 'released_country',
    'desc': 'Country where the movie was released',
    'fetch': False},
   'country': {'name': 'production country',
    'type': 'varchar',
    'column': 'country',

In [None]:
ddl_validator.validate_schema(yaml_file)

['name', 'genre', 'year', 'released_date', 'released_country', 'country', 'score', 'votes', 'budget', 'gross', 'director', 'writer', 'star', 'company', 'runtime']


ValueError: DDL column 'name' not found in schema.