In [None]:
import pandas as pd
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

## Prepare JSON Data

In [None]:
# Load the JSON file into a DataFrame, then flatten it into one dict per row
df = pd.read_json("books.json", encoding="utf-8")
books = df.to_dict(orient="records")
# The first five records form a small sample reused by later cells
samples = books[0:5]

## Serialization & Deserialization

#### Schema Parsing

In [None]:
# Parse the Avro schema definition stored in Book.avsc.
# The context manager closes the file handle as soon as its bytes are read,
# instead of leaving an open handle behind in the kernel.
with open("Book.avsc", "rb") as schema_file:
    schema = avro.schema.parse(schema_file.read())

#### JSON data serialized into Avro format

In [None]:
# Create an Avro file and write the serialized records into it
with DataFileWriter(
    open("books.avro", "wb"),
    DatumWriter(),
    schema,
) as writer:
    # Append each book record to the Avro file in turn
    for record in books:
        writer.append(record)

#### Deserialize Avro data back into Python dictionaries

In [None]:
# Read the records back from the Avro file. The `with` block closes the reader
# even if iteration fails part-way through.
with DataFileReader(open("books.avro", "rb"), DatumReader()) as reader:
    # The loop variable is `book`, not `books`, so the list of records loaded
    # from books.json is not overwritten by the last record read here.
    for book in reader:
        print(book)

## Schema Evolution

In [None]:
# Parse the evolved schema stored in Book_with_rates.avsc.
# The context manager closes the file handle once its contents are read.
with open("Book_with_rates.avsc", "rb") as schema_file:
    schema_rates = avro.schema.parse(schema_file.read())

#### Serialize data with new schema

In [None]:
# Write the sample records to samples.avro using the schema_rates schema
with DataFileWriter(
    open("samples.avro", "wb"),
    DatumWriter(),
    schema_rates,
) as writer:
    for record in samples:
        writer.append(record)

#### Deserialize data with both old and new schemas

In [None]:
# Read samples.avro back through the original schema.
# DatumReader's first positional parameter is the *writer's* schema, which
# DataFileReader replaces with the schema stored in the file header. The schema
# to resolve against therefore has to be passed as `readers_schema`; passed
# positionally it is silently ignored.
with DataFileReader(open("samples.avro", "rb"), DatumReader(readers_schema=schema)) as reader:
    for book in reader:
        print(book)

In [None]:
# Read samples.avro back through the schema_rates schema.
# The schema is passed as `readers_schema`: DatumReader's first positional
# parameter is the writer's schema, and DataFileReader replaces that with the
# schema stored in the file header.
with DataFileReader(open("samples.avro", "rb"), DatumReader(readers_schema=schema_rates)) as reader:
    for book in reader:
        print(book)

## Compatibility Testing

#### Serialization with the old schema and deserialization with the new schema

In [None]:
# Write the sample records to samples_oldSchema.avro using the original schema
with DataFileWriter(
    open("samples_oldSchema.avro", "wb"),
    DatumWriter(),
    schema,
) as writer:
    for record in samples:
        writer.append(record)

In [None]:
# Read data written with the original schema through the schema_rates schema.
# `readers_schema` is required for the compatibility check to happen at all:
# a schema passed positionally lands in the writer's-schema slot, which
# DataFileReader replaces with the schema stored in the file header.
with DataFileReader(open("samples_oldSchema.avro", "rb"), DatumReader(readers_schema=schema_rates)) as reader:
    for book in reader:
        print(book)

#### Serialization with the new schema and deserialization with the old schema

In [None]:
# Write the sample records to samples_newSchema.avro using the schema_rates schema
with DataFileWriter(
    open("samples_newSchema.avro", "wb"),
    DatumWriter(),
    schema_rates,
) as writer:
    for record in samples:
        writer.append(record)

In [None]:
# Read data written with the schema_rates schema through the original schema.
# `readers_schema` is required for the compatibility check to happen at all:
# a schema passed positionally lands in the writer's-schema slot, which
# DataFileReader replaces with the schema stored in the file header.
with DataFileReader(open("samples_newSchema.avro", "rb"), DatumReader(readers_schema=schema)) as reader:
    for book in reader:
        print(book)

## Backward-incompatible Change

In [None]:
# Parse the schema stored in Book_no_images.avsc.
# The result is bound only to schema_noImage, so schema_rates keeps the
# Book_with_rates schema and cells that use it behave the same when re-run.
# The context manager closes the file handle once its contents are read.
with open("Book_no_images.avsc", "rb") as schema_file:
    schema_noImage = avro.schema.parse(schema_file.read())

In [None]:
# Write the sample records to samples.avro using the schema_noImage schema.
# NOTE(review): this reuses the samples.avro filename written earlier in the
# notebook, so the earlier file is overwritten — confirm that is intended.
with DataFileWriter(
    open("samples.avro", "wb"),
    DatumWriter(),
    schema_noImage,
) as writer:
    for record in samples:
        writer.append(record)