**Sample JSON Data**

In [1]:
# Sample book records written as a literal list of nested dicts. Each record
# carries scalar fields (book_id, title, genre) plus two nested objects
# (author, details). Evaluating the cell simply echoes the parsed structure.
[
    {
        "book_id": 1,
        "title": "The Great Gatsby",
        "author": {
            "first_name": "F. Scott",
            "last_name": "Fitzgerald"
        },
        "genre": "Fiction",
        "details": {
            "publisher": "Scribner",
            "year": 1925,
            "price": 10.99
        }
    },
    {
        "book_id": 2,
        "title": "To Kill a Mockingbird",
        "author": {
            "first_name": "Harper",
            "last_name": "Lee"
        },
        "genre": "Fiction",
        "details": {
            "publisher": "J.B. Lippincott & Co.",
            "year": 1960,
            "price": 7.99
        }
    },
    {
        "book_id": 3,
        "title": "1984",
        "author": {
            "first_name": "George",
            "last_name": "Orwell"
        },
        "genre": "Dystopian",
        "details": {
            "publisher": "Secker & Warburg",
            "year": 1949,
            "price": 8.99
        }
    }
]


[{'book_id': 1,
  'title': 'The Great Gatsby',
  'author': {'first_name': 'F. Scott', 'last_name': 'Fitzgerald'},
  'genre': 'Fiction',
  'details': {'publisher': 'Scribner', 'year': 1925, 'price': 10.99}},
 {'book_id': 2,
  'title': 'To Kill a Mockingbird',
  'author': {'first_name': 'Harper', 'last_name': 'Lee'},
  'genre': 'Fiction',
  'details': {'publisher': 'J.B. Lippincott & Co.',
   'year': 1960,
   'price': 7.99}},
 {'book_id': 3,
  'title': '1984',
  'author': {'first_name': 'George', 'last_name': 'Orwell'},
  'genre': 'Dystopian',
  'details': {'publisher': 'Secker & Warburg', 'year': 1949, 'price': 8.99}}]

**1. Load JSON data into a DataFrame.**

In [2]:
import pandas as pd
import json

# Raw JSON text for three books; "author" and "details" are nested objects.
json_data = '''
[
    {
        "book_id": 1,
        "title": "The Great Gatsby",
        "author": {
            "first_name": "F. Scott",
            "last_name": "Fitzgerald"
        },
        "genre": "Fiction",
        "details": {
            "publisher": "Scribner",
            "year": 1925,
            "price": 10.99
        }
    },
    {
        "book_id": 2,
        "title": "To Kill a Mockingbird",
        "author": {
            "first_name": "Harper",
            "last_name": "Lee"
        },
        "genre": "Fiction",
        "details": {
            "publisher": "J.B. Lippincott & Co.",
            "year": 1960,
            "price": 7.99
        }
    },
    {
        "book_id": 3,
        "title": "1984",
        "author": {
            "first_name": "George",
            "last_name": "Orwell"
        },
        "genre": "Dystopian",
        "details": {
            "publisher": "Secker & Warburg",
            "year": 1949,
            "price": 8.99
        }
    }
]
'''

# Parse the text into a list of dicts; later cells reuse `data` directly.
data = json.loads(json_data)

# One row per record. The nested objects are NOT expanded here: the
# "author" and "details" columns each hold a whole dict per row.
books_df = pd.DataFrame(data=data)
print(books_df)


   book_id                  title  \
0        1       The Great Gatsby   
1        2  To Kill a Mockingbird   
2        3                   1984   

                                              author      genre  \
0  {'first_name': 'F. Scott', 'last_name': 'Fitzg...    Fiction   
1       {'first_name': 'Harper', 'last_name': 'Lee'}    Fiction   
2    {'first_name': 'George', 'last_name': 'Orwell'}  Dystopian   

                                             details  
0  {'publisher': 'Scribner', 'year': 1925, 'price...  
1  {'publisher': 'J.B. Lippincott & Co.', 'year':...  
2  {'publisher': 'Secker & Warburg', 'year': 1949...  


**2. Normalize JSON data to a flat table.**

In [3]:
from pandas import json_normalize

# Flatten the nested objects: every nested key becomes its own column,
# named with a dotted path such as "author.first_name".
books_flat = json_normalize(data=data)
print(books_flat)


   book_id                  title      genre author.first_name  \
0        1       The Great Gatsby    Fiction          F. Scott   
1        2  To Kill a Mockingbird    Fiction            Harper   
2        3                   1984  Dystopian            George   

  author.last_name      details.publisher  details.year  details.price  
0       Fitzgerald               Scribner          1925          10.99  
1              Lee  J.B. Lippincott & Co.          1960           7.99  
2           Orwell       Secker & Warburg          1949           8.99  


**3. Extract a nested JSON object to columns.**

In [None]:
# books_df stays a copy of the flattened frame. The cells below serialize and
# merge books_df, and their recorded output uses the dotted json_normalize
# column names.
books_df = books_flat.copy()

# The nested dicts only exist in the un-normalized frame. json_normalize has
# already replaced 'author' and 'details' with dotted columns in books_flat,
# so indexing books_flat['author'] raises KeyError. Extract from a frame built
# directly from the parsed records instead.
books_nested_df = pd.DataFrame(data)

# Expand each dict column into a small frame of its own, keeping the row
# index so the pieces line up with the source rows.
author_cols = pd.DataFrame(books_nested_df['author'].tolist(), index=books_nested_df.index)
detail_cols = pd.DataFrame(books_nested_df['details'].tolist(), index=books_nested_df.index)

# Drop the dict-valued columns and attach the extracted fields. Selecting by
# key (rather than relying on positional order) keeps the mapping explicit.
books_extracted_df = books_nested_df.drop(columns=['author', 'details'])
books_extracted_df[['author_first_name', 'author_last_name']] = author_cols[['first_name', 'last_name']]
books_extracted_df[['publisher', 'year', 'price']] = detail_cols[['publisher', 'year', 'price']]
print(books_extracted_df)


**4. Convert DataFrame to JSON string.**

In [5]:
# Serialize in JSON Lines form: one JSON object per row, one row per line.
json_string = books_df.to_json(lines=True, orient='records')
print(json_string)


{"book_id":1,"title":"The Great Gatsby","genre":"Fiction","author.first_name":"F. Scott","author.last_name":"Fitzgerald","details.publisher":"Scribner","details.year":1925,"details.price":10.99}
{"book_id":2,"title":"To Kill a Mockingbird","genre":"Fiction","author.first_name":"Harper","author.last_name":"Lee","details.publisher":"J.B. Lippincott & Co.","details.year":1960,"details.price":7.99}
{"book_id":3,"title":"1984","genre":"Dystopian","author.first_name":"George","author.last_name":"Orwell","details.publisher":"Secker & Warburg","details.year":1949,"details.price":8.99}



**5. Write a DataFrame to a JSON file.**

In [6]:
# Write the rows as an indented JSON array of records to books.json
# (relative path, so it lands in the current working directory).
books_df.to_json(path_or_buf='books.json', orient='records', indent=4)


**6. Load JSON data from a URL.**

In [None]:
import requests

import pandas as pd
import requests

# URL to the JSON data
# NOTE(review): the file is named "relaxed.json", which suggests it may not be
# strict JSON - confirm the endpoint actually parses with response.json().
url = 'https://raw.githubusercontent.com/jdorfman/awesome-json-datasets/master/tests/relaxed.json'

# Fetch the data from the URL. requests has no default timeout, so set one to
# keep the cell from hanging indefinitely, and report network failures
# instead of leaving a traceback in the notebook.
try:
    response = requests.get(url, timeout=10)
except requests.exceptions.RequestException as exc:
    print(f"Failed to retrieve data: {exc}")
else:
    # Check if the request was successful
    if response.status_code == 200:
        try:
            # Load JSON data; a body that is not valid JSON raises a
            # ValueError subclass.
            data_from_url = response.json()
        except ValueError as exc:
            print(f"Response body is not valid JSON: {exc}")
        else:
            # Convert JSON data to DataFrame
            books_from_url_df = pd.DataFrame(data_from_url)

            # Display the DataFrame
            print(books_from_url_df)
    else:
        print(f"Failed to retrieve data: {response.status_code}")



**7. Handle missing keys in JSON objects.**

In [8]:
# Two records with different shapes: the first has no "author" object at all,
# the second has an author with only a first name.
json_data_with_missing_keys = '''
[
    {
        "book_id": 1,
        "title": "The Great Gatsby"
    },
    {
        "book_id": 2,
        "title": "To Kill a Mockingbird",
        "author": {
            "first_name": "Harper"
        }
    }
]
'''

# Parse, then flatten. Columns are the union of keys seen across records;
# a record that lacks a key gets NaN in that column.
data_missing_keys = json.loads(json_data_with_missing_keys)
books_missing_keys_df = pd.json_normalize(data=data_missing_keys)
print(books_missing_keys_df)



   book_id                  title author.first_name
0        1       The Great Gatsby               NaN
1        2  To Kill a Mockingbird            Harper


**8. Parse JSON data with a specific schema.**

In [None]:
import pandas as pd
import json

# Sample JSON data with some incorrect fields. JSON has no comment syntax
# (json.loads rejects '#'), so the deliberate problems are described here
# rather than inside the string:
#   - second record: "book_id" is the string "two" instead of an integer
#   - third record:  "details.price" is a non-numeric string instead of a float
json_data_with_schema_issues = '''
[
    {
        "book_id": 1,
        "title": "The Great Gatsby",
        "author": {
            "first_name": "F. Scott",
            "last_name": "Fitzgerald"
        },
        "genre": "Fiction",
        "details": {
            "publisher": "Scribner",
            "year": 1925,
            "price": 10.99
        }
    },
    {
        "book_id": "two",
        "title": "To Kill a Mockingbird",
        "author": {
            "first_name": "Harper",
            "last_name": "Lee"
        },
        "genre": "Fiction",
        "details": {
            "publisher": "J.B. Lippincott & Co.",
            "year": 1960,
            "price": 7.99
        }
    },
    {
        "book_id": 3,
        "title": "1984",
        "author": {
            "first_name": "George",
            "last_name": "Orwell"
        },
        "genre": "Dystopian",
        "details": {
            "publisher": "Secker & Warburg",
            "year": 1949,
            "price": "eight point ninety nine"
        }
    }
]
'''

# Load JSON data and flatten nested objects into dotted column names
data_with_issues = json.loads(json_data_with_schema_issues)
books_df_with_issues = pd.json_normalize(data_with_issues)

# Define a function to validate the data types and required fields
def validate_schema(df):
    """Check a flattened books DataFrame against the expected schema.

    Args:
        df: DataFrame whose columns use dotted names such as
            'details.price'. Side effect: when the price column can be
            cast to float, ``df['details.price']`` is overwritten in place
            with the float-cast values.

    Returns:
        A list of human-readable error strings, in check order (book_id
        type, price type, then required fields). Empty when no check fails.
    """
    errors = []

    # Validate book_id. A missing column is not a type error; it is reported
    # by the required-field check below instead of raising KeyError here.
    if 'book_id' in df.columns and not pd.api.types.is_integer_dtype(df['book_id']):
        errors.append("book_id should be an integer")

    # Validate price by attempting the cast. astype raises ValueError for
    # non-numeric strings and TypeError for values such as dicts or lists.
    if 'details.price' in df.columns:
        try:
            df['details.price'] = df['details.price'].astype(float)
        except (ValueError, TypeError):
            errors.append("details.price should be a float")

    # Check required fields: each must exist as a column and contain no nulls
    required_fields = ['book_id', 'title', 'author.first_name', 'author.last_name', 'details.publisher', 'details.year', 'details.price']
    for field in required_fields:
        if field not in df.columns or df[field].isnull().any():
            errors.append(f"{field} is required and should not be null")

    return errors

# Run the checks and report: one line per problem, or a single all-clear line.
schema_errors = validate_schema(books_df_with_issues)
if not schema_errors:
    print("Data conforms to the schema")
else:
    for message in schema_errors:
        print(f"Schema Error: {message}")

# Show the flattened frame that was validated
print(books_df_with_issues)


**9. Flatten a deeply nested JSON structure.**

In [10]:
# One record whose "author" object itself contains a nested "bio" object,
# giving two levels of nesting below the top-level record.
deeply_nested_json = '''
[
    {
        "book_id": 1,
        "title": "The Great Gatsby",
        "author": {
            "first_name": "F. Scott",
            "last_name": "Fitzgerald",
            "bio": {
                "birth_year": 1896,
                "death_year": 1940
            }
        }
    }
]
'''

# Parse, then flatten every level. sep='_' joins the key path with
# underscores (author_bio_birth_year) instead of the default dots.
data_deeply_nested = json.loads(deeply_nested_json)
books_deeply_nested_df = json_normalize(data=data_deeply_nested, sep='_')
print(books_deeply_nested_df)



   book_id             title author_first_name author_last_name  \
0        1  The Great Gatsby          F. Scott       Fitzgerald   

   author_bio_birth_year  author_bio_death_year  
0                   1896                   1940  


**10. Merge JSON data with another DataFrame.**

In [11]:
# Ratings and review counts for the same three books, keyed by book_id
additional_data = {
    'book_id': [1, 2, 3],
    'rating': [4.5, 4.8, 4.2],
    'reviews': [2500, 3400, 4100]
}
additional_df = pd.DataFrame(data=additional_data)

# Join onto the books frame by book_id (default inner join), so each book
# row gains its rating and reviews columns.
merged_df = books_df.merge(additional_df, on='book_id')
print(merged_df)


   book_id                  title      genre author.first_name  \
0        1       The Great Gatsby    Fiction          F. Scott   
1        2  To Kill a Mockingbird    Fiction            Harper   
2        3                   1984  Dystopian            George   

  author.last_name      details.publisher  details.year  details.price  \
0       Fitzgerald               Scribner          1925          10.99   
1              Lee  J.B. Lippincott & Co.          1960           7.99   
2           Orwell       Secker & Warburg          1949           8.99   

   rating  reviews  
0     4.5     2500  
1     4.8     3400  
2     4.2     4100  
