# Data Wrangling 1.3 Solutions

In [1]:
import math
import numpy as np
import pandas as pd

import psycopg2

import json

import csv

import pprint

from datetime import datetime as dt

In [2]:
connection = psycopg2.connect(
    user = "postgres",
    password = "ucb",
    host = "postgres",
    port = "5432",
    database = "postgres"
)

In [3]:
cursor = connection.cursor()

## You try it - read and understand the structure of the following json files:

* temp_sales_no_header.json
* temp_sales_header.json
* temp_sales_big_data.json

* temp_line_items_no_header.json
* temp_line_items_header.json
* temp_line_items_big_data.json

* temp_customers_no_header.json
* temp_customers_header.json
* temp_customers_big_data.json

* temp_products_no_header.json
* temp_products_header.json
* temp_products_big_data.json

* temp_holidays_no_header.json
* temp_holidays_header.json
* temp_holidays_big_data.json


In [4]:
def my_read_flat_json(file_name, limit_lines, limit_json):
    "read a flat json file, detect if it has a header, detect if it is in big data format"
    
    print("------------------------------------")
    print("   ", file_name)
    print("------------------------------------")
    
    f = open(file_name, "r")
    
    data = f.read()
    
    f.close()
    
    lines = data.splitlines(False)
    
    i = 0
    
    for line in lines:
        print(line)
        i += 1
        if i == limit_lines:
            break
    
    print("\n>>> Printed", limit_lines, "lines of", len(lines), "total lines.")
    
    if lines[0][0] == '{' and lines[0][-1] == '}':
        json_temp = []
        for line in lines:
            json_temp.append(json.loads(line))
    else:
        json_temp = json.loads(data)
    
    if type(json_temp) == dict:
        json_list = json_temp['data']
    else:
        json_list = json_temp
    
    i = 0
    
    for j in json_list:
        print("\n>>>JSON Object #", i, "unformatted:\n\n", j)
        print("\n>>>JSON Object #", i, "pretty printed:\n")
        pprint.pprint(j, sort_dicts=False, indent=2)
        i += 1
        if i == limit_json:
            break;

    print('\n')
    

In [5]:
my_read_flat_json("temp_sales_no_header.json", 25, 3)

------------------------------------
    temp_sales_no_header.json
------------------------------------
[
  {
    "store_id": 1,
    "sale_id": 128112,
    "customer_id": 3491,
    "sale_date": "2020-04-30",
    "total_amount": 24
  },
  {
    "store_id": 1,
    "sale_id": 144249,
    "customer_id": 1597,
    "sale_date": "2020-05-16",
    "total_amount": 84
  },
  {
    "store_id": 1,
    "sale_id": 163141,
    "customer_id": 4159,
    "sale_date": "2020-06-04",
    "total_amount": 96
  },
  {
    "store_id": 1,
    "sale_id": 169216,

>>> Printed 25 lines of 352 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'customer_id': 3491, 'sale_date': '2020-04-30', 'total_amount': 24}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'customer_id': 3491,
  'sale_date': '2020-04-30',
  'total_amount': 24}

>>>JSON Object # 1 unformatted:

 {'store_id': 1, 'sale_id': 144249, 'customer_id': 1597, 'sale_date': '2020-05-16', 'total_amou

In [6]:
my_read_flat_json("temp_sales_header.json", 25, 3)

------------------------------------
    temp_sales_header.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2021-07-10 23:39:40",
  "file_name": "temp_sales_header.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "store_id": 1,
      "sale_id": 128112,
      "customer_id": 3491,
      "sale_date": "2020-04-30",
      "total_amount": 24
    },
    {
      "store_id": 1,
      "sale_id": 144249,
      "customer_id": 1597,
      "sale_date": "2020-05-16",
      "total_amount": 84
    },
    {
      "store_id": 1,
      "sale_id": 163141,
      "customer_id": 4159,

>>> Printed 25 lines of 359 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'customer_id': 3491, 'sale_date': '2020-04-30', 'total_amount': 24}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'customer_id': 3491,
  'sale_date': '2020-04-30',
  '

In [7]:
my_read_flat_json("temp_sales_big_data.json", 25, 3)

------------------------------------
    temp_sales_big_data.json
------------------------------------
{"store_id": 1, "sale_id": 128112, "customer_id": 3491, "sale_date": "2020-04-30", "total_amount": 24}
{"store_id": 1, "sale_id": 144249, "customer_id": 1597, "sale_date": "2020-05-16", "total_amount": 84}
{"store_id": 1, "sale_id": 163141, "customer_id": 4159, "sale_date": "2020-06-04", "total_amount": 96}
{"store_id": 1, "sale_id": 169216, "customer_id": 4198, "sale_date": "2020-06-09", "total_amount": 144}
{"store_id": 1, "sale_id": 179181, "customer_id": 5394, "sale_date": "2020-06-18", "total_amount": 48}
{"store_id": 1, "sale_id": 181897, "customer_id": 1958, "sale_date": "2020-06-20", "total_amount": 48}
{"store_id": 1, "sale_id": 248269, "customer_id": 4260, "sale_date": "2020-08-22", "total_amount": 60}
{"store_id": 1, "sale_id": 250031, "customer_id": 6782, "sale_date": "2020-08-23", "total_amount": 24}
{"store_id": 1, "sale_id": 255285, "customer_id": 563, "sale_date": "202

In [8]:
my_read_flat_json("temp_line_items_no_header.json", 25, 3)

------------------------------------
    temp_line_items_no_header.json
------------------------------------
[
  {
    "store_id": 1,
    "sale_id": 128112,
    "line_item_id": 1,
    "product_id": 1,
    "quantity": 1
  },
  {
    "store_id": 1,
    "sale_id": 128112,
    "line_item_id": 2,
    "product_id": 8,
    "quantity": 1
  },
  {
    "store_id": 1,
    "sale_id": 144249,
    "line_item_id": 1,
    "product_id": 1,
    "quantity": 1
  },
  {
    "store_id": 1,
    "sale_id": 144249,

>>> Printed 25 lines of 1234 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'line_item_id': 1, 'product_id': 1, 'quantity': 1}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'line_item_id': 1,
  'product_id': 1,
  'quantity': 1}

>>>JSON Object # 1 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'line_item_id': 2, 'product_id': 8, 'quantity': 1}

>>>JSON Object # 1 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'line_item

In [9]:
my_read_flat_json("temp_line_items_header.json", 25, 3)

------------------------------------
    temp_line_items_header.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2021-07-10 14:34:57",
  "file_name": "temp_line_items_header.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "store_id": 1,
      "sale_id": 128112,
      "line_item_id": 1,
      "product_id": 1,
      "quantity": 1
    },
    {
      "store_id": 1,
      "sale_id": 128112,
      "line_item_id": 2,
      "product_id": 8,
      "quantity": 1
    },
    {
      "store_id": 1,
      "sale_id": 144249,
      "line_item_id": 1,

>>> Printed 25 lines of 1241 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'line_item_id': 1, 'product_id': 1, 'quantity': 1}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'line_item_id': 1,
  'product_id': 1,
  'quantity': 1}

>>>JSON Object # 1 unformatted:

 {'sto

In [10]:
my_read_flat_json("temp_line_items_big_data.json", 25, 3)

------------------------------------
    temp_line_items_big_data.json
------------------------------------
{"store_id": 1, "sale_id": 128112, "line_item_id": 1, "product_id": 1, "quantity": 1}
{"store_id": 1, "sale_id": 128112, "line_item_id": 2, "product_id": 8, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 1, "product_id": 1, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 2, "product_id": 2, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 3, "product_id": 4, "quantity": 2}
{"store_id": 1, "sale_id": 144249, "line_item_id": 4, "product_id": 6, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 5, "product_id": 8, "quantity": 2}
{"store_id": 1, "sale_id": 163141, "line_item_id": 1, "product_id": 1, "quantity": 3}
{"store_id": 1, "sale_id": 163141, "line_item_id": 2, "product_id": 3, "quantity": 1}
{"store_id": 1, "sale_id": 163141, "line_item_id": 3, "product_id": 5, "quantity": 1}
{"store_id": 1, "sale_id": 16314

In [11]:
my_read_flat_json("temp_customers_no_header.json", 25, 3)

------------------------------------
    temp_customers_no_header.json
------------------------------------
[
  {
    "customer_id": 563,
    "first_name": "Rose",
    "last_name": "Slimings",
    "street": "38 Iowa Street",
    "city": "Berkeley",
    "state": "CA",
    "zip": "94704",
    "closest_store_id": 1,
    "distance": 1
  },
  {
    "customer_id": 1597,
    "first_name": "Norry",
    "last_name": "Macauley",
    "street": "654 Sommers Plaza",
    "city": "Oakland",
    "state": "CA",
    "zip": "94612",
    "closest_store_id": 1,
    "distance": 3
  },
  {
    "customer_id": 1958,

>>> Printed 25 lines of 552 total lines.

>>>JSON Object # 0 unformatted:

 {'customer_id': 563, 'first_name': 'Rose', 'last_name': 'Slimings', 'street': '38 Iowa Street', 'city': 'Berkeley', 'state': 'CA', 'zip': '94704', 'closest_store_id': 1, 'distance': 1}

>>>JSON Object # 0 pretty printed:

{ 'customer_id': 563,
  'first_name': 'Rose',
  'last_name': 'Slimings',
  'street': '38 Iowa Street',

In [12]:
my_read_flat_json("temp_customers_header.json", 25, 3)

------------------------------------
    temp_customers_header.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2021-07-10 14:34:58",
  "file_name": "temp_customers_header.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "customer_id": 563,
      "first_name": "Rose",
      "last_name": "Slimings",
      "street": "38 Iowa Street",
      "city": "Berkeley",
      "state": "CA",
      "zip": "94704",
      "closest_store_id": 1,
      "distance": 1
    },
    {
      "customer_id": 1597,
      "first_name": "Norry",
      "last_name": "Macauley",
      "street": "654 Sommers Plaza",
      "city": "Oakland",
      "state": "CA",

>>> Printed 25 lines of 559 total lines.

>>>JSON Object # 0 unformatted:

 {'customer_id': 563, 'first_name': 'Rose', 'last_name': 'Slimings', 'street': '38 Iowa Street', 'city': 'Berkeley', 'state': 'CA', 'zip': '94704', 'closest_store_id':

In [13]:
my_read_flat_json("temp_customers_big_data.json", 25, 3)

------------------------------------
    temp_customers_big_data.json
------------------------------------
{"customer_id": 563, "first_name": "Rose", "last_name": "Slimings", "street": "38 Iowa Street", "city": "Berkeley", "state": "CA", "zip": "94704", "closest_store_id": 1, "distance": 1}
{"customer_id": 1597, "first_name": "Norry", "last_name": "Macauley", "street": "654 Sommers Plaza", "city": "Oakland", "state": "CA", "zip": "94612", "closest_store_id": 1, "distance": 3}
{"customer_id": 1958, "first_name": "Theresina", "last_name": "Penswick", "street": "5975 Twin Pines Hill", "city": "Berkeley", "state": "CA", "zip": "94707", "closest_store_id": 1, "distance": 3}
{"customer_id": 1991, "first_name": "Kevon", "last_name": "Wickett", "street": "472 Arizona Court", "city": "Berkeley", "state": "CA", "zip": "94707", "closest_store_id": 1, "distance": 3}
{"customer_id": 3491, "first_name": "Siouxie", "last_name": "M'Quharge", "street": "747 Westridge Center", "city": "Alameda", "state"

In [14]:
my_read_flat_json("temp_products_no_header.json", 25, 3)

------------------------------------
    temp_products_no_header.json
------------------------------------
[
  {
    "product_id": 1,
    "description": "Pistachio Salmon"
  },
  {
    "product_id": 2,
    "description": "Teriyaki Chicken"
  },
  {
    "product_id": 3,
    "description": "Spinach Orzo"
  },
  {
    "product_id": 4,
    "description": "Eggplant Lasagna"
  },
  {
    "product_id": 5,
    "description": "Chicken Salad"
  },
  {
    "product_id": 6,
    "description": "Curry Chicken"
  },

>>> Printed 25 lines of 34 total lines.

>>>JSON Object # 0 unformatted:

 {'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 0 pretty printed:

{'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 1 unformatted:

 {'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 1 pretty printed:

{'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 2 unformatted:

 {'product_id': 3, 'description': 'Spinach Orzo'}

>>>JSON Obj

In [15]:
my_read_flat_json("temp_products_header.json", 25, 3)

------------------------------------
    temp_products_header.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2021-07-10 14:34:58",
  "file_name": "temp_products_header.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "product_id": 1,
      "description": "Pistachio Salmon"
    },
    {
      "product_id": 2,
      "description": "Teriyaki Chicken"
    },
    {
      "product_id": 3,
      "description": "Spinach Orzo"
    },
    {
      "product_id": 4,
      "description": "Eggplant Lasagna"
    },
    {
      "product_id": 5,

>>> Printed 25 lines of 41 total lines.

>>>JSON Object # 0 unformatted:

 {'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 0 pretty printed:

{'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 1 unformatted:

 {'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 1 pretty printed:

In [16]:
my_read_flat_json("temp_products_big_data.json", 25, 3)

------------------------------------
    temp_products_big_data.json
------------------------------------
{"product_id": 1, "description": "Pistachio Salmon"}
{"product_id": 2, "description": "Teriyaki Chicken"}
{"product_id": 3, "description": "Spinach Orzo"}
{"product_id": 4, "description": "Eggplant Lasagna"}
{"product_id": 5, "description": "Chicken Salad"}
{"product_id": 6, "description": "Curry Chicken"}
{"product_id": 7, "description": "Tilapia Piccata"}
{"product_id": 8, "description": "Brocolli Stir Fry"}

>>> Printed 25 lines of 8 total lines.

>>>JSON Object # 0 unformatted:

 {'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 0 pretty printed:

{'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 1 unformatted:

 {'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 1 pretty printed:

{'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 2 unformatted:

 {'product_id': 3, 'description': 'Spinach Orzo'}


In [17]:
my_read_flat_json("temp_holidays_no_header.json", 25, 3)

------------------------------------
    temp_holidays_no_header.json
------------------------------------
[
  {
    "holiday_date": "2020-01-01",
    "description": "New Year's Day",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-01-20",
    "description": "MLK Day",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-02-17",
    "description": "President's Day",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-04-12",
    "description": "Easter",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-05-10",
    "description": "Mother's Day",
    "closed_flag": false

>>> Printed 25 lines of 62 total lines.

>>>JSON Object # 0 unformatted:

 {'holiday_date': '2020-01-01', 'description': "New Year's Day", 'closed_flag': False}

>>>JSON Object # 0 pretty printed:

{ 'holiday_date': '2020-01-01',
  'description': "New Year's Day",
  'closed_flag': False}

>>>JSON Object # 1 unformatted:

 {'holiday_date': '2020-01-20', 'description': 'MLK Day', 'clos

In [18]:
my_read_flat_json("temp_holidays_header.json", 25, 3)

------------------------------------
    temp_holidays_header.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2021-07-10 14:34:59",
  "file_name": "temp_holidays_header.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "holiday_date": "2020-01-01",
      "description": "New Year's Day",
      "closed_flag": false
    },
    {
      "holiday_date": "2020-01-20",
      "description": "MLK Day",
      "closed_flag": false
    },
    {
      "holiday_date": "2020-02-17",
      "description": "President's Day",
      "closed_flag": false
    },
    {
      "holiday_date": "2020-04-12",
      "description": "Easter",

>>> Printed 25 lines of 69 total lines.

>>>JSON Object # 0 unformatted:

 {'holiday_date': '2020-01-01', 'description': "New Year's Day", 'closed_flag': False}

>>>JSON Object # 0 pretty printed:

{ 'holiday_date': '2020-01-01',
  'description': "New Year's

In [19]:
my_read_flat_json("temp_holidays_big_data.json", 25, 3)


------------------------------------
    temp_holidays_big_data.json
------------------------------------
{"holiday_date": "2020-01-01", "description": "New Year's Day", "closed_flag": false}
{"holiday_date": "2020-01-20", "description": "MLK Day", "closed_flag": false}
{"holiday_date": "2020-02-17", "description": "President's Day", "closed_flag": false}
{"holiday_date": "2020-04-12", "description": "Easter", "closed_flag": false}
{"holiday_date": "2020-05-10", "description": "Mother's Day", "closed_flag": false}
{"holiday_date": "2020-05-25", "description": "Memorial Day", "closed_flag": false}
{"holiday_date": "2020-06-21", "description": "Father's Day", "closed_flag": false}
{"holiday_date": "2020-07-04", "description": "Independence Day", "closed_flag": false}
{"holiday_date": "2020-09-07", "description": "Labor Day", "closed_flag": false}
{"holiday_date": "2020-11-11", "description": "Veterans Days", "closed_flag": false}
{"holiday_date": "2020-11-26", "description": "Thanksgivin

## You try it - convert the following flat json files to csv format;  read the csv files to verify that they are ready for loading into a database:


* temp_sales_no_header.json
* temp_sales_header.json
* temp_sales_big_data.json

* temp_line_items_no_header.json
* temp_line_items_header.json
* temp_line_items_big_data.json

* temp_customers_no_header.json
* temp_customers_header.json
* temp_customers_big_data.json

* temp_products_no_header.json
* temp_products_header.json
* temp_products_big_data.json

* temp_holidays_no_header.json
* temp_holidays_header.json
* temp_holidays_big_data.json


In [20]:
def my_flat_json_2_csv(file_name):
    "read a flat json file, convert to csv"
        
    f = open(file_name, "r")
    
    data = f.read()
    
    f.close()
    
    output_file_name = file_name[:-4] + "csv"
    
    f = open(output_file_name, "w")
    
    lines = data.splitlines(False)
    
    if lines[0][0] == '{' and lines[0][-1] == '}':
        json_temp = []
        for line in lines:
            json_temp.append(json.loads(line))
    else:
        json_temp = json.loads(data)
    
    if type(json_temp) == dict:
        json_list = json_temp['data']
    else:
        json_list = json_temp
    
    dw = csv.DictWriter(f, json_list[0].keys())
    dw.writeheader()
    dw.writerows(json_list)
    
    f.close()

In [21]:
def my_read_csv_file(file_name, limit):
    "read the csv file and print only the first limit rows"
    
    csv_file = open(file_name, "r")
    
    csv_data = csv.reader(csv_file)
    
    i = 0
    
    for row in csv_data:
        i += 1
        if i <= limit:
            print(row)
            
    print("\nPrinted ", min(limit, i), "lines of ", i, "total lines.")

In [22]:
my_flat_json_2_csv("temp_sales_no_header.json")

In [23]:
my_flat_json_2_csv("temp_sales_header.json")

In [24]:
my_flat_json_2_csv("temp_sales_big_data.json")

In [25]:
my_flat_json_2_csv("temp_line_items_no_header.json")

In [26]:
my_flat_json_2_csv("temp_line_items_header.json")

In [27]:
my_flat_json_2_csv("temp_line_items_big_data.json")

In [28]:
my_flat_json_2_csv("temp_customers_no_header.json")

In [29]:
my_flat_json_2_csv("temp_customers_header.json")

In [30]:
my_flat_json_2_csv("temp_customers_big_data.json")

In [31]:
my_flat_json_2_csv("temp_products_no_header.json")

In [32]:
my_flat_json_2_csv("temp_products_header.json")

In [33]:
my_flat_json_2_csv("temp_products_big_data.json")

In [34]:
my_flat_json_2_csv("temp_holidays_no_header.json")

In [35]:
my_flat_json_2_csv("temp_holidays_header.json")

In [36]:
my_flat_json_2_csv("temp_holidays_big_data.json")

In [37]:
my_read_csv_file("temp_sales_no_header.csv", 25)

['store_id', 'sale_id', 'customer_id', 'sale_date', 'total_amount']
['1', '128112', '3491', '2020-04-30', '24']
['1', '144249', '1597', '2020-05-16', '84']
['1', '163141', '4159', '2020-06-04', '96']
['1', '169216', '4198', '2020-06-09', '144']
['1', '179181', '5394', '2020-06-18', '48']
['1', '181897', '1958', '2020-06-20', '48']
['1', '248269', '4260', '2020-08-22', '60']
['1', '250031', '6782', '2020-08-23', '24']
['1', '255285', '563', '2020-08-29', '36']
['1', '263524', '1991', '2020-09-07', '48']
['2', '105004', '12160', '2020-04-23', '36']
['2', '109083', '11291', '2020-04-27', '132']
['2', '115446', '11779', '2020-05-03', '12']
['2', '115912', '9298', '2020-05-04', '48']
['2', '119996', '14460', '2020-05-08', '36']
['2', '144107', '9046', '2020-06-06', '96']
['2', '158290', '9189', '2020-06-20', '84']
['2', '203726', '12493', '2020-08-08', '132']
['2', '217889', '10295', '2020-08-23', '24']
['2', '218323', '9528', '2020-08-23', '48']
['3', '99402', '17003', '2020-05-01', '36']


In [38]:
my_read_csv_file("temp_sales_header.csv", 25)

['store_id', 'sale_id', 'customer_id', 'sale_date', 'total_amount']
['1', '128112', '3491', '2020-04-30', '24']
['1', '144249', '1597', '2020-05-16', '84']
['1', '163141', '4159', '2020-06-04', '96']
['1', '169216', '4198', '2020-06-09', '144']
['1', '179181', '5394', '2020-06-18', '48']
['1', '181897', '1958', '2020-06-20', '48']
['1', '248269', '4260', '2020-08-22', '60']
['1', '250031', '6782', '2020-08-23', '24']
['1', '255285', '563', '2020-08-29', '36']
['1', '263524', '1991', '2020-09-07', '48']
['2', '105004', '12160', '2020-04-23', '36']
['2', '109083', '11291', '2020-04-27', '132']
['2', '115446', '11779', '2020-05-03', '12']
['2', '115912', '9298', '2020-05-04', '48']
['2', '119996', '14460', '2020-05-08', '36']
['2', '144107', '9046', '2020-06-06', '96']
['2', '158290', '9189', '2020-06-20', '84']
['2', '203726', '12493', '2020-08-08', '132']
['2', '217889', '10295', '2020-08-23', '24']
['2', '218323', '9528', '2020-08-23', '48']
['3', '99402', '17003', '2020-05-01', '36']


In [39]:
my_read_csv_file("temp_sales_big_data.csv", 25)

['store_id', 'sale_id', 'customer_id', 'sale_date', 'total_amount']
['1', '128112', '3491', '2020-04-30', '24']
['1', '144249', '1597', '2020-05-16', '84']
['1', '163141', '4159', '2020-06-04', '96']
['1', '169216', '4198', '2020-06-09', '144']
['1', '179181', '5394', '2020-06-18', '48']
['1', '181897', '1958', '2020-06-20', '48']
['1', '248269', '4260', '2020-08-22', '60']
['1', '250031', '6782', '2020-08-23', '24']
['1', '255285', '563', '2020-08-29', '36']
['1', '263524', '1991', '2020-09-07', '48']
['2', '105004', '12160', '2020-04-23', '36']
['2', '109083', '11291', '2020-04-27', '132']
['2', '115446', '11779', '2020-05-03', '12']
['2', '115912', '9298', '2020-05-04', '48']
['2', '119996', '14460', '2020-05-08', '36']
['2', '144107', '9046', '2020-06-06', '96']
['2', '158290', '9189', '2020-06-20', '84']
['2', '203726', '12493', '2020-08-08', '132']
['2', '217889', '10295', '2020-08-23', '24']
['2', '218323', '9528', '2020-08-23', '48']
['3', '99402', '17003', '2020-05-01', '36']


In [40]:
my_read_csv_file("temp_line_items_no_header.csv", 25)

['store_id', 'sale_id', 'line_item_id', 'product_id', 'quantity']
['1', '128112', '1', '1', '1']
['1', '128112', '2', '8', '1']
['1', '144249', '1', '1', '1']
['1', '144249', '2', '2', '1']
['1', '144249', '3', '4', '2']
['1', '144249', '4', '6', '1']
['1', '144249', '5', '8', '2']
['1', '163141', '1', '1', '3']
['1', '163141', '2', '3', '1']
['1', '163141', '3', '5', '1']
['1', '163141', '4', '7', '1']
['1', '163141', '5', '8', '2']
['1', '169216', '1', '1', '3']
['1', '169216', '2', '3', '1']
['1', '169216', '3', '4', '4']
['1', '169216', '4', '5', '1']
['1', '169216', '5', '6', '2']
['1', '169216', '6', '8', '1']
['1', '179181', '1', '2', '1']
['1', '179181', '2', '3', '1']
['1', '179181', '3', '4', '1']
['1', '179181', '4', '6', '1']
['1', '181897', '1', '1', '1']
['1', '181897', '2', '6', '1']

Printed  25 lines of  177 total lines.


In [41]:
my_read_csv_file("temp_line_items_header.csv", 25)

['store_id', 'sale_id', 'line_item_id', 'product_id', 'quantity']
['1', '128112', '1', '1', '1']
['1', '128112', '2', '8', '1']
['1', '144249', '1', '1', '1']
['1', '144249', '2', '2', '1']
['1', '144249', '3', '4', '2']
['1', '144249', '4', '6', '1']
['1', '144249', '5', '8', '2']
['1', '163141', '1', '1', '3']
['1', '163141', '2', '3', '1']
['1', '163141', '3', '5', '1']
['1', '163141', '4', '7', '1']
['1', '163141', '5', '8', '2']
['1', '169216', '1', '1', '3']
['1', '169216', '2', '3', '1']
['1', '169216', '3', '4', '4']
['1', '169216', '4', '5', '1']
['1', '169216', '5', '6', '2']
['1', '169216', '6', '8', '1']
['1', '179181', '1', '2', '1']
['1', '179181', '2', '3', '1']
['1', '179181', '3', '4', '1']
['1', '179181', '4', '6', '1']
['1', '181897', '1', '1', '1']
['1', '181897', '2', '6', '1']

Printed  25 lines of  177 total lines.


In [42]:
my_read_csv_file("temp_line_items_big_data.csv", 25)

['store_id', 'sale_id', 'line_item_id', 'product_id', 'quantity']
['1', '128112', '1', '1', '1']
['1', '128112', '2', '8', '1']
['1', '144249', '1', '1', '1']
['1', '144249', '2', '2', '1']
['1', '144249', '3', '4', '2']
['1', '144249', '4', '6', '1']
['1', '144249', '5', '8', '2']
['1', '163141', '1', '1', '3']
['1', '163141', '2', '3', '1']
['1', '163141', '3', '5', '1']
['1', '163141', '4', '7', '1']
['1', '163141', '5', '8', '2']
['1', '169216', '1', '1', '3']
['1', '169216', '2', '3', '1']
['1', '169216', '3', '4', '4']
['1', '169216', '4', '5', '1']
['1', '169216', '5', '6', '2']
['1', '169216', '6', '8', '1']
['1', '179181', '1', '2', '1']
['1', '179181', '2', '3', '1']
['1', '179181', '3', '4', '1']
['1', '179181', '4', '6', '1']
['1', '181897', '1', '1', '1']
['1', '181897', '2', '6', '1']

Printed  25 lines of  177 total lines.


In [43]:
my_read_csv_file("temp_customers_no_header.csv", 25)

['customer_id', 'first_name', 'last_name', 'street', 'city', 'state', 'zip', 'closest_store_id', 'distance']
['563', 'Rose', 'Slimings', '38 Iowa Street', 'Berkeley', 'CA', '94704', '1', '1']
['1597', 'Norry', 'Macauley', '654 Sommers Plaza', 'Oakland', 'CA', '94612', '1', '3']
['1958', 'Theresina', 'Penswick', '5975 Twin Pines Hill', 'Berkeley', 'CA', '94707', '1', '3']
['1991', 'Kevon', 'Wickett', '472 Arizona Court', 'Berkeley', 'CA', '94707', '1', '3']
['3491', 'Siouxie', "M'Quharge", '747 Westridge Center', 'Alameda', 'CA', '94501', '1', '6']
['4159', 'Cheryl', 'Broe', '7 Ruskin Alley', 'El Sobrante', 'CA', '94803', '1', '7']
['4198', 'Andreana', 'Drew', '11039 Cordelia Alley', 'El Sobrante', 'CA', '94803', '1', '7']
['4260', 'Dom', 'Risbrough', '3 Northland Crossing', 'Richmond', 'CA', '94805', '1', '7']
['5394', 'Katharina', 'Bavester', '522 Cordelia Lane', 'San Francisco', 'CA', '94102', '1', '10']
['6782', 'Lyndsay', 'Iuorio', '4 Thackeray Road', 'Walnut Creek', 'CA', '94596',

In [44]:
my_read_csv_file("temp_customers_header.csv", 25)

['customer_id', 'first_name', 'last_name', 'street', 'city', 'state', 'zip', 'closest_store_id', 'distance']
['563', 'Rose', 'Slimings', '38 Iowa Street', 'Berkeley', 'CA', '94704', '1', '1']
['1597', 'Norry', 'Macauley', '654 Sommers Plaza', 'Oakland', 'CA', '94612', '1', '3']
['1958', 'Theresina', 'Penswick', '5975 Twin Pines Hill', 'Berkeley', 'CA', '94707', '1', '3']
['1991', 'Kevon', 'Wickett', '472 Arizona Court', 'Berkeley', 'CA', '94707', '1', '3']
['3491', 'Siouxie', "M'Quharge", '747 Westridge Center', 'Alameda', 'CA', '94501', '1', '6']
['4159', 'Cheryl', 'Broe', '7 Ruskin Alley', 'El Sobrante', 'CA', '94803', '1', '7']
['4198', 'Andreana', 'Drew', '11039 Cordelia Alley', 'El Sobrante', 'CA', '94803', '1', '7']
['4260', 'Dom', 'Risbrough', '3 Northland Crossing', 'Richmond', 'CA', '94805', '1', '7']
['5394', 'Katharina', 'Bavester', '522 Cordelia Lane', 'San Francisco', 'CA', '94102', '1', '10']
['6782', 'Lyndsay', 'Iuorio', '4 Thackeray Road', 'Walnut Creek', 'CA', '94596',

In [45]:
my_read_csv_file("temp_customers_big_data.csv", 25)

['customer_id', 'first_name', 'last_name', 'street', 'city', 'state', 'zip', 'closest_store_id', 'distance']
['563', 'Rose', 'Slimings', '38 Iowa Street', 'Berkeley', 'CA', '94704', '1', '1']
['1597', 'Norry', 'Macauley', '654 Sommers Plaza', 'Oakland', 'CA', '94612', '1', '3']
['1958', 'Theresina', 'Penswick', '5975 Twin Pines Hill', 'Berkeley', 'CA', '94707', '1', '3']
['1991', 'Kevon', 'Wickett', '472 Arizona Court', 'Berkeley', 'CA', '94707', '1', '3']
['3491', 'Siouxie', "M'Quharge", '747 Westridge Center', 'Alameda', 'CA', '94501', '1', '6']
['4159', 'Cheryl', 'Broe', '7 Ruskin Alley', 'El Sobrante', 'CA', '94803', '1', '7']
['4198', 'Andreana', 'Drew', '11039 Cordelia Alley', 'El Sobrante', 'CA', '94803', '1', '7']
['4260', 'Dom', 'Risbrough', '3 Northland Crossing', 'Richmond', 'CA', '94805', '1', '7']
['5394', 'Katharina', 'Bavester', '522 Cordelia Lane', 'San Francisco', 'CA', '94102', '1', '10']
['6782', 'Lyndsay', 'Iuorio', '4 Thackeray Road', 'Walnut Creek', 'CA', '94596',

In [46]:
my_read_csv_file("temp_products_no_header.csv", 25)

['product_id', 'description']
['1', 'Pistachio Salmon']
['2', 'Teriyaki Chicken']
['3', 'Spinach Orzo']
['4', 'Eggplant Lasagna']
['5', 'Chicken Salad']
['6', 'Curry Chicken']
['7', 'Tilapia Piccata']
['8', 'Brocolli Stir Fry']

Printed  9 lines of  9 total lines.


In [47]:
my_read_csv_file("temp_products_header.csv", 25)

['product_id', 'description']
['1', 'Pistachio Salmon']
['2', 'Teriyaki Chicken']
['3', 'Spinach Orzo']
['4', 'Eggplant Lasagna']
['5', 'Chicken Salad']
['6', 'Curry Chicken']
['7', 'Tilapia Piccata']
['8', 'Brocolli Stir Fry']

Printed  9 lines of  9 total lines.


In [48]:
my_read_csv_file("temp_products_big_data.csv", 25)

['product_id', 'description']
['1', 'Pistachio Salmon']
['2', 'Teriyaki Chicken']
['3', 'Spinach Orzo']
['4', 'Eggplant Lasagna']
['5', 'Chicken Salad']
['6', 'Curry Chicken']
['7', 'Tilapia Piccata']
['8', 'Brocolli Stir Fry']

Printed  9 lines of  9 total lines.


In [49]:
my_read_csv_file("temp_holidays_no_header.csv", 25)

['holiday_date', 'description', 'closed_flag']
['2020-01-01', "New Year's Day", 'False']
['2020-01-20', 'MLK Day', 'False']
['2020-02-17', "President's Day", 'False']
['2020-04-12', 'Easter', 'False']
['2020-05-10', "Mother's Day", 'False']
['2020-05-25', 'Memorial Day', 'False']
['2020-06-21', "Father's Day", 'False']
['2020-07-04', 'Independence Day', 'False']
['2020-09-07', 'Labor Day', 'False']
['2020-11-11', 'Veterans Days', 'False']
['2020-11-26', 'Thanksgiving', 'True']
['2020-12-25', 'Christmas', 'True']

Printed  13 lines of  13 total lines.


In [50]:
my_read_csv_file("temp_holidays_header.csv", 25)

['holiday_date', 'description', 'closed_flag']
['2020-01-01', "New Year's Day", 'False']
['2020-01-20', 'MLK Day', 'False']
['2020-02-17', "President's Day", 'False']
['2020-04-12', 'Easter', 'False']
['2020-05-10', "Mother's Day", 'False']
['2020-05-25', 'Memorial Day', 'False']
['2020-06-21', "Father's Day", 'False']
['2020-07-04', 'Independence Day', 'False']
['2020-09-07', 'Labor Day', 'False']
['2020-11-11', 'Veterans Days', 'False']
['2020-11-26', 'Thanksgiving', 'True']
['2020-12-25', 'Christmas', 'True']

Printed  13 lines of  13 total lines.


In [51]:
my_read_csv_file("temp_holidays_big_data.csv", 25)

['holiday_date', 'description', 'closed_flag']
['2020-01-01', "New Year's Day", 'False']
['2020-01-20', 'MLK Day', 'False']
['2020-02-17', "President's Day", 'False']
['2020-04-12', 'Easter', 'False']
['2020-05-10', "Mother's Day", 'False']
['2020-05-25', 'Memorial Day', 'False']
['2020-06-21', "Father's Day", 'False']
['2020-07-04', 'Independence Day', 'False']
['2020-09-07', 'Labor Day', 'False']
['2020-11-11', 'Veterans Days', 'False']
['2020-11-26', 'Thanksgiving', 'True']
['2020-12-25', 'Christmas', 'True']

Printed  13 lines of  13 total lines.


## You try it - extract the following database tables into 3 separate flat json tables (no header, header, and big data); read each file to verify:

* temp_sales to temp_sales_no_header_2.json, temp_sales_header_2.json, temp_sales_big_data_2.json
* temp_line_items to temp_line_items_no_header_2.json, temp_line_items_header_2.json, temp_line_items_big_data_2.json
* temp_customers to temp_customers_no_header_2.json, temp_customers_header_2.json, temp_customers_big_data_2.json
* temp_products to temp_products_no_header_2.json, temp_products_header_2.json, temp_products_big_data_2.json
* temp_holidays to temp_holidays_no_header_2.json, temp_holidays_header_2.json, temp_holidays_big_data_2.json


In [52]:
def my_extract_flat_json(query, file_name, file_type):
    "using the query provided, extract to json, file_type: 1 = no header, 2 = header, 3 = big_data"
    
    connection.rollback()
    
    cursor.execute(query)
    
    connection.rollback()

    rows = cursor.fetchall()
    
    list_of_json = []
    
    for row in rows:
        list_of_json.append(row[0])
        
    f = open(file_name, "w")
    
    # flat json with no headers
    if file_type == 1:
        json.dump(list_of_json, f, indent=2)
    
    # flat json with a header
    if file_type == 2:
        template = {"creator": "Acme Gourmet Meals",
                    "timestamp": dt.now().strftime("%Y-%d-%m %H:%M:%S"),
                    "file_name": file_name, 
                    "version": "12.4.7",
                    "legal": "Unauthorized use, duplication, or possession, blah, blah",
                    "data": list_of_json
                   }
        json.dump(template, f, indent=2)
        
    # flat json big data style
    if file_type == 3:
        for j in list_of_json:
            f.write(json.dumps(j) + "\n")
        
    f.close()  
    

In [53]:
query = """

select row_to_json(a) 
from (select *
      from temp_sales
      order by store_id, sale_id) as a
      
"""

my_extract_flat_json(query, "temp_sales_no_header_2.json", 1)
my_extract_flat_json(query, "temp_sales_header_2.json", 2)
my_extract_flat_json(query, "temp_sales_big_data_2.json", 3)


In [54]:
query = """

select row_to_json(a) 
from (select *
      from temp_line_items
      order by store_id, sale_id, line_item_id) as a
      
"""

my_extract_flat_json(query, "temp_line_items_no_header_2.json", 1)
my_extract_flat_json(query, "temp_line_items_header_2.json", 2)
my_extract_flat_json(query, "temp_line_items_big_data_2.json", 3)


In [55]:
query = """

select row_to_json(a) 
from (select *
      from temp_customers
      order by customer_id) as a
      
"""

my_extract_flat_json(query, "temp_customers_no_header_2.json", 1)
my_extract_flat_json(query, "temp_customers_header_2.json", 2)
my_extract_flat_json(query, "temp_customers_big_data_2.json", 3)


In [56]:
query = """

select row_to_json(a) 
from (select *
      from temp_products
      order by product_id) as a
      
"""

my_extract_flat_json(query, "temp_products_no_header_2.json", 1)
my_extract_flat_json(query, "temp_products_header_2.json", 2)
my_extract_flat_json(query, "temp_products_big_data_2.json", 3)


In [57]:
query = """

select row_to_json(a) 
from (select *
      from temp_holidays
      order by holiday_date) as a
      
"""

my_extract_flat_json(query, "temp_holidays_no_header_2.json", 1)
my_extract_flat_json(query, "temp_holidays_header_2.json", 2)
my_extract_flat_json(query, "temp_holidays_big_data_2.json", 3)


In [58]:
my_read_flat_json("temp_sales_no_header_2.json", 25, 3)

------------------------------------
    temp_sales_no_header_2.json
------------------------------------
[
  {
    "store_id": 1,
    "sale_id": 128112,
    "customer_id": 3491,
    "sale_date": "2020-04-30",
    "total_amount": 24
  },
  {
    "store_id": 1,
    "sale_id": 144249,
    "customer_id": 1597,
    "sale_date": "2020-05-16",
    "total_amount": 84
  },
  {
    "store_id": 1,
    "sale_id": 163141,
    "customer_id": 4159,
    "sale_date": "2020-06-04",
    "total_amount": 96
  },
  {
    "store_id": 1,
    "sale_id": 169216,

>>> Printed 25 lines of 352 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'customer_id': 3491, 'sale_date': '2020-04-30', 'total_amount': 24}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'customer_id': 3491,
  'sale_date': '2020-04-30',
  'total_amount': 24}

>>>JSON Object # 1 unformatted:

 {'store_id': 1, 'sale_id': 144249, 'customer_id': 1597, 'sale_date': '2020-05-16', 'total_am

In [59]:
my_read_flat_json("temp_sales_header_2.json", 25, 3)

------------------------------------
    temp_sales_header_2.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2025-07-02 00:03:34",
  "file_name": "temp_sales_header_2.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "store_id": 1,
      "sale_id": 128112,
      "customer_id": 3491,
      "sale_date": "2020-04-30",
      "total_amount": 24
    },
    {
      "store_id": 1,
      "sale_id": 144249,
      "customer_id": 1597,
      "sale_date": "2020-05-16",
      "total_amount": 84
    },
    {
      "store_id": 1,
      "sale_id": 163141,
      "customer_id": 4159,

>>> Printed 25 lines of 359 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'customer_id': 3491, 'sale_date': '2020-04-30', 'total_amount': 24}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'customer_id': 3491,
  'sale_date': '2020-04-30',

In [60]:
my_read_flat_json("temp_sales_big_data_2.json", 25, 3)

------------------------------------
    temp_sales_big_data_2.json
------------------------------------
{"store_id": 1, "sale_id": 128112, "customer_id": 3491, "sale_date": "2020-04-30", "total_amount": 24}
{"store_id": 1, "sale_id": 144249, "customer_id": 1597, "sale_date": "2020-05-16", "total_amount": 84}
{"store_id": 1, "sale_id": 163141, "customer_id": 4159, "sale_date": "2020-06-04", "total_amount": 96}
{"store_id": 1, "sale_id": 169216, "customer_id": 4198, "sale_date": "2020-06-09", "total_amount": 144}
{"store_id": 1, "sale_id": 179181, "customer_id": 5394, "sale_date": "2020-06-18", "total_amount": 48}
{"store_id": 1, "sale_id": 181897, "customer_id": 1958, "sale_date": "2020-06-20", "total_amount": 48}
{"store_id": 1, "sale_id": 248269, "customer_id": 4260, "sale_date": "2020-08-22", "total_amount": 60}
{"store_id": 1, "sale_id": 250031, "customer_id": 6782, "sale_date": "2020-08-23", "total_amount": 24}
{"store_id": 1, "sale_id": 255285, "customer_id": 563, "sale_date": "2

In [61]:
my_read_flat_json("temp_line_items_no_header_2.json", 25, 3)

------------------------------------
    temp_line_items_no_header_2.json
------------------------------------
[
  {
    "store_id": 1,
    "sale_id": 128112,
    "line_item_id": 1,
    "product_id": 1,
    "quantity": 1
  },
  {
    "store_id": 1,
    "sale_id": 128112,
    "line_item_id": 2,
    "product_id": 8,
    "quantity": 1
  },
  {
    "store_id": 1,
    "sale_id": 144249,
    "line_item_id": 1,
    "product_id": 1,
    "quantity": 1
  },
  {
    "store_id": 1,
    "sale_id": 144249,

>>> Printed 25 lines of 1234 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'line_item_id': 1, 'product_id': 1, 'quantity': 1}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'line_item_id': 1,
  'product_id': 1,
  'quantity': 1}

>>>JSON Object # 1 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'line_item_id': 2, 'product_id': 8, 'quantity': 1}

>>>JSON Object # 1 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'line_it

In [62]:
my_read_flat_json("temp_line_items_header_2.json", 25, 3)

------------------------------------
    temp_line_items_header_2.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2025-07-02 00:03:35",
  "file_name": "temp_line_items_header_2.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "store_id": 1,
      "sale_id": 128112,
      "line_item_id": 1,
      "product_id": 1,
      "quantity": 1
    },
    {
      "store_id": 1,
      "sale_id": 128112,
      "line_item_id": 2,
      "product_id": 8,
      "quantity": 1
    },
    {
      "store_id": 1,
      "sale_id": 144249,
      "line_item_id": 1,

>>> Printed 25 lines of 1241 total lines.

>>>JSON Object # 0 unformatted:

 {'store_id': 1, 'sale_id': 128112, 'line_item_id': 1, 'product_id': 1, 'quantity': 1}

>>>JSON Object # 0 pretty printed:

{ 'store_id': 1,
  'sale_id': 128112,
  'line_item_id': 1,
  'product_id': 1,
  'quantity': 1}

>>>JSON Object # 1 unformatted:

 {

In [63]:
my_read_flat_json("temp_line_items_big_data_2.json", 25, 3)

------------------------------------
    temp_line_items_big_data_2.json
------------------------------------
{"store_id": 1, "sale_id": 128112, "line_item_id": 1, "product_id": 1, "quantity": 1}
{"store_id": 1, "sale_id": 128112, "line_item_id": 2, "product_id": 8, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 1, "product_id": 1, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 2, "product_id": 2, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 3, "product_id": 4, "quantity": 2}
{"store_id": 1, "sale_id": 144249, "line_item_id": 4, "product_id": 6, "quantity": 1}
{"store_id": 1, "sale_id": 144249, "line_item_id": 5, "product_id": 8, "quantity": 2}
{"store_id": 1, "sale_id": 163141, "line_item_id": 1, "product_id": 1, "quantity": 3}
{"store_id": 1, "sale_id": 163141, "line_item_id": 2, "product_id": 3, "quantity": 1}
{"store_id": 1, "sale_id": 163141, "line_item_id": 3, "product_id": 5, "quantity": 1}
{"store_id": 1, "sale_id": 163

In [64]:
my_read_flat_json("temp_customers_no_header_2.json", 25, 3)

------------------------------------
    temp_customers_no_header_2.json
------------------------------------
[
  {
    "customer_id": 563,
    "first_name": "Rose",
    "last_name": "Slimings",
    "street": "38 Iowa Street",
    "city": "Berkeley",
    "state": "CA",
    "zip": "94704",
    "closest_store_id": 1,
    "distance": 1
  },
  {
    "customer_id": 1597,
    "first_name": "Norry",
    "last_name": "Macauley",
    "street": "654 Sommers Plaza",
    "city": "Oakland",
    "state": "CA",
    "zip": "94612",
    "closest_store_id": 1,
    "distance": 3
  },
  {
    "customer_id": 1958,

>>> Printed 25 lines of 552 total lines.

>>>JSON Object # 0 unformatted:

 {'customer_id': 563, 'first_name': 'Rose', 'last_name': 'Slimings', 'street': '38 Iowa Street', 'city': 'Berkeley', 'state': 'CA', 'zip': '94704', 'closest_store_id': 1, 'distance': 1}

>>>JSON Object # 0 pretty printed:

{ 'customer_id': 563,
  'first_name': 'Rose',
  'last_name': 'Slimings',
  'street': '38 Iowa Street

In [65]:
my_read_flat_json("temp_customers_header_2.json", 25, 3)

------------------------------------
    temp_customers_header_2.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2025-07-02 00:03:36",
  "file_name": "temp_customers_header_2.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "customer_id": 563,
      "first_name": "Rose",
      "last_name": "Slimings",
      "street": "38 Iowa Street",
      "city": "Berkeley",
      "state": "CA",
      "zip": "94704",
      "closest_store_id": 1,
      "distance": 1
    },
    {
      "customer_id": 1597,
      "first_name": "Norry",
      "last_name": "Macauley",
      "street": "654 Sommers Plaza",
      "city": "Oakland",
      "state": "CA",

>>> Printed 25 lines of 559 total lines.

>>>JSON Object # 0 unformatted:

 {'customer_id': 563, 'first_name': 'Rose', 'last_name': 'Slimings', 'street': '38 Iowa Street', 'city': 'Berkeley', 'state': 'CA', 'zip': '94704', 'closest_store_

In [66]:
my_read_flat_json("temp_customers_big_data_2.json", 25, 3)

------------------------------------
    temp_customers_big_data_2.json
------------------------------------
{"customer_id": 563, "first_name": "Rose", "last_name": "Slimings", "street": "38 Iowa Street", "city": "Berkeley", "state": "CA", "zip": "94704", "closest_store_id": 1, "distance": 1}
{"customer_id": 1597, "first_name": "Norry", "last_name": "Macauley", "street": "654 Sommers Plaza", "city": "Oakland", "state": "CA", "zip": "94612", "closest_store_id": 1, "distance": 3}
{"customer_id": 1958, "first_name": "Theresina", "last_name": "Penswick", "street": "5975 Twin Pines Hill", "city": "Berkeley", "state": "CA", "zip": "94707", "closest_store_id": 1, "distance": 3}
{"customer_id": 1991, "first_name": "Kevon", "last_name": "Wickett", "street": "472 Arizona Court", "city": "Berkeley", "state": "CA", "zip": "94707", "closest_store_id": 1, "distance": 3}
{"customer_id": 3491, "first_name": "Siouxie", "last_name": "M'Quharge", "street": "747 Westridge Center", "city": "Alameda", "stat

In [67]:
my_read_flat_json("temp_products_no_header_2.json", 25, 3)

------------------------------------
    temp_products_no_header_2.json
------------------------------------
[
  {
    "product_id": 1,
    "description": "Pistachio Salmon"
  },
  {
    "product_id": 2,
    "description": "Teriyaki Chicken"
  },
  {
    "product_id": 3,
    "description": "Spinach Orzo"
  },
  {
    "product_id": 4,
    "description": "Eggplant Lasagna"
  },
  {
    "product_id": 5,
    "description": "Chicken Salad"
  },
  {
    "product_id": 6,
    "description": "Curry Chicken"
  },

>>> Printed 25 lines of 34 total lines.

>>>JSON Object # 0 unformatted:

 {'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 0 pretty printed:

{'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 1 unformatted:

 {'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 1 pretty printed:

{'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 2 unformatted:

 {'product_id': 3, 'description': 'Spinach Orzo'}

>>>JSON O

In [68]:
my_read_flat_json("temp_products_header_2.json", 25, 3)

------------------------------------
    temp_products_header_2.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2025-07-02 00:03:36",
  "file_name": "temp_products_header_2.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "product_id": 1,
      "description": "Pistachio Salmon"
    },
    {
      "product_id": 2,
      "description": "Teriyaki Chicken"
    },
    {
      "product_id": 3,
      "description": "Spinach Orzo"
    },
    {
      "product_id": 4,
      "description": "Eggplant Lasagna"
    },
    {
      "product_id": 5,

>>> Printed 25 lines of 41 total lines.

>>>JSON Object # 0 unformatted:

 {'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 0 pretty printed:

{'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 1 unformatted:

 {'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 1 pretty prin

In [69]:
my_read_flat_json("temp_products_big_data_2.json", 25, 3)

------------------------------------
    temp_products_big_data_2.json
------------------------------------
{"product_id": 1, "description": "Pistachio Salmon"}
{"product_id": 2, "description": "Teriyaki Chicken"}
{"product_id": 3, "description": "Spinach Orzo"}
{"product_id": 4, "description": "Eggplant Lasagna"}
{"product_id": 5, "description": "Chicken Salad"}
{"product_id": 6, "description": "Curry Chicken"}
{"product_id": 7, "description": "Tilapia Piccata"}
{"product_id": 8, "description": "Brocolli Stir Fry"}

>>> Printed 25 lines of 8 total lines.

>>>JSON Object # 0 unformatted:

 {'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 0 pretty printed:

{'product_id': 1, 'description': 'Pistachio Salmon'}

>>>JSON Object # 1 unformatted:

 {'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 1 pretty printed:

{'product_id': 2, 'description': 'Teriyaki Chicken'}

>>>JSON Object # 2 unformatted:

 {'product_id': 3, 'description': 'Spinach Orzo'

In [70]:
my_read_flat_json("temp_holidays_no_header_2.json", 25, 3)

------------------------------------
    temp_holidays_no_header_2.json
------------------------------------
[
  {
    "holiday_date": "2020-01-01",
    "description": "New Year's Day",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-01-20",
    "description": "MLK Day",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-02-17",
    "description": "President's Day",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-04-12",
    "description": "Easter",
    "closed_flag": false
  },
  {
    "holiday_date": "2020-05-10",
    "description": "Mother's Day",
    "closed_flag": false

>>> Printed 25 lines of 62 total lines.

>>>JSON Object # 0 unformatted:

 {'holiday_date': '2020-01-01', 'description': "New Year's Day", 'closed_flag': False}

>>>JSON Object # 0 pretty printed:

{ 'holiday_date': '2020-01-01',
  'description': "New Year's Day",
  'closed_flag': False}

>>>JSON Object # 1 unformatted:

 {'holiday_date': '2020-01-20', 'description': 'MLK Day', 'cl

In [71]:
my_read_flat_json("temp_holidays_header_2.json", 25, 3)

------------------------------------
    temp_holidays_header_2.json
------------------------------------
{
  "creator": "Acme Gourmet Meals",
  "timestamp": "2025-07-02 00:03:37",
  "file_name": "temp_holidays_header_2.json",
  "version": "12.4.7",
  "legal": "Unauthorized use, duplication, or possession, blah, blah",
  "data": [
    {
      "holiday_date": "2020-01-01",
      "description": "New Year's Day",
      "closed_flag": false
    },
    {
      "holiday_date": "2020-01-20",
      "description": "MLK Day",
      "closed_flag": false
    },
    {
      "holiday_date": "2020-02-17",
      "description": "President's Day",
      "closed_flag": false
    },
    {
      "holiday_date": "2020-04-12",
      "description": "Easter",

>>> Printed 25 lines of 69 total lines.

>>>JSON Object # 0 unformatted:

 {'holiday_date': '2020-01-01', 'description': "New Year's Day", 'closed_flag': False}

>>>JSON Object # 0 pretty printed:

{ 'holiday_date': '2020-01-01',
  'description': "New Ye

In [72]:
my_read_flat_json("temp_holidays_big_data_2.json", 25, 3)

------------------------------------
    temp_holidays_big_data_2.json
------------------------------------
{"holiday_date": "2020-01-01", "description": "New Year's Day", "closed_flag": false}
{"holiday_date": "2020-01-20", "description": "MLK Day", "closed_flag": false}
{"holiday_date": "2020-02-17", "description": "President's Day", "closed_flag": false}
{"holiday_date": "2020-04-12", "description": "Easter", "closed_flag": false}
{"holiday_date": "2020-05-10", "description": "Mother's Day", "closed_flag": false}
{"holiday_date": "2020-05-25", "description": "Memorial Day", "closed_flag": false}
{"holiday_date": "2020-06-21", "description": "Father's Day", "closed_flag": false}
{"holiday_date": "2020-07-04", "description": "Independence Day", "closed_flag": false}
{"holiday_date": "2020-09-07", "description": "Labor Day", "closed_flag": false}
{"holiday_date": "2020-11-11", "description": "Veterans Days", "closed_flag": false}
{"holiday_date": "2020-11-26", "description": "Thanksgiv