# In [None]:
import pandas as pd
import gzip
import json
import requests
import ssl
from datetime import datetime
import os

# --- Constants ---
# Each dataset has a remote source URL and the local filename it is cached under.

# Electronics reviews archive.
REVIEWS_URL = 'http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz'
REVIEWS_PATH = 'reviews.json.gz'

# Electronics product-metadata archive.
METADATA_URL = 'http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Electronics.json.gz'
METADATA_PATH = 'metadata.json.gz'

def _download_file(url, path, chunk_size=1 << 20, timeout=60):
    """Stream ``url`` into ``path``, creating ``path`` only on full success.

    The body is written to ``<path>.part`` first and renamed into place once
    the transfer has finished, so an interrupted or failed download never
    leaves a truncated file that a later existence check would mistake for a
    valid cached copy.

    Args:
        url: Address of the file to fetch.
        path: Final destination on disk.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the file cannot be written or renamed.
    """
    partial_path = f"{path}.part"
    try:
        # stream=True avoids holding the whole archive in memory at once.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail loudly instead of saving an HTML error page as a ".gz".
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        os.replace(partial_path, path)
    finally:
        # After a successful rename the partial file no longer exists, so
        # this only cleans up after a failure.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def download_data(reviews_url, metadata_url, reviews_path, metadata_path):
    """Download the gzipped reviews and metadata files if not already present.

    A file whose destination path already exists is skipped, which makes
    repeated calls cheap.

    Args:
        reviews_url: URL of the gzipped reviews file.
        metadata_url: URL of the gzipped metadata file.
        reviews_path: Local path where the reviews file is stored.
        metadata_path: Local path where the metadata file is stored.

    Raises:
        requests.HTTPError: If a server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If a file cannot be written.
    """
    print("Starting data download...")

    targets = (
        ("Reviews", reviews_url, reviews_path),
        ("Metadata", metadata_url, metadata_path),
    )
    for label, url, path in targets:
        if os.path.exists(path):
            continue
        print(f"Downloading {label} file from {url}...")
        _download_file(url, path)

    print("Download complete.")



def parse(path):
    """Yield one Python object per line of a gzipped "loose JSON" file.

    Each line is expected to hold a single Python-literal expression (for
    example a dict written with single quotes), which strict ``json.loads``
    would reject.

    Args:
        path: Path to the gzip-compressed, line-oriented file.

    Yields:
        The literal value described by each line, in file order.

    Raises:
        ValueError: If a line contains anything other than a plain literal.
        SyntaxError: If a line is not a valid Python expression.
    """
    # Imported locally so this block does not rely on a file-level import.
    import ast

    # NOTE(review): this switches the default HTTPS context used by urllib to
    # an unverified one for the whole process. It has nothing to do with
    # reading a local gzip file and is kept only so existing behaviour is
    # unchanged -- consider removing it.
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

    # The context manager closes the file once the generator finishes or is
    # closed; text mode is needed because literal_eval does not accept bytes.
    with gzip.open(path, 'rt', encoding='utf-8') as lines:
        for line in lines:
            # SECURITY: this used to be eval(), which would execute arbitrary
            # code embedded in the downloaded file. literal_eval accepts the
            # same literal syntax but refuses calls, names and attribute
            # access.
            yield ast.literal_eval(line)

def get_dataframe(path):
    """Build a pandas DataFrame from the records produced by ``parse(path)``.

    Every record is keyed by its zero-based position in the file, and that
    position becomes the row label of the resulting frame.
    """
    records_by_position = dict(enumerate(parse(path)))
    return pd.DataFrame.from_dict(records_by_position, orient='index')

def load_raw_data():
    """Fetch the source files if needed and load both as DataFrames.

    Returns:
        A ``(reviews, metadata)`` tuple of DataFrames built from
        ``REVIEWS_PATH`` and ``METADATA_PATH`` respectively.
    """
    # Make sure both archives are available locally before parsing them.
    download_data(
        reviews_url=REVIEWS_URL,
        metadata_url=METADATA_URL,
        reviews_path=REVIEWS_PATH,
        metadata_path=METADATA_PATH,
    )

    print("Loading Reviews DataFrame...")
    reviews = get_dataframe(REVIEWS_PATH)
    review_count = reviews.shape[0]
    print(f"Reviews Loaded: {review_count} total reviews.")

    print("Loading Metadata DataFrame...")
    metadata = get_dataframe(METADATA_PATH)

    return reviews, metadata

if __name__ == "__main__":
    # Script entry point: load both datasets, then show the first review row
    # as a quick sanity check.
    review_data, metadata_data = load_raw_data()
    print(review_data.head(n=1))