In [1]:
import gzip
import os
import shutil

import pandas as pd
import requests

In [2]:
# Create the 'data' directory in the current working directory.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs('data', exist_ok=True)

# URL of the gzip-compressed JSON file containing the Appliances review data
appliances_url = "https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_v2/categoryFilesSmall/Appliances_5.json.gz"

# Local file path for the downloaded file (still gzip-compressed, despite the .json suffix)
comp_appliances_path = "data/comp_appliances.json"

# Only download when the file is not already cached locally
if not os.path.exists(comp_appliances_path):
    # The timeout bounds the connect / per-read wait so a stalled server cannot hang the kernel
    response = requests.get(appliances_url, timeout=30)
    # Fail loudly on 4xx/5xx so an HTTP error page is never cached as if it were the dataset
    response.raise_for_status()

    # Write to a temporary name and rename once complete: an interrupted write must not
    # leave a truncated file that the existence check above would accept on the next run
    partial_path = comp_appliances_path + ".part"
    with open(partial_path, 'wb') as file:
        file.write(response.content)
    os.replace(partial_path, comp_appliances_path)


In [3]:

# URL of the gzip-compressed JSON file containing the metadata for Appliances
meta_url = "https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_v2/metaFiles2/meta_Appliances.json.gz"

# Local file path for the downloaded file (still gzip-compressed, despite the .json suffix)
comp_meta_appliances_path = "data/comp_meta_appliances.json"

# Only download when the file is not already cached locally
if not os.path.exists(comp_meta_appliances_path):
    # Make sure the target directory exists so this cell does not depend on another cell creating it
    os.makedirs(os.path.dirname(comp_meta_appliances_path), exist_ok=True)

    # The timeout bounds the connect / per-read wait so a stalled server cannot hang the kernel
    response = requests.get(meta_url, timeout=30)
    # Fail loudly on 4xx/5xx so an HTTP error page is never cached as if it were the dataset
    response.raise_for_status()

    # Write to a temporary name and rename once complete: an interrupted write must not
    # leave a truncated file that the existence check above would accept on the next run
    partial_meta_path = comp_meta_appliances_path + ".part"
    with open(partial_meta_path, 'wb') as file:
        file.write(response.content)
    os.replace(partial_meta_path, comp_meta_appliances_path)


In [4]:

# Local file paths for the decompressed JSON files
appliances_path = "data/appliances.json"
meta_appliances_path = "data/meta_appliances.json"


def _gunzip_if_missing(src_path, dst_path):
    """Decompress the gzip file at `src_path` into `dst_path`, unless `dst_path` exists.

    Parameters
    ----------
    src_path : str
        Path of the gzip-compressed input file.
    dst_path : str
        Path of the decompressed output file. If it already exists, nothing is done.

    Raises
    ------
    OSError
        If the input cannot be read (including when it is not valid gzip data)
        or the output cannot be written.
    """
    if os.path.exists(dst_path):
        return

    # Decompress into a temporary name first so that a failure part-way through
    # never leaves a truncated file at dst_path that later runs would treat as complete
    partial_path = dst_path + ".part"
    with gzip.open(src_path, 'rb') as comp_file, open(partial_path, 'wb') as file:
        # Copy in chunks instead of holding the whole decompressed payload in memory
        shutil.copyfileobj(comp_file, file)
    os.replace(partial_path, dst_path)


_gunzip_if_missing(comp_appliances_path, appliances_path)
_gunzip_if_missing(comp_meta_appliances_path, meta_appliances_path)


In [5]:
# lines=True makes pandas parse the file as one JSON object per line
appliances = pd.read_json(path_or_buf=appliances_path, lines=True)

# The metadata load is currently disabled; uncomment to create `appliances_meta`
# appliances_meta = pd.read_json(path_or_buf=meta_appliances_path, lines=True)

### Description of our Data

- `appliances`: pandas DataFrame containing the 5-core subset of Amazon reviews for the Appliances category (every user and every item has at least 5 reviews).
- `appliances_meta`: pandas DataFrame containing metadata related to products and product categories, including price and description. Note: the line that loads it is currently commented out, so this DataFrame is not created when the notebook runs.

In [11]:
# Preview the first rows of the reviews DataFrame to check the columns and values
appliances.head()

Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,style,reviewerName,reviewText,summary,unixReviewTime,vote,image
0,5,True,"08 22, 2013",A34A1UP40713F8,B00009W3I4,{'Style:': ' Dryer Vent'},James. Backus,I like this as a vent as well as something tha...,Great product,1377129600,,
1,5,True,"02 8, 2016",A1AHW6I678O6F2,B00009W3PA,{'Size:': ' 6-Foot'},kevin.,good item,Five Stars,1454889600,,
2,5,True,"08 5, 2015",A8R48NKTGCJDQ,B00009W3PA,{'Size:': ' 6-Foot'},CDBrannom,Fit my new LG dryer perfectly.,Five Stars,1438732800,,
3,5,True,"04 24, 2015",AR3OHHHW01A8E,B00009W3PA,{'Size:': ' 6-Foot'},Calvin E Reames,Good value for electric dryers,Perfect size,1429833600,,
4,5,True,"03 21, 2015",A2CIEGHZ7L1WWR,B00009W3PA,{'Size:': ' 6-Foot'},albert j. kong,Price and delivery was excellent.,Five Stars,1426896000,,


In [12]:
# appliances_meta.head()