In [93]:
"""
FILE: skeleton_parser.py
------------------
Author: Firas Abuzaid (fabuzaid@stanford.edu)
Author: Perth Charernwattanagul (puch@stanford.edu)
Modified: 04/21/2014

Skeleton parser for CS564 programming project 1. Has useful imports and
functions for parsing, including:

1) Directory handling -- the parser takes a list of eBay json files
and opens each file inside of a loop. You just need to fill in the rest.
2) Dollar value conversions -- the json files store dollar value amounts in
a string like $3,453.23 -- we provide a function to convert it to a string
like XXXXX.xx.
3) Date/time conversions -- the json files store dates/times in the form
Mon-DD-YY HH:MM:SS -- we wrote a function (transformDttm) that converts to the
format YYYY-MM-DD HH:MM:SS, which will sort chronologically in SQL.

Your job is to implement the parseJson function, which is invoked on each file by
the main function. We create the initial Python dictionary object of items for
you; the rest is up to you!
Happy parsing!
"""

import sys
from json import loads
from re import sub

columnSeparator = "|"

# Dictionary of months used for date transformation
MONTHS = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06',\
        'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'}

"""
Returns true if a file ends in .json
"""
def isJson(f):
    """Return True when `f` ends in '.json' and has at least one character before it."""
    suffix = '.json'
    # A bare '.json' (or anything shorter) has no base name and is rejected.
    if len(f) <= len(suffix):
        return False
    return f[-len(suffix):] == suffix

"""
Converts month to a number, e.g. 'Dec' to '12'
"""
def transformMonth(mon):
    """Return the two-digit number for a month abbreviation such as 'Dec'.

    A value that is not a key of MONTHS is handed back unchanged.
    """
    return MONTHS.get(mon, mon)

"""
Transforms a timestamp from Mon-DD-YY HH:MM:SS to YYYY-MM-DD HH:MM:SS
"""
def transformDttm(dttm):
    """Rewrite a 'Mon-DD-YY HH:MM:SS' timestamp as 'YYYY-MM-DD HH:MM:SS'.

    The two-digit year is prefixed with '20'; the time-of-day part is copied
    through untouched.
    """
    pieces = dttm.strip().split(' ')
    # pieces[0] is the 'Mon-DD-YY' date, pieces[1] the 'HH:MM:SS' time.
    mdy = pieces[0].split('-')
    return f"20{mdy[2]}-{transformMonth(mdy[0])}-{mdy[1]} {pieces[1]}"

"""
Transform a dollar value amount from a string like $3,453.23 to XXXXX.xx
"""

def transformDollar(money):
    """Strip a dollar amount such as '$3,453.23' down to digits and dots.

    Args:
        money: the amount as a string, or None.

    Returns:
        The string with every character other than a digit or '.' removed
        (e.g. '3453.23'). None and the empty string are returned unchanged.
    """
    # Truthiness covers both None and '' without comparing to None by equality.
    if not money:
        return money
    return sub(r'[^\d.]', '', money)

"""
Parses a single json file. Currently, there's a loop that iterates over each
item in the data set. Your job is to extend this functionality to create all
of the necessary SQL tables for your database.
"""
def _datField(value):
    """Return `value` as text for one .dat column; a missing value becomes NULL."""
    return 'NULL' if value is None else str(value)


def _datRow(values):
    """Join `values` into one separator-delimited, newline-terminated row."""
    return columnSeparator.join(_datField(v) for v in values) + '\n'


def _newUserRow(user_id, rating, country, location, seen):
    """Return the users.dat row for `user_id`, or '' if `seen` already holds it."""
    if user_id in seen:
        return ''
    seen.add(user_id)
    return _datRow([user_id, rating, country, location])


def _finalPrice(item, bids):
    """Return the value written to the last items.dat column.

    Without bids this is the item's 'Currently' amount; with bids it is the
    highest bid 'Amount'.
    """
    # NOTE(review): the item's own 'Buy_Price' key is never consulted here, which
    # mirrors the earlier zero-bid branch -- confirm against the intended schema.
    amounts = [transformDollar(bid['Amount']) for bid in bids]
    amounts = [amount for amount in amounts if amount]
    if item['Number_of_Bids'] == '0' or not amounts:
        return transformDollar(item['Currently'])
    return max(amounts, key=float)


def parseJson(json_file):
    """Parse one eBay json file and append its rows to the four .dat files.

    Every entry of the file's 'Items' list is processed. Rows are appended to
    files in the current working directory:

      category.dat  ItemID | Category            (one row per category)
      items.dat     ItemID | Name | Currently | First_Bid | Number_of_Bids |
                    Started | Ends | Description | Seller UserID | final price
      users.dat     UserID | Rating | Country | Location
      bids.dat      ItemID | Bidder UserID | Time | Amount

    Dollar amounts go through transformDollar and timestamps through
    transformDttm. A missing (None) value is written as NULL. Values are
    written verbatim, so a value that itself contains the column separator
    produces a malformed row.

    Users are de-duplicated within a single call only (the first occurrence
    wins); a user appearing in two input files is written once per file.

    Args:
        json_file: path of the json file to parse.

    Raises:
        KeyError: if 'Items' or a required item/bid key is absent.
    """
    with open(json_file, 'r') as f:
        items = loads(f.read())['Items']

    users_visited = set()
    # Open the outputs once per input file instead of once per item.
    with open('category.dat', 'a') as category_out, \
            open('items.dat', 'a') as items_out, \
            open('users.dat', 'a') as users_out, \
            open('bids.dat', 'a') as bids_out:
        for item in items:
            item_id = item['ItemID']
            seller = item['Seller']
            # A missing or null 'Bids' entry is treated as "no bids".
            bids = [entry['Bid'] for entry in (item.get('Bids') or [])]

            # Build every row for this item before writing any of them, so an
            # error in the item leaves none of its rows behind.
            category_rows = [_datRow([item_id, name]) for name in item['Category']]

            # The seller's country and location are read from the item itself.
            user_rows = [_newUserRow(seller['UserID'], seller['Rating'],
                                     item.get('Country'), item.get('Location'),
                                     users_visited)]
            bid_rows = []
            for bid in bids:
                bidder = bid['Bidder']
                user_rows.append(_newUserRow(bidder['UserID'], bidder['Rating'],
                                             bidder.get('Country'),
                                             bidder.get('Location'),
                                             users_visited))
                bid_rows.append(_datRow([item_id,
                                         bidder['UserID'],
                                         transformDttm(bid['Time']),
                                         transformDollar(bid['Amount'])]))

            item_row = _datRow([
                item_id,
                item['Name'],
                transformDollar(item['Currently']),
                transformDollar(item['First_Bid']),
                item['Number_of_Bids'],
                transformDttm(item['Started']),
                transformDttm(item['Ends']),
                item.get('Description'),
                seller['UserID'],
                _finalPrice(item, bids),
            ])

            category_out.writelines(category_rows)
            items_out.write(item_row)
            users_out.writelines(user_rows)
            bids_out.writelines(bid_rows)
"""
Loops through each json files provided on the command line and passes each file
to the parser
"""
def main(argv):
    """Parse every .json path listed in `argv[1:]`.

    Args:
        argv: argument list in sys.argv layout; element 0 is ignored.

    Exits with status 1 after printing a usage line to stderr when no path is
    given. Arguments that do not end in '.json' are skipped silently.
    """
    if len(argv) < 2:
        # Python 3 form of the old `print >> sys.stderr, ...` statement.
        print('Usage: python skeleton_json_parser.py <path to json files>',
              file=sys.stderr)
        sys.exit(1)
    # loops over all .json files in the argument
    for f in argv[1:]:
        if isJson(f):
            parseJson(f)
            print("Success parsing " + f)
            
if __name__ == '__main__':
    # Hard-coded argument list standing in for the command line; slot 0 is a
    # placeholder for the program name, which main() never reads.
    # To take the paths from the real command line, call main(sys.argv) instead.
    debug_argv = [0, "items-0.json"]
    main(debug_argv)

{'ItemID': '1043495702', 'Name': 'Precious Moments Fig-ANGEL OF MERCY- NURSE', 'Category': ['Collectibles', 'Decorative & Holiday', 'Decorative by Brand', 'Enesco', 'Precious Moments'], 'Currently': '$28.00', 'First_Bid': '$9.99', 'Number_of_Bids': '6', 'Bids': [{'Bid': {'Bidder': {'UserID': 'nobody138', 'Rating': '427', 'Location': 'GOD BLESS AMERICA, FROM SOUTH, MS', 'Country': 'USA'}, 'Time': 'Dec-04-01 23:20:07', 'Amount': '$12.99'}}, {'Bid': {'Bidder': {'UserID': 'danielhb2000', 'Rating': '1', 'Location': 'Huntington Beach, Ca.', 'Country': 'USA'}, 'Time': 'Dec-06-01 02:00:07', 'Amount': '$15.99'}}, {'Bid': {'Bidder': {'UserID': 'boncon123', 'Rating': '106', 'Location': 'Northern Ireland', 'Country': 'United Kingdom'}, 'Time': 'Dec-07-01 04:40:07', 'Amount': '$18.99'}}, {'Bid': {'Bidder': {'UserID': 'watchdenmark', 'Rating': '37', 'Location': 'Munich', 'Country': 'Germany'}, 'Time': 'Dec-08-01 07:20:07', 'Amount': '$22.00'}}, {'Bid': {'Bidder': {'UserID': 'mrwvh', 'Rating': '92', 