In [4]:
import json

'''This file takes as input the building info list from the bld_info.json file and processes it to remove unnecessary keys.  Currently it parses the following keys from the :
1. address
2. floorPlans
3. buildingAttributes
4. amenitySummary
5. assignedSchools
6. walkScore
7. transitScore
8. bikeScore
9. amenityDetails
10. detailedPetPolicy
'''

# open the building information and store it as bld_info_list
with open('C:/Projects/Housing_Price_Prediction/data_processing/raw_bld_info.json', 'r') as f:
    raw_bld_info_list = f.read()
bld_info_list = json.loads(raw_bld_info_list)


def clean_address(address_dict):
    '''fixes the address of the address dictionary of each building dictionary
    '''
    try:
        address = ""
        for key, value in address_dict.items():
            if key != '__typename' and key != 'neighborhood':
                address = address + value + " "
        return address[0:-2]
    except AttributeError as e:
        print("building address is the wrong format or is already parsed")


def clean_floorPlans(building_info_dict):
    '''Processes the building dictionary into multiple apartment unit dictionaries. 
    Creates a list of apartment units for each unit in each plan of the different floor plans.
    Returns a list of apartment dictionaries unit information of each apartment.  Each index contains different floor plan attributes, but the same building information
    '''

    apt_info_list = []
    try:
        # iterate through each floor plan (dictionary) in the building's floor plan list
        for floor_plan_dict in building_info_dict['floorPlans']:

            # append the wanted floor plan attributes (keys) to the processed_floor_plan_dict
            processed_floor_plan_dict = {}
            wanted_floor_plan_keys = [
                'baths', 'beds', 'floorPlanUnitPhotos', 'name', 'photos', 'sqft', 'description']
            for key, value in floor_plan_dict.items():
                if key in wanted_floor_plan_keys:
                    processed_floor_plan_dict.update({key: value})

            # iterate through the key, value pairs in the floor plan dictionary
            for key, value in floor_plan_dict.items():

                # check if the key is the 'units' key
                if key == 'units':
                    units_list = value

                    # iterate through each unit in the units list of each floor plan of each building
                    for unit_dict in units_list:

                        # append the wanted unit attributes (keys) to the processed_unit_dict
                        processed_unit_dict = {}
                        wanted_unit_keys = ['unitNumber', 'zpid', 'availableFrom', 'hasApprovedThirdPartyVirtualTour', 'price']
                        for key, value in unit_dict.items():
                            if key in wanted_unit_keys:
                                processed_unit_dict.update({key: value})

                        # make a new dict (row) to store both the floor plan and the unit attributes
                        apt_info_dict = {}

                        # add the floor plan info to the apt dict
                        apt_info_dict.update(floor_plan_dict)

                        # add the unit info to the apt dict
                        apt_info_dict.update(unit_dict)

                        # append the apt_info_dict to the apt_info_list
                        apt_info_list.append(apt_info_dict)

        # return the apartment info list
        return apt_info_list

    except AttributeError as e:
        print("listing floorPlans is the wrong format or is already parsed")


processed_apt_info_list = []
for bld_info_dict in bld_info_list:

    # call the clean_address function
    # bld_info_dict['address'] = clean_address(bld_info_dict['address'])

    # append the new apartment dictionaries created by the clean_floorPlans() function to the apt_info_list
    apt_info_list = clean_floorPlans(bld_info_dict)
    for apt in apt_info_list:
        processed_apt_info_list.append(apt)

print(len(processed_apt_info_list))
for apt in processed_apt_info_list:
    print(apt.keys())

print(json.dumps(processed_apt_info_list, indent=5))
'''
if key == 'buildingAttributes':
    print(value)
if key == 'amenitySummary':
    print(value)
if key == 'assignedSchools':
    print(value)
if key == 'walkScore':
    print(value)
if key == 'transitScore':
    print(value)
if key == 'bikeScore':
    print(value)
if key == 'amenityDetails':
    print(value)
if key == 'detailedPetPolicy':
    print(value)
'''


30
dict_keys(['zpid', '__typename', 'units', 'floorplanVRModel', 'minPrice', 'maxPrice', 'availableFrom', 'baths', 'beds', 'floorPlanUnitPhotos', 'name', 'photos', 'sqft', 'thirdPartyVirtualTour', 'vrModels', 'maloneId', 'description', 'unitNumber', 'housingConnector', 'housingConnectorExclusive', 'vrModel', 'hasApprovedThirdPartyVirtualTour', 'price', 'unitVRModel'])
dict_keys(['zpid', '__typename', 'units', 'floorplanVRModel', 'minPrice', 'maxPrice', 'availableFrom', 'baths', 'beds', 'floorPlanUnitPhotos', 'name', 'photos', 'sqft', 'thirdPartyVirtualTour', 'vrModels', 'maloneId', 'description', 'unitNumber', 'housingConnector', 'housingConnectorExclusive', 'vrModel', 'hasApprovedThirdPartyVirtualTour', 'price', 'unitVRModel'])
dict_keys(['zpid', '__typename', 'units', 'floorplanVRModel', 'minPrice', 'maxPrice', 'availableFrom', 'baths', 'beds', 'floorPlanUnitPhotos', 'name', 'photos', 'sqft', 'thirdPartyVirtualTour', 'vrModels', 'maloneId', 'description', 'unitNumber', 'housingConnec

"\nif key == 'buildingAttributes':\n    print(value)\nif key == 'amenitySummary':\n    print(value)\nif key == 'assignedSchools':\n    print(value)\nif key == 'walkScore':\n    print(value)\nif key == 'transitScore':\n    print(value)\nif key == 'bikeScore':\n    print(value)\nif key == 'amenityDetails':\n    print(value)\nif key == 'detailedPetPolicy':\n    print(value)\n"