# Preparing Listings for Analysis and Additional Machine Learning

**The data come from DynamoDB backups in S3 and are in JSON format.**

In [16]:
import boto3

import pandas as pd
import gzip
import shutil
import json

## Read Data from DynamoDB

In [17]:
# Module-level DynamoDB handles shared by the cells below.
# NOTE(review): region and credentials are not set here, so they presumably
# come from the ambient boto3 configuration — confirm for the target environment.
dynamodb = boto3.resource('dynamodb')
# Table object for 'properties_enriched'; get_comprehend_data reads from it.
table = dynamodb.Table('properties_enriched')

In [38]:
def get_comprehend_data(property_id):
    """
    Build one label/text record for AWS Comprehend custom-model training.

    AWS Comprehend needs a label and a text to build a custom model. This
    function extracts these two fields from a single record in the
    module-level DynamoDB ``table``. Consider making this a Lambda.

    The label (``remodel_status``) is the most frequent top label across the
    property's labeled photos, restricted to "old/dated", "modern/remodeled"
    and "destroyed/mess". Photos whose top label is anything else, or whose
    entry is malformed, are ignored. Ties go to the label listed first above.

    Args:
        property_id: Value of the table's ``property_id`` key.

    Returns:
        A dict with keys ``property_id``, ``remodel_status`` and
        ``description`` (in that order). ``remodel_status`` is ``None`` when
        no photo carries one of the three recognised labels.

    Raises:
        KeyError: If the response has no ``'Item'`` or the item lacks one of
            the expected fields (``property_info``, ``properties``,
            ``description``, ``labeled_photos``, ``property_id``).
        IndexError: If ``property_info['properties']`` is empty.
    """
    response = table.get_item(Key={'property_id': property_id})
    item = response['Item']
    property_info = item['property_info']

    description = property_info['properties'][0]['description']
    record_id = item['property_id']

    # Tally of each photo's top label; insertion order doubles as the
    # tie-break order used by max() below.
    label_counts = {
        "old/dated": 0,
        "modern/remodeled": 0,
        "destroyed/mess": 0,
    }

    for photo_labels in property_info['labeled_photos'].values():
        # Keep the try body to the one lookup that can fail on a malformed
        # entry (empty list, missing 'Name', None, ...), and catch only those
        # lookup errors rather than everything.
        try:
            top_label = photo_labels[0][0]['Name']
        except (KeyError, IndexError, TypeError):
            continue
        # The isinstance guard keeps an unexpected unhashable value from
        # raising on the dict membership test.
        if isinstance(top_label, str) and top_label in label_counts:
            label_counts[top_label] += 1

    if any(label_counts.values()):
        # Pick the winner on raw counts: comparing shares rounded to two
        # decimals could produce a false tie between near-equal counts.
        remodel_status = max(label_counts, key=label_counts.get)
    else:
        remodel_status = None

    return {
        'property_id': record_id,
        'remodel_status': remodel_status,
        'description': description,
    }


In [44]:

# Spot-check on one property; the cell output below shows the returned record.
get_comprehend_data(property_id='M7392205127')


{'property_id': 'M7392205127',
 'remodel_status': 'modern/remodeled',
 'description': "Ideal low maintenance, zero lot home tucked in the heart of the city! Minutes from all things Memphis, this sought after gem is not to be missed! 4 Bedrooms (downstairs bedroom currently utilized as sitting room), 4 baths, 9' ceilings, hardwood floors, updated kitchen, and so much more! Abundant natural light. 2 car garage. Picture perfect patio with lush landscaping. HOA includes yard maintenance, keeping this idyllic neighborhood beautiful all the time!"}

In [45]:
# Second spot-check; the cell output below shows this one labelled 'old/dated'.
get_comprehend_data(property_id='M4671480765')


{'property_id': 'M4671480765',
 'remodel_status': 'old/dated',
 'description': 'Great investment property to compliment, or start, your portfolio! Located in the Slavic Village neighborhood! Property sold as-is! Currently generating a monthly income of $1, 000.'}