# Generate style labels and features for houses

In [None]:
import sys  
sys.path.insert(0, '../visual_home_finder')

import config, paths
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.applications import ResNet50

In [None]:
# Listing CSVs to process, one file per ZIP code.
# To process every CSV in the listings folder instead, use:
#   list_of_csvs = paths.list_files(config.LISTINGS_PATH, validExts='.csv')
list_of_csvs = [
    os.path.sep.join([config.LISTINGS_PATH, 'Zip_%s.csv' % zip_code])
    for zip_code in ('98105', '98115', '98117')
]

# Mean that is subtracted from each image before it is passed to the models
img_mean = config.IMG_MEAN

In [None]:
# Load the trained home-style classifier from the configured model path
home_model = load_model(
    os.path.sep.join([config.MODEL_PATH, config.MODEL_NAME])
)

# Home-style feature extractor: shares the classifier's input but returns
# the output of its 'dense_4' layer instead of the final prediction
home_feature_model = Model(
    inputs=home_model.input,
    outputs=home_model.get_layer('dense_4').output,
)

# ResNet50 feature extractor (for comparison): same idea, returning the
# output of ResNet50's 'avg_pool' layer
resnet_model = ResNet50()
resnet_feature_model = Model(
    inputs=resnet_model.input,
    outputs=resnet_model.get_layer('avg_pool').output,
)

In [None]:
# Print the layer-by-layer summary of the ResNet50 model; handy for checking
# layer names such as 'avg_pool', which is used as the feature output
resnet_model.summary()

In [None]:
# Go through each listings file, find the image for every listing, predict
# its home style and extract the home-style and ResNet feature vectors.
# Listings whose image cannot be loaded are dropped from the result.
home_listings_df = pd.DataFrame()

# Per-file frames are collected here and concatenated once at the end
# (DataFrame.append was removed in pandas 2.0).
per_file_frames = []

count = 0
for listings_file in list_of_csvs:

    print('Processing file %s...'%(listings_file))

    listings_df = pd.read_csv(listings_file)
    # Row ids are '<zip>_<1-based row number>'; the image for a listing is
    # looked up under the same name with a '.jpg' extension.
    index_column = listings_df['ZIP OR POSTAL CODE'].astype(str)+'_'+ (listings_df.index + 1).astype(str)
    listings_df = listings_df.set_index(index_column)

    # The feature columns hold Python lists, so they must be object-typed
    listings_df['predicted_style'] = ""
    listings_df['home_feature'] = ""
    listings_df['resnet_feature'] = ""
    listings_df.home_feature = listings_df.home_feature.astype('object')
    listings_df.resnet_feature = listings_df.resnet_feature.astype('object')

    # Ids of listings without a loadable image; dropped in one go after the
    # loop instead of re-filtering the whole frame for every miss.
    missing_ids = []

    for ii in index_column:

        image_name = os.path.sep.join([config.LISTINGS_PATH, ii+'.jpg'])
        try:
            img = image.load_img(image_name, target_size = (224,224))
        except OSError:
            # Missing or unreadable image file: skip this listing. Only
            # OSError (incl. FileNotFoundError) is caught so that real
            # errors and KeyboardInterrupt are not silently swallowed.
            missing_ids.append(ii)
            print('File %s not found'%image_name)
            continue

        orig_img = image.img_to_array(img)
        # Mean-subtract and add the batch dimension expected by predict()
        img = np.expand_dims(orig_img - img_mean, axis = 0)
        home_style_raw = home_model.predict(img)

        # The class with the highest score is the predicted style
        house_style = config.CLASSES[np.argmax(np.ravel(home_style_raw))]
        listings_df.at[ii, 'predicted_style'] = house_style
        listings_df.at[ii, 'home_feature'] = \
                        np.ravel(home_feature_model.predict(img)).tolist()
        # NOTE(review): the ResNet features use the same mean-subtracted
        # input as the home-style model rather than ResNet50's own
        # preprocess_input -- confirm this is intended.
        listings_df.at[ii, 'resnet_feature'] = \
                        np.ravel(resnet_feature_model.predict(img)).tolist()

        # Display some classifications (only while count is below 50)
        count += 1
        if count<50:
            print(image_name)
            plt.imshow(orig_img/255)
            plt.title(house_style)
            plt.show()

    # Remove the listings for which no image could be loaded
    if missing_ids:
        listings_df = listings_df.drop(index=missing_ids)
    per_file_frames.append(listings_df)

# pd.concat raises on an empty list, so keep the empty frame in that case
if per_file_frames:
    home_listings_df = pd.concat(per_file_frames)
print('Processing complete!')

In [None]:
# Save the combined listings (with predicted styles and both feature columns)
# as 'home_features4.csv' under config.FEATURE_PATH.
# NOTE(review): the feature columns hold Python lists, which presumably end up
# as their text representation in the CSV -- confirm downstream readers parse them.
home_listings_df.to_csv(os.path.sep.join([config.FEATURE_PATH, 'home_features4.csv']))