In [14]:
 # Import libraries
import pandas as pd
import pickle as pk

# import libraries for api
import os
from dotenv import load_dotenv 
import requests
import json

# import libraries for vizualization
import plotly.express as px
import panel as pn
from pprint import pprint
import matplotlib.pyplot as plt

In [15]:
### Open data from a pickle : Zillow_data, which represent the houses currently for sale
filename = "Zillow_data"
infile = open(filename,'rb')
new_dict = pk.load(infile)
infile.close()

In [4]:
### transform to json
z_for_sale_resp =new_dict.json()

In [5]:
### transform json to datafram
current_zillow_df = pd.json_normalize(data=z_for_sale_resp["props"])
print('num of rows:', len(current_zillow_df))
print('num of columns:', len(current_zillow_df.columns))

current_zillow_df.head()

num of rows: 40
num of columns: 18


Unnamed: 0,bathrooms,propertyType,lotAreaValue,address,imgSrc,price,listingDateTime,listingStatus,zpid,longitude,latitude,lotAreaUnit,daysOnZillow,bedrooms,country,currency,livingArea,hasImage
0,2,SINGLE_FAMILY,1742.4,"318 W Ruscomb St, Philadelphia, PA 19120",https://photos.zillowstatic.com/fp/c528e4507f6...,150000,,FOR_SALE,10428068,-75.13041,40.026787,sqft,-1,4,USA,USD,1440,True
1,2,SINGLE_FAMILY,3920.4,"4913 Castor Ave, Philadelphia, PA 19124",https://photos.zillowstatic.com/fp/84229141ec1...,325000,,FOR_SALE,10291269,-75.09111,40.024803,sqft,-1,4,USA,USD,1920,True
2,4,SINGLE_FAMILY,0.43,"200 W Walnut Ln, Philadelphia, PA 19144",https://photos.zillowstatic.com/fp/b90b6b03745...,869000,,FOR_SALE,2068504637,-75.181816,40.03705,acres,-1,5,USA,USD,3857,True
3,4,SINGLE_FAMILY,9583.2,"2320 N 50th St, Philadelphia, PA 19131",https://photos.zillowstatic.com/fp/458505836a3...,267000,,FOR_SALE,10486635,-75.227104,39.99618,sqft,-1,5,USA,USD,4520,True
4,3,SINGLE_FAMILY,3049.2,"522 Gilham St, Philadelphia, PA 19111",https://photos.zillowstatic.com/fp/04cc332dd77...,144000,,FOR_SALE,2071261604,-75.092926,40.05096,sqft,-1,5,USA,USD,2007,True


In [6]:
### make a list of the zpid
zpid_list = current_zillow_df["zpid"].tolist()
current_zillow_df.columns

Index(['bathrooms', 'propertyType', 'lotAreaValue', 'address', 'imgSrc',
       'price', 'listingDateTime', 'listingStatus', 'zpid', 'longitude',
       'latitude', 'lotAreaUnit', 'daysOnZillow', 'bedrooms', 'country',
       'currency', 'livingArea', 'hasImage'],
      dtype='object')

In [7]:
### Select columns that we will need for the analysis
current_zillow_df.columns
current_price_df = current_zillow_df[[ "zpid","propertyType" , "price", "livingArea" , "bathrooms" , "bedrooms" ,"address", "listingStatus" , "latitude" , "longitude", "imgSrc"]].copy()
current_price_df.set_index("zpid", inplace=True)


#Pull only the zip code from the address column
current_price_df["address"] = current_price_df["address"].str[-5:]

# Change the column names to improve readability
current_price_df.rename(columns = {
        "livingArea" : "Area in sqft.",
        "propertyType" : "Type of Property",
        "address" : "zip code",
        "listingStatus" : "Status",
        "imgSrc" : "Image property"
        }, inplace = True)

current_price_df.head()

Unnamed: 0_level_0,Type of Property,price,Area in sqft.,bathrooms,bedrooms,zip code,Status,latitude,longitude,Image property
zpid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10428068,SINGLE_FAMILY,150000,1440,2,4,19120,FOR_SALE,40.026787,-75.13041,https://photos.zillowstatic.com/fp/c528e4507f6...
10291269,SINGLE_FAMILY,325000,1920,2,4,19124,FOR_SALE,40.024803,-75.09111,https://photos.zillowstatic.com/fp/84229141ec1...
2068504637,SINGLE_FAMILY,869000,3857,4,5,19144,FOR_SALE,40.03705,-75.181816,https://photos.zillowstatic.com/fp/b90b6b03745...
10486635,SINGLE_FAMILY,267000,4520,4,5,19131,FOR_SALE,39.99618,-75.227104,https://photos.zillowstatic.com/fp/458505836a3...
2071261604,SINGLE_FAMILY,144000,2007,3,5,19111,FOR_SALE,40.05096,-75.092926,https://photos.zillowstatic.com/fp/04cc332dd77...


In [8]:
### Open data from a pickle : Zillow_old_data, which represent the houses recently sold and similar to the current one for sale
filename = "Zillow_old_data"
infile = open(filename,'rb')
new_source = pk.load(infile)
infile.close()

In [9]:
### transform to json
z_recently_sold_resp =new_source.json()

In [10]:
### transform json to datafram
sold_zillow_df = pd.json_normalize(data=z_recently_sold_resp)
print('num of rows:', len(sold_zillow_df))
print('num of columns:', len(sold_zillow_df.columns))

sold_zillow_df.head(20)

num of rows: 5
num of columns: 18


Unnamed: 0,dateSold,bedrooms,homeStatus,latitude,miniCardPhotos,zpid,longitude,bathrooms,livingArea,livingAreaUnits,homeType,currency,lastSoldPrice,price,address.city,address.state,address.streetAddress,address.zipcode
0,1610582400000,5,RECENTLY_SOLD,40.05546,[{'url': 'https://photos.zillowstatic.com/fp/1...,10214276,-75.19509,3,2138,Square Feet,SINGLE_FAMILY,USD,470000,470000,Philadelphia,PA,7139 Lincoln Dr,19119
1,1619740800000,3,RECENTLY_SOLD,40.051693,[{'url': 'https://photos.zillowstatic.com/fp/f...,10284714,-75.19462,4,2200,Square Feet,SINGLE_FAMILY,USD,477500,477500,Philadelphia,PA,403 W Ellet St,19119
2,1614556800000,4,RECENTLY_SOLD,40.05139,[{'url': 'https://photos.zillowstatic.com/fp/6...,10284790,-75.19548,3,2400,Square Feet,SINGLE_FAMILY,USD,443000,443000,Philadelphia,PA,426 Glen Echo Rd,19119
3,1629158400000,5,RECENTLY_SOLD,40.057297,[{'url': 'https://photos.zillowstatic.com/fp/c...,103779525,-75.193504,3,2665,Square Feet,SINGLE_FAMILY,USD,480000,480000,Philadelphia,PA,7141 Cresheim Rd,19119
4,1634256000000,3,RECENTLY_SOLD,40.051323,[{'url': 'https://photos.zillowstatic.com/fp/4...,333709815,-75.19686,3,1760,Square Feet,SINGLE_FAMILY,USD,460000,460000,Philadelphia,PA,504 W Mount Pleasant Ave,19119


In [11]:
### Select columns that we will need for the analysis
sold_zillow_df.columns
sold_price_df = sold_zillow_df[[ "zpid", "homeType", "lastSoldPrice", "livingArea" , "bathrooms" , "bedrooms" , "address.zipcode",  "homeStatus" , "latitude" , "longitude", "miniCardPhotos"]].copy()
sold_price_df.set_index("zpid", inplace=True)

# Change the column names to improve readability
sold_price_df.rename(columns = {
        "homeType" : "Type of Property",
        "lastSoldPrice" : "Price",
        "livingArea" : "Area in sqft.",
        "address.zipcode" :"zip code",
        "homeStatus" : "Status",
        "miniCardPhotos" : "Image property"
        }, inplace = True)

sold_price_df.head(40)

Unnamed: 0_level_0,Type of Property,Price,Area in sqft.,bathrooms,bedrooms,zip code,Status,latitude,longitude,Image property
zpid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10214276,SINGLE_FAMILY,470000,2138,3,5,19119,RECENTLY_SOLD,40.05546,-75.19509,[{'url': 'https://photos.zillowstatic.com/fp/1...
10284714,SINGLE_FAMILY,477500,2200,4,3,19119,RECENTLY_SOLD,40.051693,-75.19462,[{'url': 'https://photos.zillowstatic.com/fp/f...
10284790,SINGLE_FAMILY,443000,2400,3,4,19119,RECENTLY_SOLD,40.05139,-75.19548,[{'url': 'https://photos.zillowstatic.com/fp/6...
103779525,SINGLE_FAMILY,480000,2665,3,5,19119,RECENTLY_SOLD,40.057297,-75.193504,[{'url': 'https://photos.zillowstatic.com/fp/c...
333709815,SINGLE_FAMILY,460000,1760,3,3,19119,RECENTLY_SOLD,40.051323,-75.19686,[{'url': 'https://photos.zillowstatic.com/fp/4...


In [18]:
total_price_df = pd.concat([current_price_df,sold_price_df ], axis=0, join="inner")

total_price_df.head()

Unnamed: 0_level_0,Type of Property,Area in sqft.,bathrooms,bedrooms,zip code,Status,latitude,longitude,Image property
zpid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10428068,SINGLE_FAMILY,1440,2,4,19120,FOR_SALE,40.026787,-75.13041,https://photos.zillowstatic.com/fp/c528e4507f6...
10291269,SINGLE_FAMILY,1920,2,4,19124,FOR_SALE,40.024803,-75.09111,https://photos.zillowstatic.com/fp/84229141ec1...
2068504637,SINGLE_FAMILY,3857,4,5,19144,FOR_SALE,40.03705,-75.181816,https://photos.zillowstatic.com/fp/b90b6b03745...
10486635,SINGLE_FAMILY,4520,4,5,19131,FOR_SALE,39.99618,-75.227104,https://photos.zillowstatic.com/fp/458505836a3...
2071261604,SINGLE_FAMILY,2007,3,5,19111,FOR_SALE,40.05096,-75.092926,https://photos.zillowstatic.com/fp/04cc332dd77...


In [20]:
total_price_df.plot.bar(x='zip code', y= 'price', title=' different house by neighborhood', figsize =(20, 30))

KeyError: 'price'