In [3]:
 # Import libraries
import pandas as pd
import pickle as pk

# import libraries for api
import os
from dotenv import load_dotenv 
import requests
import json

# import libraries for visualization
import seaborn as sns
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import panel as pn
from pprint import pprint

In [7]:
### Load the pickled "Zillow_data" object, which represents the houses currently for sale
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
filename = "Zillow_data"
# The context manager closes the file even if unpickling raises.
with open(filename, 'rb') as infile:
    new_dict = pk.load(infile)

### Decode the payload as JSON
# Despite its name, new_dict must expose a .json() method
# (presumably a pickled requests.Response -- TODO confirm).
z_for_sale_resp = new_dict.json()

### Flatten the records stored under the "props" key into a DataFrame
current_zillow_df = pd.json_normalize(data=z_for_sale_resp["props"])
print('num of rows:', len(current_zillow_df))
print('num of columns:', len(current_zillow_df.columns))

current_zillow_df.head()

num of rows: 40
num of columns: 21


Unnamed: 0,bathrooms,propertyType,lotAreaValue,address,imgSrc,price,listingDateTime,longitude,listingStatus,zpid,...,lotAreaUnit,daysOnZillow,bedrooms,country,currency,livingArea,hasImage,listingSubType.is_FSBA,listingSubType.is_openHouse,listingSubType.is_bankOwned
0,3.0,SINGLE_FAMILY,7405.0,"6326 Hasbrook Ave, Philadelphia, PA 19111",https://photos.zillowstatic.com/fp/5cec86dede3...,305000,1636502400000,-75.100046,FOR_SALE,10360239,...,sqft,7,5,USA,USD,2100,True,True,True,
1,4.0,SINGLE_FAMILY,0.430005,"200 W Walnut Ln, Philadelphia, PA 19144",https://photos.zillowstatic.com/fp/b90b6b03745...,869000,1631577600000,-75.181814,FOR_SALE,2068504637,...,acres,64,5,USA,USD,3857,True,True,,
2,2.0,SINGLE_FAMILY,5663.0,"2843 Comly Rd, Philadelphia, PA 19154",https://photos.zillowstatic.com/fp/52664dfa32d...,279900,1636934400000,-74.997753,FOR_SALE,10584074,...,sqft,2,3,USA,USD,1584,True,True,True,
3,1.0,SINGLE_FAMILY,3049.0,"819 Levick St, Philadelphia, PA 19111",https://photos.zillowstatic.com/fp/c7277135d77...,210000,1636934400000,-75.089118,FOR_SALE,10359239,...,sqft,2,3,USA,USD,1200,True,True,,
4,2.0,SINGLE_FAMILY,2178.0,"6132 Christian St, Philadelphia, PA 19143",https://photos.zillowstatic.com/fp/46abfc40564...,289900,1636934400000,-75.246715,FOR_SALE,10186985,...,sqft,2,4,USA,USD,1840,True,True,,


In [10]:
### Clean the for-sale listings held in current_zillow_df -----------------------------------------------------------------------------------------------
# Columns kept for the analysis; "zpid" is moved into the index below.
current_columns = [
    "zpid", "propertyType", "price", "livingArea", "bathrooms", "bedrooms",
    "address", "listingStatus", "latitude", "longitude", "imgSrc",
]

# Friendlier labels for the retained columns.
current_labels = {
    "livingArea": "Area in sqft.",
    "propertyType": "Type of Property",
    "address": "zip code",
    "listingStatus": "Status",
    "imgSrc": "Image property",
}

current_price_df = (
    current_zillow_df[current_columns]
    .copy()
    .set_index("zpid")
    # Keep only the last five characters of each address (the zip code).
    .assign(address=lambda listings: listings["address"].str[-5:])
    .rename(columns=current_labels)
)

# Derived metric: asking price divided by living area.
current_price_df["price_per_sqft"] = current_price_df["price"].div(current_price_df["Area in sqft."])

current_price_df.head()

Unnamed: 0_level_0,Type of Property,price,Area in sqft.,bathrooms,bedrooms,zip code,Status,latitude,longitude,Image property,price_per_sqft
zpid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10360239,SINGLE_FAMILY,305000,2100,3.0,5,19111,FOR_SALE,40.051195,-75.100046,https://photos.zillowstatic.com/fp/5cec86dede3...,145.238095
2068504637,SINGLE_FAMILY,869000,3857,4.0,5,19144,FOR_SALE,40.037048,-75.181814,https://photos.zillowstatic.com/fp/b90b6b03745...,225.304641
10584074,SINGLE_FAMILY,279900,1584,2.0,3,19154,FOR_SALE,40.101684,-74.997753,https://photos.zillowstatic.com/fp/52664dfa32d...,176.704545
10359239,SINGLE_FAMILY,210000,1200,1.0,3,19111,FOR_SALE,40.046245,-75.089118,https://photos.zillowstatic.com/fp/c7277135d77...,175.0
10186985,SINGLE_FAMILY,289900,1840,2.0,4,19143,FOR_SALE,39.949598,-75.246715,https://photos.zillowstatic.com/fp/46abfc40564...,157.554348


In [12]:
### Load the pickled "Zillow_sold_data" object, which represents the houses recently sold and similar to the ones currently for sale
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
filename = "Zillow_sold_data"
# The context manager closes the file even if unpickling raises.
with open(filename, 'rb') as infile:
    zpid_dict = pk.load(infile)

# Each key of zpid_dict maps to JSON-like data. Normalize every entry into its own
# DataFrame, tag it with the key it came from, and collect the pieces so they can be
# concatenated once (concatenating inside the loop re-copies all earlier rows each time).
similar_frames = []
for key in zpid_dict:
    df_similar = pd.json_normalize(data=zpid_dict[key])
    df_similar["origine ZPID"] = key
    similar_frames.append(df_similar)

# pd.concat raises on an empty list, so fall back to an empty frame when there is no data.
if similar_frames:
    sold_zillow_df = pd.concat(similar_frames, axis=0, join="outer")
else:
    sold_zillow_df = pd.DataFrame()

# Replace the repeated per-entry row labels with a fresh 0..n-1 index.
# The old labels are kept as an "index" column, as in the original version.
sold_zillow_df = sold_zillow_df.reset_index()

# Print some information about the combined dataframe
print('num of rows:', len(sold_zillow_df))
print('num of columns:', len(sold_zillow_df.columns))
sold_zillow_df.head(10)

num of rows: 68
num of columns: 21


Unnamed: 0,index,dateSold,bedrooms,homeStatus,latitude,miniCardPhotos,zpid,longitude,bathrooms,livingArea,...,homeType,currency,lastSoldPrice,price,address.city,address.state,address.streetAddress,address.zipcode,origine ZPID,message
0,0,1626826000000.0,4.0,PENDING,40.052106,[{'url': 'https://photos.zillowstatic.com/fp/e...,10360190.0,-75.101203,4.0,2216.0,...,SINGLE_FAMILY,USD,325000.0,399999.0,Philadelphia,PA,6328 Newtown Ave,19111,10360239,
1,1,1607990000000.0,5.0,RECENTLY_SOLD,40.052383,[{'url': 'https://photos.zillowstatic.com/fp/c...,2077604000.0,-75.098753,3.0,1950.0,...,SINGLE_FAMILY,USD,310000.0,310000.0,Philadelphia,PA,6430 Hasbrook Ave #A,19111,10360239,
2,2,1631837000000.0,3.0,RECENTLY_SOLD,40.050943,[{'url': 'https://photos.zillowstatic.com/fp/f...,10358940.0,-75.101819,3.0,1600.0,...,SINGLE_FAMILY,USD,280000.0,280000.0,Philadelphia,PA,222 Robbins St,19111,10360239,
3,3,1628122000000.0,4.0,RECENTLY_SOLD,40.052527,[{'url': 'https://photos.zillowstatic.com/fp/e...,10359380.0,-75.096352,3.0,2250.0,...,SINGLE_FAMILY,USD,404000.0,404000.0,Philadelphia,PA,335 Hellerman St,19111,10360239,
4,4,1599005000000.0,5.0,SOLD,40.053611,[{'url': 'https://photos.zillowstatic.com/fp/e...,10359750.0,-75.093131,3.0,2188.0,...,SINGLE_FAMILY,USD,207000.0,207000.0,Philadelphia,PA,420 Unruh Ave,19111,10360239,
5,5,1626307000000.0,5.0,RECENTLY_SOLD,40.053664,[{'url': 'https://photos.zillowstatic.com/fp/8...,2073107000.0,-75.094718,4.0,2010.0,...,SINGLE_FAMILY,USD,360000.0,360000.0,Philadelphia,PA,342 Fanshawe St,19111,10360239,
6,6,1625011000000.0,6.0,RECENTLY_SOLD,40.051388,[{'url': 'https://photos.zillowstatic.com/fp/2...,10360390.0,-75.097422,3.0,2848.0,...,SINGLE_FAMILY,USD,480000.0,480000.0,Philadelphia,PA,6417 Shelbourne St,19111,10360239,
7,7,1580429000000.0,3.0,SOLD,40.050343,[{'url': 'https://maps.googleapis.com/maps/api...,80949590.0,-75.098404,2.0,1707.0,...,SINGLE_FAMILY,USD,125000.0,125000.0,Philadelphia,PA,6323 Shelbourne St,19111,10360239,
8,8,1604621000000.0,4.0,SOLD,40.053402,[{'url': 'https://photos.zillowstatic.com/fp/a...,10359660.0,-75.094256,3.0,2100.0,...,SINGLE_FAMILY,USD,390000.0,390000.0,Philadelphia,PA,400 Fanshawe St,19111,10360239,
9,9,1604621000000.0,4.0,SOLD,40.057228,[{'url': 'https://photos.zillowstatic.com/fp/2...,9909662.0,-75.099425,3.0,1855.0,...,SINGLE_FAMILY,USD,271000.0,271000.0,Cheltenham,PA,236 Boyer Rd,19012,10360239,


In [14]:
### Clean the sold listings held in sold_zillow_df (loaded from Zillow_sold_data) --------------------------------------------------------------------------------
# Columns kept for the analysis; "zpid" is moved into the index below.
sold_columns = [
    "zpid", "homeType", "lastSoldPrice", "livingArea", "bathrooms", "bedrooms",
    "address.zipcode", "homeStatus", "latitude", "longitude", "miniCardPhotos",
]

# Friendlier labels, matching the ones used for the for-sale listings.
sold_labels = {
    "homeType": "Type of Property",
    "lastSoldPrice": "price",
    "livingArea": "Area in sqft.",
    "address.zipcode": "zip code",
    "homeStatus": "Status",
    "miniCardPhotos": "Image property",
}

sold_price_df = (
    sold_zillow_df[sold_columns]
    .copy()
    .set_index("zpid")
    .rename(columns=sold_labels)
)

# Derived metric: last sold price divided by living area.
sold_price_df["price_per_sqft"] = sold_price_df["price"].div(sold_price_df["Area in sqft."])

sold_price_df.head(40)

Unnamed: 0_level_0,Type of Property,price,Area in sqft.,bathrooms,bedrooms,zip code,Status,latitude,longitude,Image property,price_per_sqft
zpid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10360190.0,SINGLE_FAMILY,325000.0,2216.0,4.0,4.0,19111.0,PENDING,40.052106,-75.101203,[{'url': 'https://photos.zillowstatic.com/fp/e...,146.66065
2077604000.0,SINGLE_FAMILY,310000.0,1950.0,3.0,5.0,19111.0,RECENTLY_SOLD,40.052383,-75.098753,[{'url': 'https://photos.zillowstatic.com/fp/c...,158.974359
10358940.0,SINGLE_FAMILY,280000.0,1600.0,3.0,3.0,19111.0,RECENTLY_SOLD,40.050943,-75.101819,[{'url': 'https://photos.zillowstatic.com/fp/f...,175.0
10359380.0,SINGLE_FAMILY,404000.0,2250.0,3.0,4.0,19111.0,RECENTLY_SOLD,40.052527,-75.096352,[{'url': 'https://photos.zillowstatic.com/fp/e...,179.555556
10359750.0,SINGLE_FAMILY,207000.0,2188.0,3.0,5.0,19111.0,SOLD,40.053611,-75.093131,[{'url': 'https://photos.zillowstatic.com/fp/e...,94.606947
2073107000.0,SINGLE_FAMILY,360000.0,2010.0,4.0,5.0,19111.0,RECENTLY_SOLD,40.053664,-75.094718,[{'url': 'https://photos.zillowstatic.com/fp/8...,179.104478
10360390.0,SINGLE_FAMILY,480000.0,2848.0,3.0,6.0,19111.0,RECENTLY_SOLD,40.051388,-75.097422,[{'url': 'https://photos.zillowstatic.com/fp/2...,168.539326
80949590.0,SINGLE_FAMILY,125000.0,1707.0,2.0,3.0,19111.0,SOLD,40.050343,-75.098404,[{'url': 'https://maps.googleapis.com/maps/api...,73.227885
10359660.0,SINGLE_FAMILY,390000.0,2100.0,3.0,4.0,19111.0,SOLD,40.053402,-75.094256,[{'url': 'https://photos.zillowstatic.com/fp/a...,185.714286
9909662.0,SINGLE_FAMILY,271000.0,1855.0,3.0,4.0,19012.0,SOLD,40.057228,-75.099425,[{'url': 'https://photos.zillowstatic.com/fp/2...,146.091644
