In [21]:
import sqlite3
import pandas as pd

# Load every row of the `properties` table from the SQLite file into `df`.
query = "SELECT * FROM properties"

conn = sqlite3.connect('test.db')
try:
    # Load the data into a DataFrame
    df = pd.read_sql_query(query, conn)
finally:
    # Release the database handle even when the query raises; otherwise a
    # failed read would leave the connection open for the life of the kernel.
    conn.close()

print(f"Number of records in df: {df.shape[0]}")


Number of records in df: 7744


In [22]:
# Normalize lot sizes to acres: rows whose unit is 'sqft' have their value
# divided by 43560 and are relabelled 'acres'; all other rows are untouched.
SQFT_PER_ACRE = 43560
sqft_rows = df['lot_area_unit'] == 'sqft'

# The mask is computed once, before the unit label is overwritten, so both
# assignments target exactly the same rows.
df.loc[sqft_rows, 'lot_area_value'] = df.loc[sqft_rows, 'lot_area_value'] / SQFT_PER_ACRE
df.loc[sqft_rows, 'lot_area_unit'] = 'acres'


In [23]:
# Inspect a single listing: select the row(s) whose zpid is 155577068 and
# print each column as a "name: value" line.
zpid_155577068 = df.loc[df['zpid'] == 155577068]
for _, listing in zpid_155577068.iterrows():
    for column, cell in listing.items():
        print(f"{column}: {cell}")


id: 1
zpid: 155577068
address: 2928 Meadow Ave, Fort Myers, FL 33901
unit: None
latitude: 26.624258
longitude: -81.86533
price: 48000
price_change: nan
zestimate: nan
img_src: https://photos.zillowstatic.com/fp/1a7c2fc8ec02c4c638286348faba4e2a-p_e.jpg
detail_url: https://www.zillow.com/homedetails/2928-Meadow-Ave-Fort-Myers-FL-33901/155577068_zpid/
bedrooms: nan
bathrooms: nan
living_area: nan
lot_area_value: 0.196
lot_area_unit: acres
listing_status: RECENTLY_SOLD
property_type: LOT
contingent_listing_type: None
rent_zestimate: 1149.0
days_on_zillow: 19
date_sold: 2024-08-15
country: USA
currency: USD
has_image: 1.0
county_name: None
state_id: FL
county_fips: None
zip_code: 33901


In [24]:
from datetime import datetime

# Calculate the number of days since the property was sold.
# NOTE: this uses the wall-clock time of the run, so the 30/90-day windows
# below (and therefore the CSV) change depending on when the cell is executed.
df['days_since_sold'] = (datetime.now() - pd.to_datetime(df['date_sold'])).dt.days

# Precompute per-row boolean flags so the aggregation can use the built-in
# 'sum' instead of lambdas that index back into the outer frame per group.
# The flags live on a temporary copy (`assign`), so `df` only gains the
# 'days_since_sold' column, as before.
is_recently_sold = df['listing_status'] == 'RECENTLY_SOLD'
listing_flags = df.assign(
    _is_for_sale=df['listing_status'] == 'FOR_SALE',
    # Rows with a missing sale date compare as False and are not counted.
    _sold_within_90=is_recently_sold & (df['days_since_sold'] <= 90),
    _sold_within_30=is_recently_sold & (df['days_since_sold'] <= 30),
)

# Group by zip code and calculate the required metrics
zip_code_stats = listing_flags.groupby('zip_code').agg(
    properties_for_sale=('_is_for_sale', 'sum'),
    properties_sold_90_days=('_sold_within_90', 'sum'),
    properties_sold_30_days=('_sold_within_30', 'sum'),
    average_price=('price', 'mean'),
    median_price=('price', 'median'),
    price_std=('price', 'std'),
    average_acreage=('lot_area_value', 'mean'),
    median_acreage=('lot_area_value', 'median')
).reset_index()

# Calculate absorption rate: sales in the last 30 days per active listing.
# Zip codes with no active listings have an undefined rate; mask the zero
# denominator to NaN so the result is NaN (blank in the CSV) instead of inf.
active_listings = zip_code_stats['properties_for_sale'].where(
    zip_code_stats['properties_for_sale'] > 0
)
zip_code_stats['absorption'] = zip_code_stats['properties_sold_30_days'] / active_listings

# Output the results to a CSV file
zip_code_stats.to_csv('zip_code_stats.csv', index=False)
