In [1]:
import requests
import pandas as pd
from pprint import pprint
import time
import config

# API request setup
url = "https://realtor-data3.p.rapidapi.com/realtor/SearchSold"
headers = {
    'x-rapidapi-key': config.RAPIDAPI_KEY,
    'x-rapidapi-host': config.RAPIDAPI_HOST
}


ModuleNotFoundError: No module named 'config'

In [None]:
# List to hold Property Values
property_list = []

# Loop through multiple pages
for page in range(1, 15):
    querystring = {"location": "St. Paul, MN", "sort": "best_match", "property_type": "single_family_home", "page": page,  "page_size": 120 }

    response = requests.get(url, headers=headers, params=querystring)
        # Error handling for failed requests
    if response.status_code != 200:
        print(f"⚠️ Error fetching page {page}: {response.status_code}")
        continue
    data = response.json()
#Extract Property Values
    for item in data.get("data", []):
      # Safely access nested dictionary keys, providing default None values if a key is missing
        location_data = item.get("location", {})
        address_data = location_data.get("address", {})
        coordinate_data = address_data.get("coordinate", {}) # Ensure coordinate_data is a dict or None

        # Use .get() with a default value for 'lat' and 'lon' in case coordinate_data is None
        latitude = coordinate_data.get("lat") if coordinate_data is not None else None
        longitude = coordinate_data.get("lon") if coordinate_data is not None else None

        property_list.append({
        "list_date": item.get("list_date"),
        "list_price": item.get("list_price"),
        "sold_date": item.get("description", {}).get("sold_date"),
        "sold_price": item.get("description", {}).get("sold_price"),
        "beds": item.get("description", {}).get("beds"),
        "baths": item.get("description", {}).get("baths_consolidated"),
        "sqft": item.get("description", {}).get("sqft"),
        "lot_sqft": item.get("description", {}).get("lot_sqft"),
        "city": item.get("location", {}).get("address", {}).get("city"),
        "street": item.get("location", {}).get("address", {}).get("line"),
        "zip": item.get("location", {}).get("address", {}).get("postal_code"),
        "latitude": latitude,
        "longitude": longitude,
    })
    # Avoid hitting rate limits
    time.sleep(1)

# Create DataFrame
df = pd.DataFrame(property_list)

# Display first few rows
df

Unnamed: 0,list_date,list_price,sold_date,sold_price,beds,baths,sqft,lot_sqft,city,street,zip,latitude,longitude
0,2025-01-31T06:20:35.000000Z,209900.0,2025-03-31,209900,2.0,1.5,904.0,4704.0,Saint Paul,868 Algonquin Ave,55119,44.967562,-93.017718
1,2024-11-15T17:52:22.000000Z,374900.0,2024-12-13,391000,3.0,1.5,1534.0,4966.0,Saint Paul,1246 Bayard Ave,55116,44.922167,-93.152861
2,2024-12-20T04:16:16.000000Z,275000.0,2025-01-10,266800,3.0,1,1297.0,6534.0,Saint Paul,967 California Ave W,55117,44.991249,-93.140914
3,2025-01-23T14:51:53.000000Z,500000.0,2025-02-28,505003,3.0,2,1636.0,5009.0,Saint Paul,2098 Pinehurst Ave,55116,44.918628,-93.189055
4,2025-03-18T22:24:30.000000Z,425000.0,2025-04-08,475000,4.0,1,1750.0,7362.0,Saint Paul,2129 Sargent Ave,55105,44.935472,-93.190323
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1295,2025-01-07T17:21:21.000000Z,339900.0,2025-02-24,339900,3.0,2,1676.0,10629.0,North Saint Paul,2387 Shawnee Dr,55109,45.024836,-92.999716
1296,,,2025-04-15,225000,5.0,2.5,1854.0,11308.0,Saint Paul,2454 Ripley Ave,55109,44.995915,-92.996645
1297,,,2025-03-11,90000,2.0,1,801.0,5249.0,Saint Paul,2567 3rd Ave E,55109,45.009610,-92.992046
1298,2025-04-01T20:24:27.000000Z,315000.0,2025-05-05,360000,3.0,1,1496.0,8189.0,North Saint Paul,2805 Chisholm Ave,55109,45.025827,-93.012720


In [None]:
#Convert List Date to Datetime
df["list_date"] = pd.to_datetime(df["list_date"])
#Remove Timestamp
df["list_date"] = df["list_date"].dt.date

In [None]:
#Format list_price, beds, baths, sq_ft, lot_sqft
df["list_price"] = df["list_price"].astype('Int64')
df["beds"] = df["beds"].astype('Int64')
df["sqft"] = df["sqft"].astype('Int64')
df["lot_sqft"] = df["lot_sqft"].astype('Int64')
df["baths"] = df["baths"].astype(str).str.replace("+", "").replace("None", None).astype(float)
df


Unnamed: 0,list_date,list_price,sold_date,sold_price,beds,baths,sqft,lot_sqft,city,street,zip,latitude,longitude
0,2025-01-31,209900,2025-03-31,209900,2,1.5,904,4704,Saint Paul,868 Algonquin Ave,55119,44.967562,-93.017718
1,2024-11-15,374900,2024-12-13,391000,3,1.5,1534,4966,Saint Paul,1246 Bayard Ave,55116,44.922167,-93.152861
2,2024-12-20,275000,2025-01-10,266800,3,1.0,1297,6534,Saint Paul,967 California Ave W,55117,44.991249,-93.140914
3,2025-01-23,500000,2025-02-28,505003,3,2.0,1636,5009,Saint Paul,2098 Pinehurst Ave,55116,44.918628,-93.189055
4,2025-03-18,425000,2025-04-08,475000,4,1.0,1750,7362,Saint Paul,2129 Sargent Ave,55105,44.935472,-93.190323
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1295,2025-01-07,339900,2025-02-24,339900,3,2.0,1676,10629,North Saint Paul,2387 Shawnee Dr,55109,45.024836,-92.999716
1296,NaT,,2025-04-15,225000,5,2.5,1854,11308,Saint Paul,2454 Ripley Ave,55109,44.995915,-92.996645
1297,NaT,,2025-03-11,90000,2,1.0,801,5249,Saint Paul,2567 3rd Ave E,55109,45.009610,-92.992046
1298,2025-04-01,315000,2025-05-05,360000,3,1.0,1496,8189,North Saint Paul,2805 Chisholm Ave,55109,45.025827,-93.012720


In [None]:
duplicates = df[df.duplicated()]
print(duplicates)

In [None]:
# Save as CSV
df.to_csv("st_paul_sold_properties.csv", index=False)