In [0]:
# %pip install transformers
# %pip install torch
# %pip install TensorFlow
# %pip install flask
# %pip install plotly
%pip install PyPI

In [0]:
import pandas as pd
from pyspark.sql import SparkSession

# Reuse the active Spark session if there is one, otherwise create it.
spark = SparkSession.builder.appName("RealEstate").getOrCreate()

# Load data: aggregate the raw listings to one row per
# (DATE_DAY, CITY, STATE, ZIPCODE, HOMESTATUS) via GROUP BY ALL.
#
# - ZIPCODE values shorter than 5 characters are left-padded with '0';
#   other values are passed through unchanged by the CASE expression.
# - Every numeric column is parsed with TRY_CAST, so a malformed value becomes
#   NULL instead of failing the query, and AVG ignores NULL inputs.
# - AVERAGE_PRICE is averaged over listings that actually carry a price.
#   Missing prices are no longer coalesced to 0, which pulled averages down.
# - The pad character is a single-quoted SQL string literal so it is never
#   parsed as an identifier.
real_estate_df = spark.sql(
    """
    SELECT
      to_date(TIMESTAMP) AS DATE_DAY,
      CITY,
      STATE,
      CASE WHEN LEN(ZIPCODE) < 5 THEN lpad(ZIPCODE, 5, '0') ELSE ZIPCODE END AS ZIPCODE,
      HOMESTATUS,
      AVG(TRY_CAST(BEDROOMS AS DOUBLE)) AS AVERAGE_BEDROOMS,
      AVG(TRY_CAST(BATHROOMS AS DOUBLE)) AS AVERAGE_BATHROOMS,
      AVG(TRY_CAST(LONGITUDE AS DOUBLE)) AS LONGITUDE,
      AVG(TRY_CAST(LATITUDE AS DOUBLE)) AS LATITUDE,
      AVG(TRY_CAST(PRICE AS DOUBLE)) AS AVERAGE_PRICE
    FROM `bright_data_real_estate_listings`.`datasets`.`zillow_properties`
    GROUP BY ALL
    """
)

# Optional: Arrow-based conversion for the later toPandas() call.
# spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

# Data cleaning and preprocessing: drop every row that has a NULL in any
# column. Groups with no parsable price now have a NULL AVERAGE_PRICE and are
# removed here, rather than being kept with an average of 0.
real_estate_df_clean = real_estate_df.dropna()


In [0]:
# Render the cleaned, aggregated listings for visual inspection.
# NOTE(review): `display` is not imported in the visible cells — presumably the
# notebook environment's built-in; confirm before running elsewhere.
display(real_estate_df_clean)

In [0]:
import plotly.express as px

# Collect the cleaned aggregate into a pandas DataFrame for Plotly.
df_plot = real_estate_df_clean.toPandas()

# One marker per aggregated row, coloured and sized by average price.
fig = px.scatter_mapbox(
    df_plot,
    lat="LATITUDE",
    lon="LONGITUDE",
    color="AVERAGE_PRICE",
    size="AVERAGE_PRICE",
    # Display name for the column in the colour bar title and hover text.
    # Setting the trace `name` alone did not rename it.
    labels={"AVERAGE_PRICE": "Average Price"},
    # NOTE(review): this is a cyclical scale; a sequential scale may read
    # better for a monotonic quantity such as price. Confirm intent.
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=20,
    mapbox_style="carto-positron",
)

# Marker styling. The trace name is kept as it was set before.
fig.update_traces(marker=dict(symbol="circle", opacity=0.8), name="Average Price")

# All layout settings in one call. The initial zoom formerly passed to
# scatter_mapbox (2.5) was always overridden by the mapbox zoom below.
fig.update_layout(
    title="Real Estate Prices",
    title_x=0.5,  # Center title
    title_font_size=24,  # Change title size
    title_font_color="white",  # Update title text color
    paper_bgcolor="black",  # Set background color
    legend_font_color="white",  # Set legend text color to white
    font_color="white",  # Set all font color to white
    mapbox=dict(
        center=dict(lat=40.7128, lon=-74.0060),  # Set center to New York City
        zoom=10,  # Set zoom level
    ),
    # Fixed figure dimensions
    autosize=False,
    width=800,
    height=600,
)

fig.show()