In [45]:
# Conventionally people rename the pandas import to pd for brevity
import pandas as pd

In [46]:
# Load in the data and preview it
sales = pd.read_csv('home_data.csv') 
sales.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [86]:
[sales['price']]

[11553    299999
 18935    460000
 16355    295000
 6819     269500
 18933    254999
           ...  
 10834    812000
 10909    535000
 15505    700000
 10969    554729
 3112     850000
 Name: price, Length: 21613, dtype: int64]

In [47]:
# Order by zipcode
sales = sales.sort_values('zipcode')

# Create a list of the unique zipcodes (numpy.ndarray)
unique_zipcodes = sales['zipcode'].unique()

# Create list of dataframes by zipcode
list_of_df = []

for zipcode in unique_zipcodes:
   # Create df for each zipcode
   df = sales[sales['zipcode'] == zipcode]
   # Append to the list_of_df
   list_of_df.append(df)

In [48]:
from sklearn.model_selection import train_test_split

list_of_df_train = []
list_of_df_test = []

# Split each dataframe into train (80%) and test data (20%) 
for df in list_of_df:
   train_data, test_data = train_test_split(df, test_size=0.2)
   list_of_df_train.append(train_data)
   list_of_df_test.append(test_data)

print(f"# of zipcodes:", len(unique_zipcodes))
print(f"# of (train) dataframes:", len(list_of_df_train))
print(f"# of (test) dataframes:", len(list_of_df_test))


# of zipcodes: 70
# of (train) dataframes: 70
# of (test) dataframes: 70


In [49]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error 

# List features to use for model to predict 
basic_features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors']

list_of_models = []
list_of_rmse_basic_train = []
# Basic Model
for df in list_of_df_train:
   y = df.price # actual price column of training set
   X = df[basic_features] # rest of dataframe data
   # Create and train the model
   basic_model = linear_model.LinearRegression().fit(X, y)
   # Store the model into list of models
   list_of_models.append(basic_model)
   # Predict prices using the model
   y = df.price # actual price column of training set
   X = df[basic_features]
   y_pred = basic_model.predict(X)
   train_rmse_basic = mean_squared_error(y, y_pred, squared=False) #False = rmse
   list_of_rmse_basic_train.append(train_rmse_basic)

print(f"# of ML models: ", len(list_of_models))
print(f"# of Root Mean Squered Error: ", len(list_of_rmse_basic_train))


# of ML models:  70
# of Root Mean Squered Error:  70


In [50]:
# Comparing with Test Data
list_of_rmse_basic_test = []

i = 0
while i < len(list_of_models):
   df = list_of_df_test[i]
   y = df.price
   X = df[basic_features]
   y_pred = list_of_models[i].predict(X)
   test_rmse_basic = mean_squared_error(y, y_pred, squared=False) #False = rmse
   list_of_rmse_basic_test.append(test_rmse_basic)
   i += 1

df = pd.DataFrame({
   'unique_zipcodes': unique_zipcodes,
   'list_of_rmse_basic_train': list_of_rmse_basic_train,
   'list_of_rmse_basic_test': list_of_rmse_basic_test
   })


In [51]:
from joblib import dump, load
# Create and dump models into designated folder
i = 0
while (i < len(list_of_models)):
   model = list_of_models[i]
   zipcode = unique_zipcodes[i]
   dump(model, "backend_root/projectApp/ml_models/" + str(zipcode))
   i += 1

In [52]:
import numpy as np
# Example of loading linear regression model
zipcode = 98001
# Load the condensed model file
mdl = load('backend_root/projectApp/ml_models/' + str(zipcode))
# Assume user input
userInput = {
   'bedrooms': 3,
   'bathrooms': 2,
   'sqft_living': 2200,
   'sqft_lot': 4000,
   'floors': 2
}
# Convert to pandas dataframe
df = pd.DataFrame([userInput])
# Predict based on input
y_pred = mdl.predict(df)
# Display the results
print(y_pred)


[314162.14629794]


In [53]:
# Utilizing ipython widgets

import ipywidgets as widgets
from IPython.display import display

unique_zipcodes = sales['zipcode'].unique()
zipcode_list = [str(element) for element in unique_zipcodes]

widgets.Select(
    options=zipcode_list,
    # rows=10,
    description='Zipcodes:',
    disabled=False
)

Select(description='Zipcodes:', options=('98001', '98002', '98003', '98004', '98005', '98006', '98007', '98008…

In [54]:
#Use plotly to display awesome charts
import plotly.express as px

fig = px.scatter(sales, x="long",y="lat", 
                 color='zipcode', title='King County Housing Dataset 2014',
                 size='price')
fig.show()


In [91]:
import plotly.graph_objects as go
# Create a figure
fig = go.Figure()

fig.add_trace(go.Scatter(
   x=sales['sqft_living'], 
   y=sales['price'],
   mode='markers',
))

# Define the updatemenus configuration
updatemenus = [
    dict(
        buttons=[
           # Below is one button but iterated by for each loop
            dict(
                label=zipcode,
                method="update",
                args=[
                    {  'visible': [True, True, True, True],
                       'x': [sales[sales['zipcode'] == zipcode]['sqft_living']],  
                     'y': [sales[sales['zipcode'] == zipcode]['price']],
                    }
                ]
            ) for zipcode in zipcode_list
        ],
        showactive=True,
        direction="down",
        x=0.2,
        xanchor="left",
        y=1.1,
        yanchor="top"
    )
]

# Update layout properties
fig.update_layout(
    updatemenus=updatemenus,
    title_text="Zillow Housing Prices by Zipcode",
    xaxis_title="Sqft Living",
    yaxis_title="Price",
)

fig.show()

In [126]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objs as go

# Sample data (replace with your actual data)
data1 = [1, 2, 3, 4, 5]
data2 = [5, 4, 3, 2, 1]

# Initialize the Dash app
app = dash.Dash(__name__)

# Define the app layout
app.layout = html.Div([
    dcc.Dropdown(
        id='data-dropdown',
        options=[
            {'label': zipcode, 'value': zipcode} for zipcode in zipcode_list],
        value=zipcode  # Initial dropdown value
    ),
    dcc.Graph(id='line-chart')
])

# Define a callback to update the line chart based on dropdown selection
@app.callback(
    Output('line-chart', 'figure'),
    [Input('data-dropdown', 'value')]
)
def update_line_chart(selected_data):
    df = sales[sales['zipcode']==int(selected_data)]
    print(f"Selected_data: ", selected_data)
    print(f"type", type(selected_data))
    trace = go.Scatter(
        x=df['sqft_living'],
        y=df['price'],
        mode='markers',
        name=selected_data)       

    return {
        'data': [trace],
        'layout': go.Layout(
            title=f'Chart for {selected_data}',
            xaxis={'title': 'X-axis'},
            yaxis={'title': 'Y-axis'}
        )
    }

# Run the Dash app
if __name__ == '__main__':
    app.run_server(debug=True)

Selected_data:  98001
type <class 'int'>
Selected_data:  98001
type <class 'int'>
Selected_data:  98004
type <class 'str'>
Selected_data:  98011
type <class 'str'>
