In [2]:
# Loading in the basics
import psycopg2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
from config import password
import seaborn as sns
pd.options.display.float_format = "{:,.2f}".format

In [3]:
census_data = pd.read_csv("resources/final_census_data.csv")
zillow_ca_city_df = pd.read_csv("resources/ca_city_annual.csv")
zillow_ca_city_df.head()

Unnamed: 0,year,region_name,list_price,sale_price,list_divided_by_sale,list_minus_sale
0,2017,"Bakersfield, CA",261989.56,230264.11,87.9,31725.44
1,2017,"Fresno, CA",340529.22,268083.33,78.73,72445.89
2,2017,"Los Angeles-Long Beach-Anaheim, CA",816180.89,616639.11,75.55,199541.78
3,2017,"Riverside, CA",389225.11,350541.67,90.06,38683.44
4,2017,"Sacramento, CA",467075.0,391028.0,83.72,76047.0


In [4]:
ca_census_df = census_data.loc[(census_data["state"] == "California") & (census_data["date"] >= "2017-01-01") & (census_data["date"] <= "2021-12-31")]
# df_2017 = zillow_final_data_df.loc[(zillow_final_data_df["date"] >= "2017-01-01") & (zillow_final_data_df["date"] <= "2017-12-31")]
ca_census_df.tail()

Unnamed: 0,city,state,sum_level,date,population
818075,Balance of Yolo County,California,County place part,2020-07-01,29337
818076,Yuba County,California,County,2020-07-01,80160
818077,Marysville city,California,County place part,2020-07-01,12594
818078,Wheatland city,California,County place part,2020-07-01,3666
818079,Balance of Yuba County,California,County place part,2020-07-01,63900


In [5]:
ca_census_df["date"] = ca_census_df['date'].astype(str).str.extract('(\d{4})').astype(int)
ca_census_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,city,state,sum_level,date,population
572758,California,California,State,2017,39337785
572759,Adelanto city,California,Incorporated place,2017,34309
572760,Agoura Hills city,California,Incorporated place,2017,20471
572761,Alameda city,California,Incorporated place,2017,80178
572762,Albany city,California,Incorporated place,2017,20168
...,...,...,...,...,...
818075,Balance of Yolo County,California,County place part,2020,29337
818076,Yuba County,California,County,2020,80160
818077,Marysville city,California,County place part,2020,12594
818078,Wheatland city,California,County place part,2020,3666


In [6]:
zillow_ca_city_df["region_name"] = zillow_ca_city_df["region_name"].str.rstrip(", CA").replace("Los Angeles-Long Beach-Anaheim", "Los Angeles")
zillow_ca_city_df

Unnamed: 0,year,region_name,list_price,sale_price,list_divided_by_sale,list_minus_sale
0,2017,Bakersfield,261989.56,230264.11,87.9,31725.44
1,2017,Fresno,340529.22,268083.33,78.73,72445.89
2,2017,Los Angeles,816180.89,616639.11,75.55,199541.78
3,2017,Riverside,389225.11,350541.67,90.06,38683.44
4,2017,Sacramento,467075.0,391028.0,83.72,76047.0
5,2017,San Diego,727886.44,553173.89,76.0,174712.56
6,2017,San Francisco,837240.67,778667.0,93.02,58573.67
7,2017,San Jose,1093434.67,992319.67,90.77,101115.0
8,2017,Stockton,407033.89,368779.22,90.6,38254.67
9,2017,Ventura,771527.44,592111.44,76.75,179416.0


In [7]:
ca_census_df["city"].value_counts()

Sonoma city                 8
Solana Beach city           8
San Clemente city           8
Rancho Mirage city          8
Aliso Viejo city            8
                           ..
Santa Cruz County           4
Balance of Lake County      4
Balance of Plumas County    4
Balance of Kings County     4
San Joaquin County          4
Name: city, Length: 595, dtype: int64

In [8]:
# us_df = us_list.merge(us_sale, on = ["region_name","date"], how = "right")
ca_census_df = ca_census_df.rename(columns = {"city":"region_name", "date":"year"})
ca_census_df["region_name"] = ca_census_df["region_name"].str.rstrip(" city")
ca_census_df = ca_census_df.loc[ca_census_df["region_name"].isin(["Bakersfield", "Fresno", "Los Angeles", "Riverside", "Sacramento", "San Diego", "San Francisco", "San Jose", "Stockton", "Ventura"])]
ca_census_df = ca_census_df.drop(columns = ["sum_level", "state"])
ca_census_df

Unnamed: 0,region_name,year,population
572785,Bakersfield,2017,377170
572910,Fresno,2017,523938
572999,Los Angeles,2017,3975067
573110,Riverside,2017,326067
573118,Sacramento,2017,498386
...,...,...,...
817858,Sacramento,2020,512838
817904,San Diego,2020,1422420
817911,San Francisco,2020,866606
817918,Stockton,2020,312716


In [9]:
df = zillow_ca_city_df.merge(ca_census_df, on = ["region_name", "year"], how = "right")
df = df.drop_duplicates()
df

Unnamed: 0,year,region_name,list_price,sale_price,list_divided_by_sale,list_minus_sale,population
0,2017,Bakersfield,261989.56,230264.11,87.9,31725.44,377170
2,2017,Fresno,340529.22,268083.33,78.73,72445.89,523938
4,2017,Los Angeles,816180.89,616639.11,75.55,199541.78,3975067
6,2017,Riverside,389225.11,350541.67,90.06,38683.44,326067
8,2017,Sacramento,467075.0,391028.0,83.72,76047.0,498386
10,2017,San Diego,727886.44,553173.89,76.0,174712.56,1409982
12,2017,San Francisco,837240.67,778667.0,93.02,58573.67,877471
14,2017,San Jose,1093434.67,992319.67,90.77,101115.0,1037082
16,2017,Stockton,407033.89,368779.22,90.6,38254.67,310459
18,2018,Bakersfield,270267.5,233165.48,86.33,37102.02,379915


In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout, Flatten,LSTM,RepeatVector,TimeDistributed,Conv1D,MaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from livelossplot.tf_keras import PlotLossesCallback
from statsmodels.tsa.seasonal import seasonal_decompose

In [11]:
X = df.drop(["sale_price","region_name"], 1).values
y = df["sale_price"]

X_train,X_test, y_train, y_test = train_test_split(X, y, random_state = 42)
scaler = StandardScaler()
X_scaler = scaler.fit(X_train, X_test, y_train, y_test)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

TypeError: fit() takes from 2 to 4 positional arguments but 5 were given

In [None]:
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 28
hidden_nodes_layer2 = 21
hidden_nodes_layer3 = 14

nn = tf.keras.models.Sequential()

# Hidden layers
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation= "swish"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="swish"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

In [None]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])


cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=2,
    save_weights_only=True,
    save_freq='epoch')

In [None]:
fit_model = nn.fit(X_train_scaled, y_train,batch_size = 50, epochs=10, callbacks=[cp_callback])

In [None]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")