In [1]:
# Standard library
from urllib.parse import quote

# Database dependencies
from config import username, db_password
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# Machine Learning dependencies
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

In [2]:
# Create the database engine.
# The credentials are percent-encoded before being placed in the URL so that
# characters such as "@", ":", "/" or "%" in the username or password cannot
# be mistaken for URL delimiters. safe="" forces "/" to be encoded as well.
db_string = (
    f"postgresql://{quote(username, safe='')}:{quote(db_password, safe='')}"
    "@127.0.0.1:5432/FinalProject"
)
engine = sqlalchemy.create_engine(db_string)

In [3]:
# Bring in the SQL tables as DataFrames.
#
# A single connection serves all four reads; the `with` block closes it once
# the last table has been loaded. The queries are read-only SELECTs, so no
# commit (and therefore no `autocommit` execution option, which SQLAlchemy
# deprecated in 1.4 and removed in 2.0) is required.
with engine.connect() as conn:
    # housing without populations
    housing_all_ny = pd.read_sql("SELECT * FROM housing_without_join", con=conn)

    # housing with populations
    housing = pd.read_sql("SELECT * FROM housing", con=conn)

    # urban populations
    urban_populations = pd.read_sql("SELECT * FROM housing_urban_pops", con=conn)

    # city populations
    city_populations = pd.read_sql("SELECT * FROM housing_city_pops", con=conn)

In [4]:
# Preview the first five rows of `housing_all_ny`.
# NOTE(review): in the displayed output, rows 0 and 2 describe the same listing
# under different `index` values — check for duplicate listings before modeling.
housing_all_ny.head(n=5)

Unnamed: 0,index,status,price,bed,bath,acre_lot,city,state,zip_code,house_size,sold_date
0,54248,for_sale,425000.0,3.0,2.0,2.02,Claverack,New York,12521.0,1600.0,2021-11-24
1,54533,for_sale,435000.0,3.0,2.0,5.01,East Chatham,New York,12060.0,2504.0,2020-11-09
2,54551,for_sale,425000.0,3.0,2.0,2.02,Claverack,New York,12521.0,1600.0,2021-11-24
3,56030,for_sale,339000.0,4.0,5.0,0.51,New Lebanon,New York,12125.0,3133.0,2019-09-11
4,56088,for_sale,625000.0,4.0,2.0,5.2,Copake Falls,New York,12517.0,2029.0,2019-10-23


In [5]:
# Preview the first five rows of `housing`.
# NOTE(review): the displayed columns match those of `housing_all_ny` and
# include no population fields, although the load cell labels this table
# "housing with populations" — confirm which table is intended.
housing.head(n=5)

Unnamed: 0,index,status,price,bed,bath,acre_lot,city,state,zip_code,house_size,sold_date
0,466030,for_sale,979000.0,8.0,5.0,0.04,Bronx,New York,10467.0,3297.0,2019-01-24
1,466364,for_sale,937500.0,4.0,3.0,0.06,Bronx,New York,10472.0,2585.0,2019-08-26
2,466770,for_sale,879000.0,7.0,4.0,0.05,Bronx,New York,10458.0,3374.0,2020-10-23
3,467315,for_sale,589000.0,3.0,2.0,0.05,Bronx,New York,10469.0,1664.0,2021-02-01
4,467471,for_sale,800000.0,4.0,2.0,0.05,Brooklyn,New York,11207.0,2880.0,2019-12-23


In [6]:
# Preview the first five rows of `urban_populations` (table housing_urban_pops).
# NOTE(review): rows 0 and 2 of the displayed output are identical — check for
# duplicate listings before modeling.
urban_populations.head(n=5)

Unnamed: 0,status,price,bed,bath,acre_lot,city,zip_code,house_size,sold_date,population_2019,population_2020,population_2021
0,for_sale,539000.0,3.0,3.0,0.14,Suffern,10901.0,1600.0,2020-09-11,11007,11441,11402
1,for_sale,799000.0,5.0,4.0,0.63,Suffern,10901.0,3750.0,2021-05-26,11007,11441,11402
2,for_sale,539000.0,3.0,3.0,0.14,Suffern,10901.0,1600.0,2020-09-11,11007,11441,11402
3,for_sale,424900.0,3.0,3.0,0.14,Nyack,10960.0,1326.0,2021-02-10,7156,7247,7236
4,for_sale,1099999.0,5.0,5.0,1.94,Stony Point,10980.0,3800.0,2021-11-05,12586,14813,14768


In [7]:
# Preview the first five rows of `city_populations` (table housing_city_pops).
city_populations.head(n=5)

Unnamed: 0,status,price,bed,bath,acre_lot,city,zip_code,house_size,sold_date,population_2019,population_2020,population_2021
0,for_sale,979000.0,8.0,5.0,0.04,Bronx,10467.0,3297.0,2019-01-24,1418207,1472654,1424948
1,for_sale,937500.0,4.0,3.0,0.06,Bronx,10472.0,2585.0,2019-08-26,1418207,1472654,1424948
2,for_sale,879000.0,7.0,4.0,0.05,Bronx,10458.0,3374.0,2020-10-23,1418207,1472654,1424948
3,for_sale,589000.0,3.0,2.0,0.05,Bronx,10469.0,1664.0,2021-02-01,1418207,1472654,1424948
4,for_sale,800000.0,4.0,2.0,0.05,Brooklyn,11207.0,2880.0,2019-12-23,2559903,2736074,2641052
