# Setup

In [None]:
# Start 4.00
# Install module needed for the project
! pip install geoalchemy2
! pip install geopandas

In [2]:
# Import statements needed for the project
import json
import pathlib
import urllib.parse

import geoalchemy2 as gdb
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import pandas as pd
import requests
import shapely
import sqlalchemy as db

from sqlalchemy.orm import declarative_base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, Column, String, Integer, Date, Float, MetaData, Table

# Part 1: Data Preprocessing

## 1.1) NYC Open Data

In [2]:
## Store URLs and application token
url_complaint = "https://data.cityofnewyork.us/resource/erm2-nwe9.geojson"
url_tree = "https://data.cityofnewyork.us/resource/5rq2-4hqu.geojson"
# Prefer a token supplied through the NYC_OPEN_DATA_APP_TOKEN environment
# variable so the credential does not have to live in the notebook. The literal
# is kept only as a fallback so existing runs keep working.
# NOTE(review): rotate this token and drop the fallback before sharing the notebook.
app_token = os.environ.get("NYC_OPEN_DATA_APP_TOKEN", "YgXUUmVq41Z9433qxe5qpLOMG")

## Define date range (both ends inclusive) used to filter the 311 complaints
start_date = "2022-10-01"
end_date = "2023-09-30"

## Seconds to wait for the API before giving up instead of hanging the kernel
REQUEST_TIMEOUT = 60

## Define query parameters
# NOTE(review): "$limit" asks for only 1000 rows per dataset — presumably a
# development cap; confirm before running the real analysis.
params_complaint = {"$$app_token": app_token,
                    "$select": "incident_zip,created_date,location",
                    "$where": f"created_date between '{start_date}T00:00:00' and '{end_date}T23:59:59'",
                    "$limit": 1000}

params_tree = {"$$app_token": app_token,
               "$select": "zipcode,created_at, spc_common,health,status",
               "$limit": 1000}

## Import NYC Open Data
# raise_for_status() turns an HTTP error into an immediate, descriptive
# exception instead of a later KeyError on the missing 'features' key.
response_complaint = requests.get(url_complaint, params=params_complaint, timeout=REQUEST_TIMEOUT)
response_complaint.raise_for_status()
raw_data_complaint = response_complaint.json()

response_tree = requests.get(url_tree, params=params_tree, timeout=REQUEST_TIMEOUT)
response_tree.raise_for_status()
raw_data_tree = response_tree.json()

## Convert the GeoJSON features to GeoDataFrames
gdf_complaint = gpd.GeoDataFrame.from_features(raw_data_complaint['features'])
gdf_tree = gpd.GeoDataFrame.from_features(raw_data_tree['features'])

## 1.2) Geometric boundary and rent data 

In [None]:

## Import rent data.
## The path is resolved relative to the working directory; this assumes the
## notebook is run from the repository root, where the 'data' folder lives —
## adjust the path if it is run from elsewhere.
# `selected_columns` is defined in this cell so that it does not depend on a
# later cell having been executed first (fresh-kernel Run All would otherwise
# fail with a NameError).
selected_columns = ['2023-08-31', '2023-01-31']
raw_data_geometric = pd.read_csv(pathlib.Path.cwd() / 'data' / 'zillow_rent_data.csv',
                                 usecols=selected_columns)


In [4]:
### 1.2.1 Geometric data
## TODO: loading of the geometric boundary data is not implemented yet.

### 1.2.2 Rent data
## Define selected columns (the two date columns to load)
selected_columns = ['2023-08-31', '2023-01-31']
# Define the path to the CSV file relative to the working directory.
# Joining os.getcwd() with an absolute path would discard the working directory
# on Windows and build a non-existent path elsewhere, so the path is composed
# from relative components instead.
# Assumes the notebook is run from the repository root, where 'data' lives —
# adjust if it is run from elsewhere.
csv_file_path = os.path.join(os.getcwd(), 'data', 'zillow_rent_data.csv')
# Load only the selected columns from the CSV file into a DataFrame
df = pd.read_csv(csv_file_path, usecols=selected_columns)

# Display the DataFrame
print(df)

       2023-01-31   2023-08-31
0     2027.438438  2053.486247
1     1738.217986  1795.384582
2     1706.900064  1757.602011
3     1458.063897  1488.180414
4     2895.699421  3064.476503
...           ...          ...
6717  3509.210744  3310.302151
6718          NaN  2639.938102
6719          NaN          NaN
6720  2169.143026  2383.185013
6721  4000.923287  4353.055657

[6722 rows x 2 columns]


# Part 2: Storing Data

In [28]:
## Create a new database in PostgreSQL
# Runs the `createdb` command-line tool in a shell, asking it to create a
# database named `group48project`.
# NOTE(review): the recorded output of this cell is `^C`, i.e. the command was
# interrupted — presumably it was waiting for input (e.g. a password); confirm
# that the database actually exists before relying on it.
!createdb group48project

^C


In [29]:
## Turn on the PostGIS extension
!psql --dbname group48project -c 'CREATE EXTENSION postgis;'

^C


In [36]:
## Create Schema File (schema.sql):

# Connection URL of the database the tables are created in (a SQLite file).
DATABASE_URL = 'sqlite:///mydatabase.db'

# echo=True makes the engine log every SQL statement it issues.
engine = create_engine(DATABASE_URL, echo=True)

# Shared declarative base that every table class in this cell inherits from.
Base = declarative_base()

# Define table classes
class NYCZipCodes(Base):
    """ORM model for the ``nyc_zip_codes`` table, keyed by zip code."""

    __tablename__ = 'nyc_zip_codes'

    # Primary key: the zip code, stored as text of up to 10 characters.
    zip_code = Column(String(length=10), primary_key=True)
    borough = Column(String(length=255))
    neighborhood = Column(String(length=255))
    # Add other columns as needed

class Complaints311(Base):
    """ORM model for the ``complaints_311`` table, keyed by an integer complaint id."""

    __tablename__ = 'complaints_311'

    # Integer primary key identifying one complaint row.
    complaint_id = Column(Integer, primary_key=True)
    date_received = Column(Date)
    complaint_type = Column(String(length=255))
    borough = Column(String(length=255))
    # Add other columns as needed

class Trees(Base):
    """ORM model for the ``trees`` table, keyed by an integer tree id."""

    __tablename__ = 'trees'

    # Integer primary key identifying one tree row.
    tree_id = Column(Integer, primary_key=True)
    species = Column(String(length=255))
    diameter_inches = Column(Float)
    borough = Column(String(length=255))
    # Add other columns as needed

class HistoricalRents(Base):
    """ORM model for the ``historical_rents`` table: a rent value per zip code and date."""

    __tablename__ = 'historical_rents'

    # Surrogate integer primary key.
    id = Column(Integer, primary_key=True)
    zip_code = Column(String(length=10))
    date = Column(Date)
    average_rent = Column(Float)
    # Add other columns as needed

# Create the mapped tables in the database behind `engine` (the SQLite file
# configured in this cell, not an in-memory database). With create_all's
# default settings, tables that already exist are left untouched.
Base.metadata.create_all(engine)

# Generate the SQL statements for creating the tables.
# str(Base.metadata) only yields the MetaData object's repr ("MetaData()"),
# not SQL, so each table's CREATE TABLE statement is compiled explicitly for
# the engine's dialect. sorted_tables orders tables by dependency.
schema_sql = "\n".join(
    f"{str(db.schema.CreateTable(table).compile(engine)).strip()};\n"
    for table in Base.metadata.sorted_tables
)

# Save the generated SQL to a file
with open('schema.sql', 'w', encoding='utf-8') as f:
    f.write(schema_sql)

2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("nyc_zip_codes")
2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("complaints_311")
2023-12-03 18:52:18,740 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,742 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("trees")
2023-12-03 18:52:18,744 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,746 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("historical_rents")
2023-12-03 18:52:18,747 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,748 INFO sqlalchemy.engine.Engine COMMIT


# Setup

In [None]:
# Start 4.00
# Install module needed for the project
! pip install geoalchemy2
! pip install geopandas

In [33]:
# Import statements needed for the project
import json
import os
import pathlib
import urllib.parse

import geoalchemy2 as gdb
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
import shapely
import sqlalchemy as db

from sqlalchemy.orm import declarative_base
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, Column, String, Integer, Date, Float, MetaData, Table

# Part 1: Data Preprocessing

## 1.1) NYC Open Data

In [2]:
## Store URLs and application token
url_complaint = "https://data.cityofnewyork.us/resource/erm2-nwe9.geojson"
url_tree = "https://data.cityofnewyork.us/resource/5rq2-4hqu.geojson"
# Prefer a token supplied through the NYC_OPEN_DATA_APP_TOKEN environment
# variable so the credential does not have to live in the notebook. The literal
# is kept only as a fallback so existing runs keep working.
# NOTE(review): rotate this token and drop the fallback before sharing the notebook.
app_token = os.environ.get("NYC_OPEN_DATA_APP_TOKEN", "YgXUUmVq41Z9433qxe5qpLOMG")

## Define date range (both ends inclusive) used to filter the 311 complaints
start_date = "2022-10-01"
end_date = "2023-09-30"

## Seconds to wait for the API before giving up instead of hanging the kernel
REQUEST_TIMEOUT = 60

## Define query parameters
# NOTE(review): "$limit" asks for only 1000 rows per dataset — presumably a
# development cap; confirm before running the real analysis.
params_complaint = {"$$app_token": app_token,
                    "$select": "incident_zip,created_date,location",
                    "$where": f"created_date between '{start_date}T00:00:00' and '{end_date}T23:59:59'",
                    "$limit": 1000}

params_tree = {"$$app_token": app_token,
               "$select": "zipcode,created_at, spc_common,health,status",
               "$limit": 1000}

## Import NYC Open Data
# raise_for_status() turns an HTTP error into an immediate, descriptive
# exception instead of a later KeyError on the missing 'features' key.
response_complaint = requests.get(url_complaint, params=params_complaint, timeout=REQUEST_TIMEOUT)
response_complaint.raise_for_status()
raw_data_complaint = response_complaint.json()

response_tree = requests.get(url_tree, params=params_tree, timeout=REQUEST_TIMEOUT)
response_tree.raise_for_status()
raw_data_tree = response_tree.json()

## Convert the GeoJSON features to GeoDataFrames
gdf_complaint = gpd.GeoDataFrame.from_features(raw_data_complaint['features'])
gdf_tree = gpd.GeoDataFrame.from_features(raw_data_tree['features'])

## 1.2) Geometric boundary and rent data 

In [27]:
### 1.2.1 Geometric data
## TODO: loading of the geometric boundary data is not implemented yet.

### 1.2.2 Rent data
## Define selected columns (the two date columns to load)
selected_columns = ['2023-08-31', '2023-01-31']
## Import rent data.
## The path is resolved relative to the working directory; this assumes the
## notebook is run from the repository root, where the 'data' folder lives —
## adjust the path if it is run from elsewhere.
raw_data_geometric = pd.read_csv(pathlib.Path.cwd() / 'data' / 'zillow_rent_data.csv',
                                 usecols=selected_columns)

# Part 2: Storing Data

In [28]:
## Create a new database in PostgreSQL
# Runs the `createdb` command-line tool in a shell, asking it to create a
# database named `group48project`.
# NOTE(review): the recorded output of this cell is `^C`, i.e. the command was
# interrupted — presumably it was waiting for input (e.g. a password); confirm
# that the database actually exists before relying on it.
!createdb group48project

^C


In [29]:
## Turn on the PostGIS extension
!psql --dbname group48project -c 'CREATE EXTENSION postgis;'

^C


In [36]:
## Create Schema File (schema.sql):

# Connection URL of the database the tables are created in (a SQLite file).
DATABASE_URL = 'sqlite:///mydatabase.db'

# echo=True makes the engine log every SQL statement it issues.
engine = create_engine(DATABASE_URL, echo=True)

# Shared declarative base that every table class in this cell inherits from.
Base = declarative_base()

# Define table classes
class NYCZipCodes(Base):
    """ORM model for the ``nyc_zip_codes`` table, keyed by zip code."""

    __tablename__ = 'nyc_zip_codes'

    # Primary key: the zip code, stored as text of up to 10 characters.
    zip_code = Column(String(length=10), primary_key=True)
    borough = Column(String(length=255))
    neighborhood = Column(String(length=255))
    # Add other columns as needed

class Complaints311(Base):
    """ORM model for the ``complaints_311`` table, keyed by an integer complaint id."""

    __tablename__ = 'complaints_311'

    # Integer primary key identifying one complaint row.
    complaint_id = Column(Integer, primary_key=True)
    date_received = Column(Date)
    complaint_type = Column(String(length=255))
    borough = Column(String(length=255))
    # Add other columns as needed

class Trees(Base):
    """ORM model for the ``trees`` table, keyed by an integer tree id."""

    __tablename__ = 'trees'

    # Integer primary key identifying one tree row.
    tree_id = Column(Integer, primary_key=True)
    species = Column(String(length=255))
    diameter_inches = Column(Float)
    borough = Column(String(length=255))
    # Add other columns as needed

class HistoricalRents(Base):
    """ORM model for the ``historical_rents`` table: a rent value per zip code and date."""

    __tablename__ = 'historical_rents'

    # Surrogate integer primary key.
    id = Column(Integer, primary_key=True)
    zip_code = Column(String(length=10))
    date = Column(Date)
    average_rent = Column(Float)
    # Add other columns as needed

# Create the mapped tables in the database behind `engine` (the SQLite file
# configured in this cell, not an in-memory database). With create_all's
# default settings, tables that already exist are left untouched.
Base.metadata.create_all(engine)

# Generate the SQL statements for creating the tables.
# str(Base.metadata) only yields the MetaData object's repr ("MetaData()"),
# not SQL, so each table's CREATE TABLE statement is compiled explicitly for
# the engine's dialect. sorted_tables orders tables by dependency.
schema_sql = "\n".join(
    f"{str(db.schema.CreateTable(table).compile(engine)).strip()};\n"
    for table in Base.metadata.sorted_tables
)

# Save the generated SQL to a file
with open('schema.sql', 'w', encoding='utf-8') as f:
    f.write(schema_sql)

2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("nyc_zip_codes")
2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,732 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("complaints_311")
2023-12-03 18:52:18,740 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,742 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("trees")
2023-12-03 18:52:18,744 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,746 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("historical_rents")
2023-12-03 18:52:18,747 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-12-03 18:52:18,748 INFO sqlalchemy.engine.Engine COMMIT
