In [1]:
import os

import pandas as pd
import psycopg2
import quandl
from sqlalchemy import create_engine, Column, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

In [2]:
# Database URL. Set the POSTGRES_URL environment variable to point the notebook
# at another server or to keep real credentials out of the notebook; when it is
# unset, the local development default below is used.
POSTGRES_URL = os.environ.get(
    'POSTGRES_URL',
    'postgresql://postgres:postgres@localhost:5432/zillow_quandl',
)

# Create engine and session
engine = create_engine(POSTGRES_URL)
Session = sessionmaker(bind=engine)
session = Session()

# Define table and columns
# Declarative base class that the ORM models in this notebook inherit from.
Base = declarative_base()

class ZillowData(Base):
    """ORM mapping for the ``zillow_data`` table.

    The primary key is the composite (indicator_id, region_id, date). Using
    ``value`` as the only key would force every stored value to be unique and
    non-null, so two observations that share the same value could not both
    be stored.

    Note: ``Base.metadata.create_all`` only creates tables that are missing;
    an already existing ``zillow_data`` table keeps its old key until it is
    dropped or migrated.
    """
    __tablename__ = 'zillow_data'

    # Composite key -- assumes one observation per indicator, region and date.
    # TODO confirm against the ZILLOW/DATA export.
    indicator_id = Column(String, primary_key=True)
    # NOTE(review): zillow_regions declares region_id as INT while this column
    # is a String -- confirm which type the data actually needs.
    region_id = Column(String, primary_key=True)
    date = Column(String, primary_key=True)
    value = Column(String)
    # Add more columns as per the actual table structure

# Create table
Base.metadata.create_all(engine)

# Configure the Quandl API key from the environment so that the secret is
# never stored in the notebook.
# NOTE(review): a key was previously committed in this cell; treat it as
# compromised and rotate it.
quandl_api_key = os.environ.get('QUANDL_API_KEY')
if not quandl_api_key:
    raise RuntimeError(
        'Set the QUANDL_API_KEY environment variable before running this notebook.'
    )
quandl.ApiConfig.api_key = quandl_api_key


In [5]:

#Export entire tables to zip
#export target location is in parent directory, so data is not added to repo
# Create the target folder first: on a fresh checkout ../Data does not exist
# yet and the zip files could not be written into it.
os.makedirs('../Data', exist_ok=True)

data = quandl.export_table('ZILLOW/DATA', filename='../Data/zillow_data.zip')
reg = quandl.export_table('ZILLOW/REGIONS', filename='../Data/zillow_reg.zip')
# NOTE(review): `ing` looks like a typo for `ind`; the name is kept so that any
# existing reference to it keeps working.
ing = quandl.export_table('ZILLOW/INDICATORS', filename='../Data/zillow_ind.zip')



In [6]:
import zipfile

# Extract each downloaded archive into the Data folder (not committed to repo).
# The archives are processed in the same order as before: data, regions,
# indicators.
for archive_path in (
    "../Data/zillow_data.zip",
    "../Data/zillow_reg.zip",
    "../Data/zillow_ind.zip",
):
    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        zip_ref.extractall('../Data/')

In [7]:
# Rebuild the zillow_indicators table from the exported INDICATORS CSV.

# Define the table schema. The table name is substituted into the DDL so that
# `table_name` is the single place where the name is defined.
table_name = 'zillow_indicators'
schema = '''
DROP TABLE IF EXISTS {0};

CREATE TABLE {0}(
    indicator_id VARCHAR,
    indicator VARCHAR,
    category VARCHAR,
    PRIMARY KEY (indicator_id)
)
'''.format(table_name)

# Parameterised insert statement, built once instead of once per row.
query = '''
    INSERT INTO {} (indicator_id, indicator, category)
    VALUES (%s, %s, %s)
    '''.format(table_name)

# Read the CSV file into a pandas DataFrame
# NOTE(review): the file name carries a hash-like suffix; presumably it changes
# when the table is exported again -- confirm and update the path if so.
csv_file = '../Data/ZILLOW_INDICATORS_e93833a53d6c88463446a364cda611cc.csv'
df = pd.read_csv(csv_file)

# Convert the frame to plain tuples in column order (the CSV is expected to
# hold exactly the three columns named in the INSERT). Missing cells become
# None so that they are stored as NULL rather than as the text 'NaN'.
rows = [
    tuple(None if pd.isna(cell) else cell for cell in record)
    for record in df.itertuples(index=False, name=None)
]

# Establish a connection to your PostgreSQL database. The URL defined in the
# configuration cell is reused so that the credentials live in one place; it
# must stay a plain `postgresql://` URI for psycopg2 to accept it.
conn = psycopg2.connect(POSTGRES_URL)
try:
    # `with conn` commits when the block succeeds and rolls back on any error,
    # so a failed run never leaves a half-built table or an open transaction
    # holding locks; the cursor is closed when the block exits.
    with conn, conn.cursor() as cursor:
        # Execute the table creation query
        cursor.execute(schema)
        # Insert the DataFrame's data into the table
        cursor.executemany(query, rows)
finally:
    # Always release the connection, even when the load failed.
    conn.close()


In [8]:
# Rebuild the zillow_regions table from the exported REGIONS CSV.

# Define the table schema. The table name is substituted into the DDL so that
# `table_name` is the single place where the name is defined.
table_name = 'zillow_regions'
schema = '''
DROP TABLE IF EXISTS {0};

CREATE TABLE {0}(
    region_id INT,
    region_type TEXT,
    region TEXT,
    PRIMARY KEY (region_id)
)
'''.format(table_name)

# Parameterised insert statement, built once instead of once per row.
query = '''
    INSERT INTO {} (region_id, region_type, region)
    VALUES (%s, %s, %s)
    '''.format(table_name)

# Read the CSV file into a pandas DataFrame
# NOTE(review): the file name carries a hash-like suffix; presumably it changes
# when the table is exported again -- confirm and update the path if so.
csv_file = '../Data/ZILLOW_REGIONS_1a51d107db038a83ac171d604cb48d5b.csv'
df = pd.read_csv(csv_file)

# Convert the frame to plain tuples in column order (the CSV is expected to
# hold exactly the three columns named in the INSERT). itertuples yields
# native Python scalars, which psycopg2 can adapt; missing cells become None
# so that they are stored as NULL rather than as NaN.
rows = [
    tuple(None if pd.isna(cell) else cell for cell in record)
    for record in df.itertuples(index=False, name=None)
]

# Establish a connection to your PostgreSQL database. The URL defined in the
# configuration cell is reused so that the credentials live in one place; it
# must stay a plain `postgresql://` URI for psycopg2 to accept it.
conn = psycopg2.connect(POSTGRES_URL)
try:
    # `with conn` commits when the block succeeds and rolls back on any error,
    # so a failed run never leaves a half-built table or an open transaction
    # holding locks; the cursor is closed when the block exits.
    with conn, conn.cursor() as cursor:
        # Execute the table creation query
        cursor.execute(schema)
        # Insert the DataFrame's data into the table
        cursor.executemany(query, rows)
finally:
    # Always release the connection, even when the load failed.
    conn.close()

The above was accomplished only after DOWNLOADING each table as a zip archive and extracting it to CSV.
The INDICATORS and REGIONS CSVs were then inserted. (The DATA table was imported into Postgres manually.)

This method appears to be taxing on memory. It would be lighter and more
sustainable to request and retrieve these data tables in pieces, so that we would
neither have to save CSVs (the current method) nor hold all of the data in memory at once.

