In [1]:
# Download the larger data sets from an AWS S3 mirror into ../data
# NYPD_Complaint_Data_Historic.csv: mirror of https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Historic/qgea-i56i/about_data
# NYC_Property_Valuation_and_Assessment.csv: original source https://data.cityofnewyork.us/City-Government/Property-Valuation-and-Assessment-Data/yjxr-fw8i/about_data
# NYC_Local_Mental_Health_Programs.csv: NYC mental health data (original source not recorded here)

In [2]:
import requests
from tqdm.notebook import tqdm
import os

# Directory (a relative path) that download_file creates if needed and writes downloaded files into
DATA_DIRECTORY = '../data'

def download_file(url, filename, directory=None, timeout=30):
    """
    Stream a file from a URL to disk while showing a notebook progress bar.

    The payload is first written to a temporary '<filename>.part' file and
    only moved to its final name once the whole body has been received, so a
    failed or interrupted download never leaves a truncated file at the
    returned path.

    Args:
    url (str): URL of the file to be downloaded.
    filename (str): Name of the file to be saved.
    directory (str, optional): Directory where the file will be saved.
        Defaults to DATA_DIRECTORY when omitted. Created if missing.
    timeout (float or tuple, optional): Timeout in seconds handed to
        requests.get, so a stalled connection raises instead of hanging.

    Returns:
    str: Path to the downloaded file.

    Raises:
    requests.HTTPError: If the server answers with an error status
        (raised by response.raise_for_status()).
    """
    target_dir = DATA_DIRECTORY if directory is None else directory

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(target_dir, exist_ok=True)

    file_path = os.path.join(target_dir, filename)
    partial_path = file_path + '.part'

    # The context manager releases the connection even if the download fails
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()  # To ensure we notice bad responses

        # A missing or zero content-length becomes None so tqdm shows an
        # open-ended counter instead of a bar with a bogus total of 0
        total_size = int(response.headers.get('content-length', 0)) or None

        try:
            # unit_divisor=1024 makes the scaled values match the binary 'iB' unit
            with tqdm(total=total_size, unit='iB', unit_scale=True, unit_divisor=1024,
                      desc=f"Downloading {filename}") as progress_bar:
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # Filter out keep-alive new chunks
                            f.write(chunk)
                            progress_bar.update(len(chunk))
        except BaseException:
            # Includes KeyboardInterrupt: drop the incomplete file, then re-raise
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

    # Publish the finished download under its final name in one step
    os.replace(partial_path, file_path)
    return file_path



In [3]:
# NYPD historic complaint data: S3 mirror location and the local name to store it under
url = 'https://mf-data-analytics.s3.eu-west-1.amazonaws.com/NYPD_Complaint_Data_Historic.csv'
filename = 'NYPD_Complaint_Data_Historic.csv'

# Fetch the file, then report where it was written
downloaded_file_path = download_file(url=url, filename=filename)
print(f'File has been downloaded and saved to: {downloaded_file_path}')


In [4]:
# NYC property valuation and assessment data: S3 mirror location and the local name to store it under
url = 'https://mf-data-analytics.s3.eu-west-1.amazonaws.com/NYC_Property_Valuation_and_Assessment.csv'
filename = 'NYC_Property_Valuation_and_Assessment.csv'

# Fetch the file, then report where it was written
downloaded_file_path = download_file(url=url, filename=filename)
print(f'File has been downloaded and saved to: {downloaded_file_path}')


In [5]:
# Local mental health programs data: S3 mirror location; note it is saved under a different local name
url = 'https://mf-data-analytics.s3.eu-west-1.amazonaws.com/local-mental-health-programs.csv'
filename = 'NYC_Local_Mental_Health_Programs.csv'

# Fetch the file, then report where it was written
downloaded_file_path = download_file(url=url, filename=filename)
print(f'File has been downloaded and saved to: {downloaded_file_path}')

Downloading NYC_Local_Mental_Health_Programs.csv:   0%|          | 0.00/2.31M [00:00<?, ?iB/s]

File has been downloaded and saved to: ../data/NYC_Local_Mental_Health_Programs.csv
