In [1]:
# Load the larger data sets from AWS S3
# NYPD complaint data (saved locally as NYPD_Complaint_Data_Historic.csv)
# - original source https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Historic/qgea-i56i/about_data
# NYC mental health service finder data (saved locally as NYC_Mental_Health_Service_Finder_Data.csv)
# - original source https://data.cityofnewyork.us/Health/Mental-Health-Service-Finder-Data/8nqg-ia7v/about_data
# NYC census block and census tract data (saved locally as NYC_census_block_loc.csv and NYC_census_tracts.csv)
# - original source https://www.kaggle.com/datasets/muonneutrino/new-york-city-census-data?select=census_block_loc.csv
# - original source https://www.kaggle.com/datasets/muonneutrino/new-york-city-census-data?select=nyc_census_tracts.csv

In [2]:
import requests
from tqdm.notebook import tqdm
import os

# Directory (relative to the notebook) that every downloaded data set is written to
DATA_DIRECTORY = '../data'

def download_file(url, filename, timeout=(10, 60)):
    """
    Download a file from a given URL into DATA_DIRECTORY, showing a progress bar.

    The response body is streamed to a temporary ``<filename>.part`` file and
    only renamed to its final name once the transfer has finished, so an
    interrupted download never leaves a truncated file under the final name.

    Args:
    url (str): URL of the file to be downloaded.
    filename (str): Name of the file to be saved inside DATA_DIRECTORY.
    timeout (float or tuple): Timeout in seconds handed to ``requests.get``,
        either a single value or a ``(connect, read)`` pair. The read timeout
        applies to each wait for data, not to the whole transfer. Pass None
        to wait indefinitely.

    Returns:
    str: Path to the downloaded file.

    Raises:
    requests.HTTPError: If the server responds with an error status code.
    requests.RequestException: If the connection fails or times out.
    OSError: If the target directory or file cannot be written.
    """
    # Ensure the directory exists (no-op when it is already there)
    os.makedirs(DATA_DIRECTORY, exist_ok=True)

    # Final destination, plus a scratch path that receives the bytes while
    # the transfer is still in progress
    file_path = os.path.join(DATA_DIRECTORY, filename)
    partial_path = file_path + '.part'

    try:
        # The context manager releases the connection even if we fail mid-stream
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # To ensure we notice bad responses

            # Total size from the header; None tells tqdm the size is unknown
            total_size = int(response.headers.get('content-length', 0)) or None

            # Progress bar from tqdm.notebook, advanced by the bytes written
            with tqdm(total=total_size, unit='iB', unit_scale=True, desc=f"Downloading {filename}") as progress_bar:
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # Filter out keep-alive new chunks
                            f.write(chunk)
                            progress_bar.update(len(chunk))

        # Transfer complete: publish the file under its real name in one step
        os.replace(partial_path, file_path)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt): drop the partial
        # file so it cannot be mistaken for a finished download, then re-raise
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise

    return file_path



In [3]:
# NYPD Complaint data: local filename to store it under, and where to fetch it from
filename = 'NYPD_Complaint_Data_Historic.csv'
url = 'https://mf-data-analytics.s3.eu-west-1.amazonaws.com/NYPD_Complaint_Data_Historic.csv'

# Fetch the file, then report the location it was written to
downloaded_file_path = download_file(url, filename)
print(f'File has been downloaded and saved to: {downloaded_file_path}')


Downloading NYPD_Complaint_Data_Historic.csv:   0%|          | 0.00/3.03G [00:00<?, ?iB/s]

File has been downloaded and saved to: ../data/NYPD_Complaint_Data_Historic.csv


In [8]:
# NYC Mental Health Service Finder data: local filename (the date suffix of
# the remote object is dropped) and the remote location to fetch it from
filename = 'NYC_Mental_Health_Service_Finder_Data.csv'
url = 'https://mf-data-analytics.s3.eu-west-1.amazonaws.com/Mental_Health_Service_Finder_Data_20240816.csv'

# Fetch the file, then report the location it was written to
downloaded_file_path = download_file(url, filename)
print(f'File has been downloaded and saved to: {downloaded_file_path}')

Downloading NYC_Mental_Health_Service_Finder_Data.csv:   0%|          | 0.00/89.1k [00:00<?, ?iB/s]

File has been downloaded and saved to: ../data/NYC_Mental_Health_Service_Finder_Data.csv


In [6]:
# NYC census block locations: local filename and remote location
filename = 'NYC_census_block_loc.csv'
url = 'https://mf-data-analytics.s3.eu-west-1.amazonaws.com/census_block_loc.csv'

# Fetch the file, then report the location it was written to
downloaded_file_path = download_file(url, filename)
print(f'File has been downloaded and saved to: {downloaded_file_path}')

Downloading NYC_census_block_loc.csv:   0%|          | 0.00/2.17M [00:00<?, ?iB/s]

File has been downloaded and saved to: ../data/NYC_census_block_loc.csv


In [7]:
# NYC census tracts: local filename and remote location
filename = 'NYC_census_tracts.csv'
url = 'https://mf-data-analytics.s3.eu-west-1.amazonaws.com/nyc_census_tracts.csv'

# Fetch the file, then report the location it was written to
downloaded_file_path = download_file(url, filename)
print(f'File has been downloaded and saved to: {downloaded_file_path}')

Downloading NYC_census_tracts.csv:   0%|          | 0.00/320k [00:00<?, ?iB/s]

File has been downloaded and saved to: ../data/NYC_census_tracts.csv
