# Exercise #1.

## Mount Google Drive

In [None]:
# Import the Colab helper used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Drive at /content/drive/ so files under it can be read and written.
drive.mount('/content/drive/')

# Base folder inside the mounted Drive. Later cells build the requirements
# file path and the download directory path relative to this location.
current_location: str = '/content/drive/MyDrive/Colab Notebooks/Data Engineering practice exercises/Exercise #1'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


## Download requirements file from GitHub

In [None]:
# Source URL (raw GitHub content) and destination path for the requirements file.
# The destination is built from `current_location`, so the Drive cell must run first.
file_url: str = 'https://raw.githubusercontent.com/danielbeach/data-engineering-practice/refs/heads/main/Exercises/Exercise-1/requirements.txt'
file_dest: str = f'{current_location}/requirements.txt'

# Fetch the file with wget. Both variables are quoted because the destination
# path contains spaces and a '#' character.
!wget -O "$file_dest" "$file_url"

--2025-06-04 22:53:31--  https://raw.githubusercontent.com/danielbeach/data-engineering-practice/refs/heads/main/Exercises/Exercise-1/requirements.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16 [text/plain]
Saving to: ‘/content/drive/MyDrive/Colab Notebooks/Data Engineering practice exercises/Exercise #1/requirements.txt’


2025-06-04 22:53:31 (229 KB/s) - ‘/content/drive/MyDrive/Colab Notebooks/Data Engineering practice exercises/Exercise #1/requirements.txt’ saved [16/16]



## Import libraries.

In [32]:
import os
import time
import shutil
import zipfile
import requests

# Import types.
from typing import List
from requests import Response

## Get files from the internet.

### Create a list with all URIs.

In [None]:
# Zip archives to fetch; main() iterates over this list in order.
download_uris: List[str] = [
    "https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2018_Q4.zip",
    "https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q1.zip",
    "https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q2.zip",
    "https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q3.zip",
    "https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q4.zip",
    "https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2020_Q1.zip",
    # NOTE(review): the year 2220 looks like a deliberately non-existent archive,
    # presumably kept to exercise the invalid-URI path — confirm before "fixing" it.
    "https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2220_Q1.zip",
]

### Define a class to manage the responses from the functions.

In [None]:
class FunctionResponse:
  """
  Uniform result object returned by the helper functions of this notebook.

  Attributes:
  -- success: True when the operation completed, False otherwise.
  -- message: Human-readable description of the outcome, or of the reason it failed.
  """
  def __init__(self, success: bool, message: str) -> None:
    # Keep the outcome flag and its explanation exactly as they were given.
    self.success, self.message = success, message

### Create a function that creates a directory (and deletes it before creation if it exists).

In [None]:
def createDirectory(name: str, delete_before_creation: bool = False) -> FunctionResponse:
  """
  Creates a new directory inside `current_location`.

  Parameters:
  -- name: The name of the directory to create.
  -- delete_before_creation: Determines whether the directory should be deleted first if it already exists. False by default.

  Returns:
  -- A FunctionResponse object. `success` is False when the previous directory
     could not be removed or when the new one could not be created.
  """
  target: str = f"{current_location}/{name}"
  # Only attempt the deletion when there is something to delete; otherwise a
  # first run (no previous directory) would stop here with a "not found" failure.
  if delete_before_creation and os.path.exists(target):
    # Call deleteFiles function to delete the directory.
    deleted: FunctionResponse = deleteFiles(current_location, name, True)
    # If it's not deleted then return whatever the deleteFiles function returned.
    if not deleted.success:
      return deleted
  # Try to create the directory.
  try:
    os.mkdir(target)
    return FunctionResponse(True, f"Directory '{name}' created successfully.")
  except FileExistsError:
    # The message is passed positionally: FunctionResponse has no `str` keyword.
    return FunctionResponse(False, f"Directory '{name}' already exists!")
  except PermissionError:
    return FunctionResponse(False, f"Permission denied to create directory '{name}'.")
  except Exception as e:
    return FunctionResponse(False, f"An unhandled exception ocurred: {e}")

### Create a function that validates a given URI.

In [None]:
def isValidUri(uri: str, timeout: float = 30.0) -> FunctionResponse:
  """
  Determines if a given URI is valid or not by sending a GET request.

  Only the status line and headers are needed, so the request is streamed and
  the connection is closed without downloading the response body.

  Parameters:
  -- uri: The URI to look up.
  -- timeout: Seconds to wait for the server before giving up. 30 by default.

  Returns:
  -- A FunctionResponse object. `success` is True only for an HTTP 200 response;
     any exception (including a timeout) is reported as a failure.
  """
  # Try to get a response.
  try:
    # stream=True defers the body download; the with-block releases the connection.
    with requests.get(uri, stream = True, timeout = timeout) as r:
      exists: bool = r.status_code == 200
    return FunctionResponse(exists, "Uri exists." if exists else "Uri doesn't exist.")
  except Exception as e:
    return FunctionResponse(False, f"An unhandled exception ocurred: {e}")

### Create a function that unzips a zip file.

In [None]:
def unzip(path: str, file_name: str) -> FunctionResponse:
  """
  Unzips a zip file into a sibling folder named after the archive.

  Parameters:
  -- path: The folder where the zip file is located.
  -- file_name: The name of the zip file.

  Returns:
  -- A FunctionResponse object. Any exception raised while opening or
     extracting the archive is reported as a failure.
  """
  # Strip only the final extension so names with extra dots keep their full
  # stem (e.g. 'trips.v2.zip' -> 'trips.v2' rather than 'trips').
  folder_name: str = os.path.splitext(file_name)[0]
  # Try to extract the file.
  try:
    with zipfile.ZipFile(f"{path}/{file_name}", 'r') as zip_ref:
      # Extract all of the files.
      zip_ref.extractall(f"{path}/{folder_name}")
    return FunctionResponse(True, f"File '{file_name}' unzipped successfully.")
  except Exception as e:
    return FunctionResponse(False, f"An unhandled exception ocurred: {e}")

### Create a function that deletes a file or a directory (including all of its contents).

In [None]:
def deleteFiles(path: str, file_name: str, is_directory: bool = False) -> FunctionResponse:
  """
  Deletes a directory or a file.

  Parameters:
  -- path: The folder where the file is located at.
  -- file_name: The name of the file or directory to delete.
  -- is_directory: Determines whether the file is a directory or not. False by default.

  Returns:
  -- A FunctionResponse object. `success` is False when the target does not
     exist, permission is denied, or any other exception is raised.
  """
  target: str = f"{path}/{file_name}"
  # Try to remove the file.
  try:
    if is_directory:
      # If it's a directory, delete it and all of the files inside it.
      shutil.rmtree(target)
      return FunctionResponse(True, f"Files at folder '{file_name}' deleted successfully.")
    # If it's not a directory, delete the file.
    os.remove(target)
    return FunctionResponse(True, f"File '{file_name}' deleted successfully.")
  except FileNotFoundError:
    # Name the kind of target that was actually requested.
    kind: str = "Folder" if is_directory else "File"
    return FunctionResponse(False, f"{kind} '{file_name}' not found.")
  except PermissionError:
    return FunctionResponse(False, "Permission denied.")
  except Exception as e:
    return FunctionResponse(False, f"An unhandled exception ocurred: {e}")

### Main code.

In [None]:
def main() -> None:
  """
  Downloads, unzips and cleans up every archive listed in `download_uris`.

  A fresh 'downloads' directory is created under `current_location`. Each URI
  is then validated, downloaded, extracted, and its zip file deleted. Every
  failure is reported with print() and processing moves on to the next URI.
  """
  dir_name: str = "downloads"
  dir_created: FunctionResponse = createDirectory(dir_name, True)
  if not dir_created.success:
    print(dir_created.message)
    return

  download_dir: str = f"{current_location}/{dir_name}"
  for uri in download_uris:
    is_valid_uri: FunctionResponse = isValidUri(uri)
    if not is_valid_uri.success:
      print(is_valid_uri.message)
      continue

    # Download files.
    print(f"Downloading file from '{uri}'...")
    file_name: str = uri.split("/")[-1]
    initial_time: float = time.time()
    try:
      # The timeout bounds how long the server may stay silent, so a stalled
      # connection cannot hang the notebook forever.
      r: Response = requests.get(uri, allow_redirects = True, timeout = 60)
      # The with-block guarantees the file is flushed and closed before unzipping.
      with open(f"{download_dir}/{file_name}", 'wb') as zip_file:
        zip_file.write(r.content)
    except (requests.RequestException, OSError) as e:
      print(f"Download of '{file_name}' failed: {e}")
      continue
    download_end: float = time.time()
    print(f"File '{file_name}' downloaded successfully in {round(download_end - initial_time, 2)} seconds.")

    # Unzip files.
    unzipped: FunctionResponse = unzip(download_dir, file_name)
    if not unzipped.success:
      print(unzipped.message)
      continue

    # Delete files.
    deleted: FunctionResponse = deleteFiles(download_dir, file_name)
    if not deleted.success:
      print(deleted.message)
      continue

    process_end: float = time.time()
    print(f"Process finished in {round(process_end - initial_time, 2)} seconds.\n")


if __name__ == "__main__":
    main()

Downloading file from 'https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2018_Q4.zip'...
File 'Divvy_Trips_2018_Q4.zip' downloaded successfully in 0.59 seconds.
Process finished in 1.87 seconds.

Downloading file from 'https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q1.zip'...
File 'Divvy_Trips_2019_Q1.zip' downloaded successfully in 0.4 seconds.
Process finished in 1.41 seconds.

Downloading file from 'https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q2.zip'...
File 'Divvy_Trips_2019_Q2.zip' downloaded successfully in 0.7 seconds.
Process finished in 2.97 seconds.

Downloading file from 'https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q3.zip'...
File 'Divvy_Trips_2019_Q3.zip' downloaded successfully in 0.81 seconds.
Process finished in 5.96 seconds.

Downloading file from 'https://divvy-tripdata.s3.amazonaws.com/Divvy_Trips_2019_Q4.zip'...
File 'Divvy_Trips_2019_Q4.zip' downloaded successfully in 0.69 seconds.
Process finished in 3.19 seconds.

Downloading 