In [80]:
# Import the Drive helper from the google.colab package.
from google.colab import drive
# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

# Base folder for this exercise inside the mounted Drive.
# createDirectory and main build their file paths from this value.
current_location: str = '/content/drive/MyDrive/Colab Notebooks/Data Engineering practice exercises/Exercise #2'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [81]:
import requests
import pandas as pd
import lxml
import os
import shutil

from typing import List
from requests import Response
from IPython.display import display
from google.colab import data_table as dt

In [82]:
class FunctionResponse:
  """
  Outcome of a function call: a success flag together with a human-readable message.

  Properties:
  -- success: True when the function did its job, False otherwise.
  -- message: Text describing the successful result or the reason of the failure.
  """
  def __init__(self, success: bool, message: str) -> None:
    # Both values are kept as plain instance attributes, exactly as received.
    self.success, self.message = success, message

In [83]:
def createDirectory(name: str, delete_before_creation: bool = False) -> FunctionResponse:
  """
  Creates a new directory inside 'current_location'.

  Parameters:
  -- name: The name of the directory to create.
  -- delete_before_creation: Determines whether the directory should be deleted (together with
     its contents) if it already exists or not. False by default.

  Returns:
  -- A FunctionResponse object. When the previous directory cannot be deleted, the response
     produced by deleteFiles is returned unchanged.
  """
  target: str = f"{current_location}/{name}"
  # Only delete when there is something to delete: deleteFiles reports a missing folder as a
  # failure, which would otherwise abort the creation of a directory that does not exist yet.
  if delete_before_creation and os.path.exists(target):
    deleted: FunctionResponse = deleteFiles(current_location, name, True)
    # If it's not deleted then return whatever the deleteFiles function returned.
    if not deleted.success:
      return deleted
  # Try to create the directory.
  try:
    os.mkdir(target)
    return FunctionResponse(True, f"Directory '{name}' created successfully.")
  except FileExistsError:
    return FunctionResponse(False, f"Directory '{name}' already exists!")
  except PermissionError:
    return FunctionResponse(False, f"Permission denied to create directory '{name}'.")
  except Exception as e:
    return FunctionResponse(False, f"An unhandled exception ocurred: {e}")

In [84]:
def deleteFiles(path: str, file_name: str, is_directory: bool = False) -> FunctionResponse:
  """
  Deletes a directory or a file.

  Parameters:
  -- path: The folder where the file or directory is located at.
  -- file_name: The name of the file or directory to delete.
  -- is_directory: Determines whether the target is a directory or not. False by default.
     When True, the directory is deleted together with all of the files inside it.

  Returns:
  -- A FunctionResponse object. Errors raised while deleting are caught and reported through
     a response whose success is False.
  """
  target: str = f"{path}/{file_name}"
  # Used by the "not found" message so it names the kind of target that was requested.
  kind: str = "Folder" if is_directory else "File"
  # Try to remove the target.
  try:
    if is_directory:
      # If it's a directory, delete it and all of the files inside it.
      shutil.rmtree(target)
      return FunctionResponse(True, f"Files at folder '{file_name}' deleted successfully.")
    # If it's not a directory, delete the file.
    os.remove(target)
    return FunctionResponse(True, f"File '{file_name}' deleted successfully.")
  except FileNotFoundError:
    return FunctionResponse(False, f"{kind} '{file_name}' not found.")
  except PermissionError:
    return FunctionResponse(False, "Permission denied.")
  except Exception as e:
    return FunctionResponse(False, f"An unhandled exception ocurred: {e}")

In [85]:
def isValidUri(uri: str, timeout: float = 30.0) -> FunctionResponse:
  """
  Determines if a given URI is valid or not by sending a GET request.

  A URI is considered valid only when the server answers with status code 200.

  Parameters:
  -- uri: The URI to look up.
  -- timeout: Seconds to wait for the server before giving up. 30 by default.

  Returns:
  -- A FunctionResponse object. Any exception raised by the request (timeouts included) is
     caught and reported through a response whose success is False.
  """
  # Try to get a response.
  try:
    # stream=True defers the download of the body, since only the status code is needed here.
    # The with block releases the connection once the status code has been read.
    with requests.get(uri, stream=True, timeout=timeout) as r:
      exists: bool = r.status_code == 200
    message: str = "Uri exists." if exists else "Uri doesn't exist."
    return FunctionResponse(exists, message)
  except Exception as e:
    message: str = f"An unhandled exception ocurred: {e}"
    return FunctionResponse(False, message)

In [86]:
def main():
  """
  Downloads one file from the NOAA directory listing and shows its hottest records.

  Steps:
  -- Reads the HTML table published at the listing URL.
  -- Picks the first file whose 'Last modified' value equals the target date.
  -- Saves that file inside the 'downloads' directory (recreated on every run).
  -- Displays the rows whose 'HourlyDryBulbTemperature' equals the column maximum.

  Returns:
  -- Nothing. Problems are reported with a printed message.
  """
  path = "downloads"
  url = "https://www.ncei.noaa.gov/data/local-climatological-data/access/2021/"
  date = '2024-01-19 15:27'

  if not isValidUri(url).success:
    print("Invalid URL")
    return

  # Drop the unused columns first, then the rows with missing values in the remaining ones.
  file_list = pd.read_html(url)[0].drop(columns = ['Size', 'Description']).dropna()
  matches = file_list[file_list['Last modified'] == date]
  # Without this check, .iloc[0] raises an IndexError when no file matches the date.
  if matches.empty:
    print(f"No file last modified at '{date}' was found.")
    return
  file_name = matches.iloc[0]['Name']
  file_url = url + file_name

  if not isValidUri(file_url).success:
    print("Invalid URL")
    return
  # A timeout keeps the notebook from hanging forever on an unresponsive server.
  r = requests.get(file_url, timeout = 60)

  if not createDirectory(path, True).success:
    print("Invalid path")
    return

  file_path = f"{current_location}/{path}/{file_name}"
  # The with block closes (and flushes) the file before pandas reads it back.
  with open(file_path, 'wb') as downloaded_file:
    downloaded_file.write(r.content)

  df = pd.read_csv(file_path)
  # NOTE(review): assumes 'HourlyDryBulbTemperature' is parsed as a numeric column - TODO confirm.
  hottest = df[df['HourlyDryBulbTemperature'] == df['HourlyDryBulbTemperature'].max()]
  display(dt.DataTable(hottest))

if __name__ == "__main__":
  main()



Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,REPORT_TYPE,SOURCE,HourlyAltimeterSetting,HourlyDewPointTemperature,...,BackupDirection,BackupDistance,BackupDistanceUnit,BackupElements,BackupElevation,BackupEquipment,BackupLatitude,BackupLongitude,BackupName,WindEquipmentChangeDate
0,1023199999,2021-09-22T12:20:00,64.35,7.8,0.0,"DRAUGEN, NO",FM-15,4,29.62,45.0,...,,,,,,,,,,
