## Google Drive API: Pip install assistance

In [1]:
pip install gdown google-api-python-client google-auth google-auth-oauthlib google-auth-httplib2

StatementMeta(, fb6a937c-5bd1-4099-be4e-673dce285914, 3, Finished, Available)

Collecting gdown
  Downloading gdown-5.1.0-py3-none-any.whl (17 kB)
Collecting google-api-python-client
  Downloading google_api_python_client-2.118.0-py2.py3-none-any.whl (12.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m127.1 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Collecting google-auth-httplib2
  Downloading google_auth_httplib2-0.2.0-py2.py3-none-any.whl (9.3 kB)
Collecting httplib2<1.dev0,>=0.15.0 (from google-api-python-client)
  Downloading httplib2-0.22.0-py3-none-any.whl (96 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.9/96.9 kB[0m [31m42.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0.dev0,>=1.31.5 (from google-api-python-client)
  Downloading google_api_core-2.17.1-py3-none-any.whl (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.0/137.0 kB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uritemplate<5,>

## Libraries Import

In [2]:
import os
import io
import pandas as pd
import builtin.utils as ut

StatementMeta(, fb6a937c-5bd1-4099-be4e-673dce285914, 4, Finished, Available)

## Update List Function


 `update_list(check_path, folder_path)`

Updates a list DataFrame with the current files in a local folder and saves the changes.

#### Parameters:
- `check_path` (str): Path to the DataFrame file to be updated.
- `folder_path` (str): Local path of the parent folder. Files are read from the sub-folder named after the list file (`<folder_path>/<list name>`).

#### Notes:
- Assumes the DataFrame contains 'name' and 'files' columns.
- The DataFrame is updated with the current files in the local folder.
- Saves the updated DataFrame back to the specified path.

#### Example:
```python
update_list('/path/to/check_dataframe.parquet', '/local/path/to/folder')
```


In [3]:
# Update a list DataFrame with current files in a local folder
def update_list(check_path,folder_path):
    """
    Refresh the 'files' column of a list DataFrame from a local folder tree and save it.

    The list name is the file name of `check_path` up to its first dot, and files
    are looked up under `<folder_path>/<list name>`:
    - the row whose 'name' equals the list name receives the listing of that folder;
    - every other row receives the listing of `<folder_path>/<list name>/<name>`,
      provided that sub-folder exists.

    Parameters:
    - check_path (str): Path to the Parquet file holding the DataFrame to be updated.
    - folder_path (str): Local path of the parent folder that contains the list folder.

    Returns:
    - None. The Parquet file at `check_path` is overwritten with the updated DataFrame.

    Notes:
    - The DataFrame must contain 'name' and 'files' columns; otherwise an error is
      printed and nothing is written.
    - Rows whose folder does not exist locally keep their current 'files' value.
    - Read and write failures are reported with print() instead of being raised.

    Example:
    - update_list('/path/to/check_dataframe.parquet', '/local/path/to/folder')
    """

    list_name = check_path.split('/')[-1].split('.')[0]
    local_folder_path = os.path.join(folder_path, list_name)

    try:
        # Read the existing check DataFrame from the specified path
        check_df = pd.read_parquet(check_path)
    except Exception as e:
        print(f"Error reading the Parquet file: {e}")
        return

    # Fail early (in the same print-and-return style) when the expected schema is absent
    missing_columns = [col for col in ('name', 'files') if col not in check_df.columns]
    if missing_columns:
        print(f"Error: the DataFrame is missing required columns: {missing_columns}")
        return

    # Snapshot the (index, name) pairs so the frame can be modified safely while looping
    for row_index, check_name in list(check_df['name'].items()):
        # Null or non-text names cannot be mapped to a folder
        if not isinstance(check_name, str):
            continue

        is_main_folder = check_name == list_name
        if is_main_folder:
            target_folder = local_folder_path
        else:
            target_folder = os.path.join(local_folder_path, check_name)

        if not os.path.isdir(target_folder):
            if is_main_folder:
                print(f"Main folder not found: {target_folder}")
            continue

        # Keep looping after the main-folder row so the remaining rows are refreshed too
        check_df.at[row_index, 'files'] = os.listdir(target_folder)

    try:
        # Save the updated DataFrame back to the specified path
        check_df.to_parquet(path=check_path)
    except Exception as e:
        print(f"Error writing the Parquet file: {e}")


StatementMeta(, fb6a937c-5bd1-4099-be4e-673dce285914, 5, Finished, Available)

## Exercise Test

In [4]:
# Test run: both test lists are refreshed against the same parent folder.
folder_path = '/lakehouse/default/Files/otra_prueba'
estados_path = '/lakehouse/default/Files/otra_prueba/pruebaList/reviewEstadosPrueba.parquet'
sitios_path = '/lakehouse/default/Files/otra_prueba/pruebaList/metadataSitiosPrueba.parquet'

# Reviews list first, then the metadata list.
update_list(check_path=estados_path, folder_path=folder_path)
update_list(check_path=sitios_path, folder_path=folder_path)

StatementMeta(, fb6a937c-5bd1-4099-be4e-673dce285914, 6, Finished, Available)

## Original Routing:

From this point on, all paths refer to the original database. This section contains the paths needed to link the information extracted from the requested Drive folder with the database that was created.

## Metadata - Sitios List Update

In [5]:
# Parent folder of the original data and the metadata-sitios list to refresh against it.
folder_path = '/lakehouse/default/Files/original'
check_metadata_path = '/lakehouse/default/Files/notes_and_list/metadata-sitios.parquet'

update_list(check_path=check_metadata_path, folder_path=folder_path)

StatementMeta(, fb6a937c-5bd1-4099-be4e-673dce285914, 7, Finished, Available)

## Review - Estados List Update

In [6]:
# Parent folder of the original data and the reviews-estados list to refresh against it.
# NOTE: `check_metadata_path` is reused here, but it now points at the reviews list.
folder_path = '/lakehouse/default/Files/original'
check_metadata_path = '/lakehouse/default/Files/notes_and_list/reviews-estados.parquet'

update_list(check_path=check_metadata_path, folder_path=folder_path)

StatementMeta(, fb6a937c-5bd1-4099-be4e-673dce285914, 8, Finished, Available)