# Database backup
Use `mongodump` to generate a backup of the database and upload it to Google Drive.

Based on [PyDrive](https://pythonhosted.org/PyDrive/quickstart.html)

In [None]:
# required imports to access api_db, misc, misc.CONFIG, ...
import sys
sys.path = ['.', '..', '../..'] + sys.path
from collection import *

### Conditional Execution
Each file needs to verify if it should be executed or not based on the configurations (for some files this is not optional but all should have this section, even if it is tautological). Example:
```python
if not misc.CONFIG["collection"]["execute_this_script"]: exit()
```

In [None]:
# Conditional execution: stop here unless the Drive backup is enabled in the configs
backup_enabled = misc.CONFIG["mongodb"]["drive_api_backup_enabled"]
if not backup_enabled:
    exit()

### Drive authentication process instructions
1. Go to the [PyDrive quickstart](https://pythonhosted.org/PyDrive/quickstart.html#authentication) and follow the instructions to set up the Drive project and enable the Google Drive API
2. create a [settings.yaml file](https://pythonhosted.org/PyDrive/oauth.html#automatic-and-custom-authentication-with-settings-yaml) in the same folder as this Jupyter notebook (see also the `example.settings.yaml` file)
3. run this script once on a machine with a web browser and allow the API oauth flow (you can stop after `gauth.SaveCredentialsFile(config_file)`)
4. a `credentials.json` file will be generated in the same folder as the `settings.yaml` file
5. copy both files to the deploy server and from now on there won't be any need to interact with the script as the credentials will be reloaded

In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from pydrive.files import ApiRequestError

In [None]:
# Path of the saved OAuth credentials file (loaded and re-saved by the
# authentication step), resolved through configs_abs_path
config_file = configs_abs_path("credentials.json")

In [None]:
# Build the auth object from the project's settings.yaml
settings_path = configs_abs_path("settings.yaml")
gauth = GoogleAuth(settings_file=settings_path)

# Load previously saved client credentials, if any
gauth.LoadCredentialsFile(config_file)

if gauth.credentials is not None:
    if gauth.access_token_expired:
        # Saved credentials exist but the access token expired: refresh it
        gauth.Refresh()
    else:
        # Saved credentials are still valid: initialize with them
        gauth.Authorize()
else:
    # Nothing saved yet: run the browser-based authentication
    gauth.LocalWebserverAuth()

# Persist the current credentials for the next run
gauth.SaveCredentialsFile(config_file)

<hr>
<h1 align="center">driver code</h1>

### First, do `mongodump` and zip it

In [None]:
import shutil

In [None]:
# Create the backup in the dump folder.
# Equivalent shell command:
#   mongodump --uri="mongodb://root:PASSWORD@mongo:27017/" --gzip -o dump
import subprocess

output_folder = "dump"

# The arguments are passed as a list (no shell involved) so that special
# characters in the connection URI (e.g. in the password) are never
# interpreted or mangled by a shell.
# check=True raises CalledProcessError when mongodump exits with a non-zero
# status, so a failed dump aborts the run instead of being zipped and uploaded.
subprocess.run(
    [
        "mongodump",
        "--uri=%s" % misc.CONFIG["mongodb"]["address_docker"],
        "--gzip",
        "-o",
        output_folder,
    ],
    check=True,
)

# NOTE(review): misc.CONFIG["mongodb"]["address"] was left here as a
# commented-out alternative to "address_docker" — confirm when it applies.

In [None]:
# Zip the contents of the dump folder; make_archive returns the name of the
# archive file it created
zip_filename = shutil.make_archive("twitter_watch_dump", 'zip', output_folder)

### Second, upload it to drive

In [None]:
# Drive client built from the authenticated GoogleAuth object
drive = GoogleDrive(gauth)

In [None]:
# A stored Drive file id in the configs means a backup already exists online;
# in that case the same file is targeted again instead of creating a new one.
mongodb_settings = misc.CONFIG["mongodb"]
first_time_backup = "dump_file_drive_id" not in mongodb_settings
print(f"Is this the first time the database is backed up?: {first_time_backup}")

# Start from a brand-new Drive file ...
backup = drive.CreateFile()
# ... and switch to the known file id when this is not the first backup
if not first_time_backup:
    known_file = {'id': mongodb_settings["dump_file_drive_id"]}
    backup = drive.CreateFile(known_file)

In [None]:
# Attach the zipped dump as the content of the Drive file to upload
backup.SetContentFile(zip_filename)

In [None]:
# Try to upload the backup. Any failure is reported through pushbullet and
# aborts the run with a non-zero exit status.
try:
    backup.Upload()
except Exception as e:  # ApiRequestError as e:
    pushbullet_notify("Google Drive DB backup failed due to: %s" % e)
    if "File not found" in str(e):
        # The file id stored in the configs no longer exists on Drive: forget
        # it so that the next run creates a fresh file.
        print("File was deleted.")
        if not first_time_backup:
            with DoneMessage("Updating the configurations file to remove outdated file_id"):
                config = get_original_configs()
                del config["mongodb"]["dump_file_drive_id"]
                overwrite_configs(config)
    # Fail intentionally for every upload error (not only "File not found"):
    # this should trigger the automatic retry behaviour, and it keeps the
    # following steps from running as if the upload had succeeded.
    exit(1)

In [None]:
# After the very first backup, store the id of the newly created Drive file
# in the configurations file
if first_time_backup:
    save_message = "Updating the configurations file to save the backup google drive file id"
    with DoneMessage(save_message):
        config = get_original_configs()
        drive_file_id = backup["id"]
        config["mongodb"]["dump_file_drive_id"] = drive_file_id
        overwrite_configs(config)

In [None]:
# Report the Drive file id and the download link of the uploaded backup
print("id: ", backup["id"])
print("download link: ", backup["webContentLink"])

In [None]:
# Remove the dump folder; a failure here is only reported, not fatal
try:
    shutil.rmtree(output_folder)
except Exception as cleanup_error:
    print("falied to delete dump folder: %s" % cleanup_error)

In [None]:
# Remove the local zip archive; a failure here is only reported, not fatal
try:
    # pyDrive has a "bug/feature": os.remove is not possible unless the
    # Drive file object is deleted first
    del backup
    os.remove(zip_filename)
except Exception as cleanup_error:
    print("falied to delete zip_file: %s" % cleanup_error)

In [None]:
# Final marker printed once the script reaches its end
print("DONE")