### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

import torch
import pytorch_tabnet

from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabnet.callbacks import Callback

import mlflow

from sklearn.preprocessing import LabelEncoder

import ads
import oci
import os
import hashlib

from ads.dataset.factory import DatasetFactory

from utils_Class import ODSDataCatalog

In [2]:
# Show the ODSDataCatalog API (constructor arguments and available methods).
# NOTE(review): the saved output below reports module `utils_Copy1`, while the
# import cell uses `utils_Class` — re-run this cell to confirm the output is current.
help(ODSDataCatalog)

Help on class ODSDataCatalog in module utils_Copy1:

class ODSDataCatalog(builtins.object)
 |  Methods defined here:
 |  
 |  __init__(self, catalog_id, asset_key, namespace_id, bucket_name)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  get_hash_from_catalog(self, name)
 |  
 |  get_hash_from_file(self, FILE_NAME)
 |  
 |  get_key_from_name(self, name)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)



In [3]:
# setting security: using Resource Principal
# (this call selects resource-principal authentication for ADS)
ads.set_auth(auth='resource_principal')

# Obtain a resource-principal signer from the OCI SDK and hand it to a
# Data Catalog client. The config dict is empty; authentication is provided
# through the `signer` argument.
rps = oci.auth.signers.get_resource_principals_signer()
# NOTE(review): dcat_client is not referenced by the cells visible here —
# confirm that a later cell needs it.
dcat_client = oci.data_catalog.DataCatalogClient({}, signer=rps)

In [4]:
# Create the ODSDataCatalog helper whose hash-lookup methods are called below.
# Per the help() output above, the positional arguments are, in order:
#   catalog_id, asset_key, namespace_id, bucket_name
# NOTE(review): that help() output reports module `utils_Copy1` while the class
# is imported from `utils_Class` — confirm the parameter order still matches.
# NOTE(review): these identifiers are hard-coded; consider moving them to a
# configuration cell so the notebook can be pointed at a different catalog.
catalogo = ODSDataCatalog("ocid1.datacatalog.oc1.eu-frankfurt-1.aaaaaaaap6lel4hqckltn7fvjnux42jepzohdktkceywlyrnnrwgolbupzhq"
                         ,"6adf4ea3-67d5-44a3-b4f7-19fbf303d539","0e4d60d9-d5b5-467f-89bb-22db63a3ee18"
                         ,"credit_scoring")

In [5]:
# Integrity check: compare the MD5 hash recorded in the Data Catalog for the
# dataset file with the MD5 hash computed from the file itself.
FILE_NAME = "cs-training-nonull.csv"

# MD5 hash as registered in the Data Catalog for FILE_NAME
md5_cat = catalogo.get_hash_from_catalog(name=FILE_NAME)

print('File hash from Data Catalog is:', md5_cat)

# MD5 hash computed by the helper for the same file
# (this call also emits its own progress/record-count output)
md5_computed = catalogo.get_hash_from_file(FILE_NAME=FILE_NAME)
print()
print('MD5 hash of the file is: ', md5_computed)

# Stop the notebook on a mismatch, and report both hashes so the failure is
# diagnosable from the traceback alone instead of an empty AssertionError.
assert md5_computed == md5_cat, (
    "MD5 mismatch for {}: computed {} but Data Catalog has {}".format(
        FILE_NAME, md5_computed, md5_cat
    )
)
print("MD5 hash check OK")

File hash from Data Catalog is: 0b43cf47b2a1b336991f2d43b16d0c1e


HBox(children=(HTML(value='loop1'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

The dataset contains: 150000 records

MD5 hash of the file is:  0b43cf47b2a1b336991f2d43b16d0c1e
MD5 hash check OK
