### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

# Import Packages

In [1]:
import ads
from ads.common.auth import default_signer
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from datetime import datetime
import io
import matplotlib.pyplot as plt
import oci
import os
from ads import set_auth

# Select resource-principal authentication for subsequent ADS calls.
set_auth("resource_principal")

# Fetch Custom Environment Variables

In [6]:
def read_int_env(name, default=None):
    """Return environment variable *name* parsed as an int.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or *default* when the variable is missing/blank.

    Raises:
        ValueError: If the variable is set but is not a valid integer; the
            message names the offending variable and its raw value.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name!r} must be an integer, got {raw!r}"
        ) from err


# Hyperparameters come from custom environment variables. The fallbacks
# (100 trees, unlimited depth) are used when a variable is not set, so the
# cell no longer fails with an opaque TypeError from int(None).
n_trees = read_int_env('TREES', default=100)
max_depth = read_int_env('DEPTH', default=None)
print(n_trees,max_depth)

# Data Import

In [2]:
# Use resource-principal auth, then read the CSV object directly from the
# Object Storage bucket into a DataFrame.
ads.set_auth(auth='resource_principal')
bucket_name, namespace = 'filesdemo', 'fro8fl9kuqli'
file_name_import = 'adult_income'
csv_uri = f"oci://{bucket_name}@{namespace}/{file_name_import}"
df = pd.read_csv(csv_uri, storage_options=default_signer())

# Model Training

In [3]:
# 'class' is the prediction target; every other column is a feature.
y = df['class']
# Expand the feature columns with pd.get_dummies before training.
X = pd.get_dummies(df.drop('class', axis=1))
# Hold out 30% of the rows for testing. The split is seeded (42, the same
# seed the classifier is built with) so repeated runs evaluate on the same
# rows and the exported confusion matrix is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# Forest size and depth come from n_trees / max_depth; the seed is fixed at 42
# and n_jobs=-1 is passed through to scikit-learn.
rf_settings = {
    "n_estimators": n_trees,
    "max_depth": max_depth,
    "random_state": 42,
    "n_jobs": -1,
}
model = RandomForestClassifier(**rf_settings)
model.fit(X_train, y_train)

# Export Result

In [None]:
# Predict on the held-out rows and count (true label, predicted label) pairs.
y_test_pred = model.predict(X_test)
cm_test = confusion_matrix(y_test, y_test_pred)

# Convert each row to percentages of that row's total. A row whose total is
# zero is left at 0.0 instead of dividing by zero (NaN plus a RuntimeWarning).
row_totals = cm_test.sum(axis=1, keepdims=True)
cm_test_pct = np.divide(
    cm_test,
    row_totals,
    out=np.zeros(cm_test.shape, dtype=float),
    where=row_totals != 0,
) * 100
cm_test_pct

In [None]:
# Serialise the percentage matrix as tab-separated text with 4 decimals.
txt_buf = io.StringIO()
np.savetxt(txt_buf, cm_test_pct, fmt="%.4f", delimiter="\t")
txt_buf.seek(0)  # rewind so the buffer can be read again from the start

object_name = "random_forest_confusion_matrix.txt"

# Build an Object Storage client signed with the resource principal and look
# up the namespace from the service.
signer = oci.auth.signers.get_resource_principals_signer()
object_storage = oci.object_storage.ObjectStorageClient({}, signer=signer)
namespace = object_storage.get_namespace().data

# Upload to the bucket named by bucket_name (the one the input data was read
# from) rather than repeating the bucket name as a second literal.
object_storage.put_object(
    namespace_name=namespace,
    bucket_name=bucket_name,
    object_name=object_name,
    put_object_body=io.BytesIO(txt_buf.getvalue().encode("utf-8")),
)