### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [1]:
import ads
import oci
import pandas as pd
from ads.dataset.factory import DatasetFactory

In [2]:
# Select the authentication mode ADS uses for its OCI calls: here the
# "resource_principal" mode, which takes no key or config-file arguments.
ads.set_auth(auth="resource_principal")
# Alternative (disabled): API-key authentication, reading the DEFAULT profile
# from the OCI config file at oci.config.DEFAULT_LOCATION.
#ads.set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT")

In [3]:
# Location of the source CSV. The three parts are combined below into an
# oci://<bucket>@<namespace>/<object path> URI, and are reused by the later
# DatasetFactory.open call.
bucket_name="hosted-ds-datasets"
namespace="bigdatadatasciencelarge"
filename="synthetic/orcl_attrition.csv"
# Alternative location (disabled): uncomment all three lines to read the file
# from a different bucket/namespace instead.
#bucket_name="bucket1"
#namespace="idsjrzqfxswo"
#filename="orcl_attrition.csv"
# Read the CSV straight from the oci:// URI into a pandas DataFrame.
# NOTE(review): the recorded output shows an "Unnamed: 0" column, which
# usually means the file carries a saved index column -- confirm whether
# passing index_col=0 is wanted here.
df = pd.read_csv(f"oci://{bucket_name}@{namespace}/{filename}")
# Bare last expression: the frame is rendered as the cell output.
df

Unnamed: 0,Age,Attrition,TravelForWork,SalaryLevel,JobFunction,CommuteLength,EducationalLevel,EducationField,Directs,EmployeeNumber,...,WeeklyWorkedHours,StockOptionLevel,YearsinIndustry,TrainingTimesLastYear,WorkLifeBalance,YearsOnJob,YearsAtCurrentLevel,YearsSinceLastPromotion,YearsWithCurrManager,name
0,42,Yes,infrequent,5054,Product Management,2,L2,Life Sciences,1,1,...,80,0,8,0,1,6,4,0,5,Tracy Moore
1,50,No,often,1278,Software Developer,9,L1,Life Sciences,1,2,...,80,1,10,3,3,10,7,1,7,Andrew Hoover
2,38,Yes,infrequent,6296,Software Developer,3,L2,Other,1,4,...,80,0,7,3,3,0,0,0,0,Julie Bell
3,34,No,often,6384,Software Developer,4,L4,Life Sciences,1,5,...,80,0,8,3,3,8,7,3,0,Thomas Adams
4,28,No,infrequent,2710,Software Developer,3,L1,Medical,1,7,...,80,1,6,3,3,2,2,2,2,Johnathan Burnett
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,37,No,often,4054,Software Developer,24,L2,Medical,1,2061,...,80,1,17,3,3,5,2,0,3,Benjamin Russell
1466,40,No,infrequent,2810,Software Developer,7,L1,Medical,1,2062,...,80,1,9,5,3,7,7,1,7,Ellen Parsons
1467,28,No,infrequent,710,Software Developer,5,L3,Life Sciences,1,2064,...,80,1,6,0,3,6,2,0,3,Tyler Greene
1468,50,No,often,4692,Product Management,3,L3,Medical,1,2065,...,80,0,17,3,2,9,6,0,8,Jessica Yang


In [4]:
# Save a copy of the frame as orcl_attrition.csv. The path is relative, so the
# file is created relative to the kernel's current working directory.
# index=False leaves the DataFrame's row index out of the file.
df.to_csv("orcl_attrition.csv", index=False)

In [5]:
# Open the same oci:// object through ADS's DatasetFactory (built from the
# bucket_name / namespace / filename values set earlier). The result `ds` is
# previewed in the next cell.
ds = DatasetFactory.open(f"oci://{bucket_name}@{namespace}/{filename}")


loop1:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
# Preview the first 10 rows of the ADS dataset as the cell output.
ds.head(10)

Unnamed: 0,Age,Attrition,TravelForWork,SalaryLevel,JobFunction,CommuteLength,EducationalLevel,EducationField,Directs,EmployeeNumber,...,WeeklyWorkedHours,StockOptionLevel,YearsinIndustry,TrainingTimesLastYear,WorkLifeBalance,YearsOnJob,YearsAtCurrentLevel,YearsSinceLastPromotion,YearsWithCurrManager,name
0,42,Yes,infrequent,5054,Product Management,2,L2,Life Sciences,1,1,...,80,0,8,0,1,6,4,0,5,Tracy Moore
1,50,No,often,1278,Software Developer,9,L1,Life Sciences,1,2,...,80,1,10,3,3,10,7,1,7,Andrew Hoover
2,38,Yes,infrequent,6296,Software Developer,3,L2,Other,1,4,...,80,0,7,3,3,0,0,0,0,Julie Bell
3,34,No,often,6384,Software Developer,4,L4,Life Sciences,1,5,...,80,0,8,3,3,8,7,3,0,Thomas Adams
4,28,No,infrequent,2710,Software Developer,3,L1,Medical,1,7,...,80,1,6,3,3,2,2,2,2,Johnathan Burnett
5,33,No,often,4608,Software Developer,3,L2,Life Sciences,1,8,...,80,0,8,2,2,7,7,3,6,Rhonda Grant
6,60,No,infrequent,6072,Software Developer,4,L3,Medical,1,10,...,80,3,12,3,2,1,0,0,0,Brandon Gill
7,31,No,infrequent,6228,Software Developer,25,L1,Life Sciences,1,11,...,80,1,1,2,3,1,0,0,0,Debbie Chan
8,39,No,often,990,Software Developer,24,L3,Life Sciences,1,12,...,80,0,10,2,3,9,7,1,8,Kayla Ward
9,37,No,infrequent,5958,Software Developer,28,L3,Medical,1,13,...,80,2,17,3,2,7,7,7,7,Angel Vaughn


In [8]:
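# Example (disabled): load a table over a wallet-based database connection.
# Fill in the values before uncommenting; prefer reading the password from an
# environment variable or secret store rather than typing it into the notebook.
#df = pd.DataFrame.ads.read_sql("select * from hr", connection_parameters={"user_name": "", "password": "", "wallet_location": "", "service_name": ""})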

In [9]:
# Example (disabled): connection parameters for a host/port connection (no wallet).
#connection_parameters={"user_name": "", "password": "", "service_name": "", "host": "", "port": ""}