### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
# requests applies no timeout by default, so without one a blocked or
# filtered network would hang this cell instead of failing. With a timeout
# the call raises a requests exception after 10 seconds.
response = requests.get("https://oracle.com", timeout=10)
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
# Print each variable in turn; a variable that is not set raises KeyError.
for var_name in (
    "NB_SESSION_COMPARTMENT_OCID",
    "PROJECT_OCID",
    "USER_OCID",
    "TENANCY_OCID",
    "NB_REGION",
):
    print(os.environ[var_name])
```
</details>

In [1]:
import numpy as np
import pandas as pd
import json
import codecs
import os
import sys
sys.path.append('../')
import oci
from oci.config import validate_config
import ocifs
import ads

In [2]:
# Bucket name used when building oci:// URIs later in the notebook.
bucket_name = "tenen-raw-data"
# Parse the DEFAULT profile of the API-key config file into an OCI SDK config.
config = oci.config.from_file(file_location="~/.apikey/config", profile_name="DEFAULT")

In [3]:
# Build an Object Storage client from whatever `config` currently holds.
object_storage_client = oci.object_storage.ObjectStorageClient(config)
# Fetch the Object Storage namespace; it is used in oci:// URIs
# (oci://<bucket>@<namespace>/...).
namespace = object_storage_client.get_namespace().data
# Bare expression so the notebook echoes the value.
namespace

'lrhwvlagph9z'

In [4]:
def write2df(path):
    """Load the CSV at ``path`` into a pandas DataFrame and return it.

    Despite the name, nothing is written: this is a thin wrapper around
    ``pd.read_csv``. ``low_memory=False`` makes pandas parse the file in one
    pass instead of internal chunks, which avoids mixed-type column inference.

    Example ``path``: "/home/datascience/files/tenen_highlvl_oct.csv"
    """
    return pd.read_csv(path, low_memory=False)

In [5]:
# Month key -> CSV file name, one mapping per export type. Each mapping covers
# the same three months in the same order.
target_file = {month: f"tenen_high_lvl_{month}.csv" for month in ("oct", "nov", "dec")}
products_file = {month: f"tenen_products_{month}.csv" for month in ("oct", "nov", "dec")}
hits_file = {month: f"tenen_hits_{month}.csv" for month in ("oct", "nov", "dec")}

In [18]:
# Pick the December hits export and derive both its local path and its
# Object Storage URI from the same file name.
fname = hits_file['dec']
directory = 'products'
# NOTE(review): this rebinds `config`, which an earlier cell set to the parsed
# OCI SDK config (oci.config.from_file) that ObjectStorageClient is built from.
# From here on `config` is a storage_options-style dict instead, so re-running
# the client cell after this one would pass it this dict.
config = {"config": "~/.apikey/config"}
path = f"/home/datascience/files/{fname}"
target = f'oci://{bucket_name}@{namespace}/{directory}/{fname}'
# Bare expression so the notebook echoes both values.
target, path

('oci://tenen-raw-data@lrhwvlagph9z/products/tenen_hits_dec.csv',
 '/home/datascience/files/tenen_hits_dec.csv')

In [None]:
#from ads.common.auth import default_signer
#ads.set_auth(auth="api_key", profile="DEFAULT")

# Read the CSV directly from the oci:// URI; the API-key config path is passed
# to the filesystem layer through pandas' storage_options.
df = pd.read_csv(target, storage_options={"config": "~/.apikey/config"},low_memory=False)

In [19]:
# Load the local copy at `path`; this replaces any `df` already read from
# Object Storage.
df = write2df(path)

In [20]:
df.shape

(1831008, 66)

In [21]:
df.columns

Index(['hits.hitNumber', 'hits.time', 'hits.isInteraction', 'hits.isEntrance',
       'hits.type', 'hits.dataSource', 'hits.uses_transient_token',
       'hits.page.pagePath', 'hits.page.hostname', 'hits.page.pageTitle',
       'hits.page.pagePathLevel1', 'hits.page.pagePathLevel2',
       'hits.page.pagePathLevel3', 'hits.page.pagePathLevel4',
       'hits.transaction.currencyCode', 'hits.item.currencyCode',
       'hits.appInfo.screenName', 'hits.appInfo.landingScreenName',
       'hits.appInfo.exitScreenName', 'hits.appInfo.screenDepth',
       'hits.exceptionInfo.isFatal', 'hits.eCommerceAction.action_type',
       'hits.eCommerceAction.step', 'hits.social.socialNetwork',
       'hits.social.hasSocialSourceReferral',
       'hits.social.socialInteractionNetworkAction',
       'hits.contentGroup.contentGroup1', 'hits.contentGroup.contentGroup2',
       'hits.contentGroup.contentGroup3', 'hits.contentGroup.contentGroup4',
       'hits.contentGroup.contentGroup5',
       'hits.content

In [22]:
target

'oci://tenen-raw-data@lrhwvlagph9z/products/tenen_hits_dec.csv'

In [23]:
# Write the frame as CSV to the oci:// target. The storage options are spelled
# out here (as the read_csv cell does) rather than taken from the notebook-wide
# name `config`, which is bound to two different objects in this notebook (the
# parsed OCI SDK config and a storage_options dict) depending on which cell ran
# last.
df.to_csv(target, index=False, encoding='utf-8', storage_options={"config": "~/.apikey/config"})
#put_file(path+"tenen_highlvl_oct.csv", namespace, '/products/tenen_high_lvl_oct.csv')

In [None]:
# Cast every column to str.
# NOTE(review): presumably this also turns missing values into the literal
# string "nan" - confirm that is acceptable for the ADS dataset built from it.
df = df.astype(str)

In [None]:
from ads.dataset.factory import DatasetFactory
# Open the in-memory DataFrame as an ADS dataset.
ds = DatasetFactory.open(df) 

In [None]:
# Display the dataset in the notebook output.
ds.show_in_notebook()