### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [9]:
!odsc  conda  install -s mlcpuv1

usage: odsc [-h] {conda,core-site} ...
odsc: error: unrecognized arguments: -y


In [130]:
import os

import numpy as np
import pandas as pd
import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC


In [131]:
# Load the raw dataset; later cells read its 'acts' and 'name' columns.
data=pd.read_csv('fake_dataset (1).csv', engine='python')

In [132]:
# Fragments that mark a corrupted `acts` entry. They appear to be act names
# with the first character missing (plus the empty string) -- TODO confirm
# against the raw CSV. The strings must match the data exactly; do not "fix"
# their spelling or spacing.
duplicates=['','ike','onate books ','alk to elderly','o to framer market','olunteer to a Cause','upport local produce',
            'uy  handmade products','upport startups ideas','ake an emergency package','ttend  community service',
            'elp someone with your skills','ontribute to local communities','ntroduce your self to your neighbours',
            'uy from "nothing group" facebook group','repare your emergency kit for fire diaster',
            'eliver grocery or medications to neighbours','nvitee community member to your celebrations',
            'ake a test to check your wellbeing after disaster','onate local hospitals and well being centres online',
            'heck if the house is at risk by looking at the flood map',
            'egister at flood warning service/ and bureau of meteorology warning service']

# Flag every row whose comma-separated `acts` contains at least one fragment.
# Non-string values (e.g. NaN) are left untouched; they are already missing.
_fragments = frozenset(duplicates)
has_fragment = data['acts'].map(
    lambda acts: isinstance(acts, str) and not _fragments.isdisjoint(acts.split(','))
).astype(bool)

# Blank the flagged rows with a single .loc assignment. Writing into
# `data['acts'].values[i]` is not guaranteed to reach the DataFrame (the array
# may be a copy, and is read-only under pandas Copy-on-Write).
data.loc[has_fragment, 'acts'] = None


In [133]:
# Drop every row with a missing value in any column; this removes the rows
# whose 'acts' was set to None by the fragment filter.
data=data.dropna()
# Display the (rows, columns) shape that remains.
data.shape

(17520, 12)

In [134]:
# Distinct act names across all rows, kept in order of first appearance
# (dict keys preserve insertion order and discard repeats).
acts_list = list(dict.fromkeys(
    act
    for row_acts in data['acts'].values
    for act in row_acts.split(',')
))

In [135]:
# Create one placeholder column per act name; the values are filled in later.
for act_name in acts_list:
    data[act_name] = None

In [136]:
# One-hot encode `acts`: each act column holds 1 when the row's
# comma-separated `acts` string contains that act, else 0.
# Each row is split once into a set so the per-act membership test is O(1).
acts_per_row = [set(row_acts.split(',')) for row_acts in data['acts'].values]

# Assign each indicator column as a whole. Writing element-wise through
# `data[item].values[i]` is chained assignment into an array that pandas does
# not guarantee to be a writable view of the DataFrame, and it leaves the
# columns with object dtype instead of integers.
for item in acts_list:
    data[item] = [1 if item in row_acts else 0 for row_acts in acts_per_row]

In [137]:
# Remove the raw 'acts' text (now encoded as per-act indicator columns) and
# the 'name' column, which is not used as a model input.
data=data.drop(['acts','name'],axis=1)

In [138]:
# Column layout: the first 10 columns are the model inputs, everything after
# them is a per-act label column.
all_cols = data.columns
input_cols, out_cols = all_cols[:10], all_cols[10:]

# Min-max scale every column (inputs and labels alike); columns not listed in
# a transformer would be passed through unchanged.
t = [('num', MinMaxScaler(), all_cols)]
col_transform = ColumnTransformer(remainder='passthrough', transformers=t)

In [139]:
# Fit the scaler on the full table and scale it in one step; the fitted
# `col_transform` is reused by `predict` further down.
transformed_data=col_transform.fit_transform(data)

In [140]:
# Split the scaled matrix column-wise: the first 10 columns become the
# float32 feature matrix, the remaining columns the uint8 label matrix.
X, y = (
    transformed_data[:, :10].astype(np.float32),
    transformed_data[:, 10:].astype(np.uint8),
)

In [141]:
# Hold out 20% of the rows for testing.
# NOTE: no random_state is passed, so the split (and every score below) will
# differ from run to run.
Xtrain,Xtest,ytrain,ytest=train_test_split(X,y,test_size=0.2)

In [142]:
# Sanity-check the split sizes: train/test features, then train/test labels.
for part in (Xtrain, Xtest, ytrain, ytest):
    print(part.shape)

(14016, 10)
(3504, 10)
(14016, 39)
(3504, 39)


In [143]:
# Train a random forest with default hyper-parameters; `ytrain` has one
# column per act, so the forest is fitted as a multi-output classifier.
model=RandomForestClassifier()
model.fit(Xtrain, ytrain)

RandomForestClassifier()

In [144]:
# Mean accuracy on the held-out rows.
# NOTE(review): with a multi-column `ytest` this is presumably subset accuracy
# (a row only counts when every label matches), which would explain a score
# near zero -- confirm against the scikit-learn docs; per-label metrics may be
# more informative.
model.score(Xtest,ytest)

0.00028538812785388126

In [160]:
def predict(name,age,lat,lng,disable,outdoor,preparation,community,local_support,environment,asset_protection):
    """Return the act names the trained model predicts for one person.

    Relies on the notebook-level objects ``all_cols``, ``out_cols``,
    ``col_transform`` (already fitted) and ``model`` (already trained).

    Parameters
    ----------
    name
        Accepted for the caller's convenience; not used in the prediction.
    age, lat, lng, disable, outdoor, preparation, community, local_support,
    environment, asset_protection
        The feature values, in this order. They are assumed to line up with
        the leading (non-label) columns of the training table -- TODO confirm
        against the dataset's column order.

    Returns
    -------
    list
        Labels from ``out_cols`` whose predicted flag is non-zero.

    Raises
    ------
    ValueError
        If the number of feature values does not match the number of input
        columns implied by ``all_cols`` and ``out_cols``.
    """
    input_vals=np.array([age,lat,lng,disable,outdoor,preparation,community,local_support,environment,asset_protection])

    # Derive the layout from the fitted column lists instead of hard-coding
    # 49 total columns / 10 input columns.
    n_total = len(all_cols)
    n_inputs = n_total - len(out_cols)
    if input_vals.shape[0] != n_inputs:
        raise ValueError(
            f"expected {n_inputs} feature values, got {input_vals.shape[0]}"
        )

    # The transformer was fitted on every column, so it needs a full-width
    # row; the label positions are zero-filled placeholders that are
    # discarded again after scaling.
    input_arr = np.zeros(n_total)
    input_arr[:n_inputs] = input_vals
    df_input = pd.DataFrame([input_arr], columns=all_cols)
    transformed_input = col_transform.transform(df_input)

    # ravel (rather than squeeze) keeps a 1-D result even with a single label.
    predicted_flags = np.ravel(model.predict(transformed_input[:, :n_inputs]))
    return [out_cols[i] for i, flag in enumerate(predicted_flags) if flag]

In [161]:
# Example query. Arguments follow predict's signature: name, age, lat, lng,
# disable, outdoor, preparation, community, local_support, environment,
# asset_protection.
predict('John Cena',50,-32.0525,115.8878,False,True,0,1,1,1,0)

['Donate plants/tree to school or communitysupport local meetup group',
 'Help in your neighbors garden',
 'Invitee community member to your celebrations',
 'Buy from "nothing group" facebook group',
 'Support startups ideas',
 'Try 5-min meditation']

In [157]:
# Number of label columns (one per act) the model predicts.
len(out_cols)

39