### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [9]:
!odsc  conda  install -s mlcpuv1

usage: odsc [-h] {conda,core-site} ...
odsc: error: unrecognized arguments: -y


In [1]:
import os
import sklearn
import pandas as pd
from sklearn.preprocessing import MinMaxScaler,LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.multioutput import MultiOutputClassifier
import random

import numpy as np

In [2]:
data=pd.read_csv('fake_dataset (1).csv', engine='python')

In [3]:
duplicates=['','ike','onate books ','alk to elderly','o to framer market','olunteer to a Cause','upport local produce',
            'uy  handmade products','upport startups ideas','ake an emergency package','ttend  community service',
            'elp someone with your skills','ontribute to local communities','ntroduce your self to your neighbours',
            'uy from "nothing group" facebook group','repare your emergency kit for fire diaster',
            'eliver grocery or medications to neighbours','nvitee community member to your celebrations',
            'ake a test to check your wellbeing after disaster','onate local hospitals and well being centres online',
            'heck if the house is at risk by looking at the flood map',
            'egister at flood warning service/ and bureau of meteorology warning service']
for i in range(data.shape[0]):
    temp_list=data['acts'].values[i].split(',')
    for item in temp_list:
        if item in duplicates:
            data['acts'].values[i]=None

In [4]:
data=data.dropna()
data.shape

(17520, 12)

In [5]:
acts_list=[]
for i in range(data.shape[0]):
    temp_list=data['acts'].values[i].split(',')
    for item in temp_list:
        if not (item in acts_list):
            acts_list.append(item)

In [6]:
for item in acts_list:
    data[item]=None

In [7]:

for i in range(data.shape[0]):
    temp_list=data['acts'].values[i].split(',')
    for item in acts_list:
        if item in temp_list:
            
            data[item].values[i]=1
        else:
            data[item].values[i]=0
'''
data['acts_num']='0'
for i in range(data.shape[0]):
    temp_list=data['acts'].values[i].split(',')
    temp_arr=['0']*39
    for item_i in range(len(acts_list)):
        if acts_list[item_i] in temp_list:
            temp_arr[item_i]='1'
    
    data['acts_num'].values[i]="".join(str(x) for x in temp_arr)
'''

'\ndata[\'acts_num\']=\'0\'\nfor i in range(data.shape[0]):\n    temp_list=data[\'acts\'].values[i].split(\',\')\n    temp_arr=[\'0\']*39\n    for item_i in range(len(acts_list)):\n        if acts_list[item_i] in temp_list:\n            temp_arr[item_i]=\'1\'\n    \n    data[\'acts_num\'].values[i]="".join(str(x) for x in temp_arr)\n'

In [8]:
data=data.drop(['acts','name'],axis=1)

In [9]:
outdoor_0_acts=['Prepare your emergency kit for flood diaster','Prepare your emergency kit for fire diaster',
               'Create educational material to show impact of disaster impact',
                'Register at flood warning service/ and bureau of meteorology warning service',
                'Check if the house is at risk by looking at the flood map',
                'Make an emergency package','Support startups ideas','Donate local hospitals and well being centres online',
                'Donate plants/tree to school or communitysupport local meetup group'
               ]
data['outdoor']=1
for i in range(data.shape[0]):
    for outdoor_act in outdoor_0_acts:
        if data[outdoor_act].values[i]:
            data['outdoor'].values[i]=0

In [10]:
disaster_history=pd.read_csv('disaster_history_coord.csv')
disaster_history.head(2)
fire_lat_lon=disaster_history[['lat','lon']][disaster_history['cateogory']=='Bushfire']
flood_lat_lon=disaster_history[['lat','lon']][disaster_history['cateogory']=='Flood']
fire_acts=['Prepare your emergency kit for fire diaster','Create educational material to show impact of disaster impact',
          'Make an emergency package']
flood_acts=['Create educational material to show impact of disaster impact',
           'Prepare your emergency kit for fire diaster','Register at flood warning service/ and bureau of meteorology warning service',
           'Check if the house is at risk by looking at the flood map','Make an emergency package']

for i in range(data.shape[0]):
    for fire_act in fire_acts:
        if data[fire_act].values[i]:
            rand_fire_act_ind=np.random.randint(fire_lat_lon.shape[0])
            dec_lat = random.random()/100
            dec_lng = random.random()/100
            data['lat'].values[i]=fire_lat_lon.values[rand_fire_act_ind,0]+dec_lat
            data['lng'].values[i]=fire_lat_lon.values[rand_fire_act_ind,1]+dec_lng
            
    for flood_act in flood_acts:
        if data[flood_act].values[i]:
            rand_flood_act_ind=np.random.randint(flood_lat_lon.shape[0])
            dec_lat = random.random()/100
            dec_lng = random.random()/100
            data['lat'].values[i]=flood_lat_lon.values[rand_flood_act_ind,0]+dec_lat
            data['lng'].values[i]=flood_lat_lon.values[rand_flood_act_ind,1]+dec_lng

            

In [11]:
all_cols=data.columns
input_cols=all_cols[:10]
out_cols=all_cols[10:]
t = [('num', MinMaxScaler(), input_cols)]
col_transform = ColumnTransformer(transformers=t,remainder='passthrough')

In [12]:
model = Pipeline(steps=[('transformer', col_transform),
                      #('classifier',RandomForestClassifier(n_estimators=100))
                       ('classifier',RandomForestClassifier())])

In [13]:
'''
for i in range(len(out_cols)):
    X=data.iloc[:,:10]#.astype(np.float32)
    y=data.iloc[:,10+i].astype(np.uint8)
    Xtrain,Xtest,ytrain,ytest=train_test_split(X,y,test_size=0.2)
    model.fit(Xtrain,ytrain)
    print('Training accuracy',model.score(Xtrain,ytrain))
    print('Test accuracy',model.score(Xtest,ytest))
'''
### MULTIMODEL TRAINING
model_list=[]
for col in out_cols:
    temp_model = Pipeline(steps=[('transformer', col_transform),
                  ('classifier',RandomForestClassifier(n_estimators=100))
                   #('classifier',KNeighborsClassifier())
                                ])

    X=data.iloc[:,:10]#.astype(np.float32)
    y=data[col].astype(np.uint8)
    Xtrain,Xtest,ytrain,ytest=train_test_split(X,y,test_size=0.33,random_state=2021)
    temp_model.fit(Xtrain,ytrain)
    model_list.append(temp_model)


X=data.iloc[:,:10]#.astype(np.float32)
y=data.iloc[:,10:].astype(np.uint8)
Xtrain,Xtest,ytrain,ytest=train_test_split(X,y,test_size=0.33,random_state=2021)  
pred_ytest=np.zeros(ytest.shape)
pred_ytrain=np.zeros(ytrain.shape)
for i in range(y.shape[1]):
    pred_ytest[:,i]=model_list[i].predict(Xtest)
    pred_ytrain[:,i]=model_list[i].predict(Xtrain)

print('Hard accuracies')
total=0
correct=0
for i in range(ytrain.shape[0]):
    if (ytrain.values[i,:]==pred_ytrain[i,:]).all():
        correct+=1
    total+=1
print('Training accuracy',100*correct/total)


total=0
correct=0
for i in range(ytest.shape[0]):
    if (ytest.values[i,:]==pred_ytest[i,:]).all():
        correct+=1
    total+=1
print('Test accuracy',100*correct/total)


print('Soft accuracies')
total=0
correct=0
for i in range(ytrain.shape[0]):
    for j in range(ytrain.shape[1]):
        if (ytrain.values[i,j]==pred_ytrain[i,j]):
            correct+=1
        total+=1
print('Training accuracy',100*correct/total)


total=0
correct=0
for i in range(ytest.shape[0]):
    for j in range(ytest.shape[1]):
        if (ytest.values[i,j]==pred_ytest[i,j]):
            correct+=1
        total+=1
print('Test accuracy',100*correct/total)

Hard accuracies
Training accuracy 92.24740160163572
Test accuracy 0.0
Soft accuracies
Training accuracy 99.14369721832662
Test accuracy 80.83264596581787


In [14]:
        
def MultiModelpredict(name,age,lat,lng,disable,outdoor,preparation,community,local_support,environment,asset_protection):
    
    input_vals=np.array([age,lat,lng,disable,outdoor,preparation,community,local_support,environment,asset_protection])

    df_input=pd.DataFrame(input_vals.reshape(1,-1))
    df_input.columns=input_cols
    transformed_output=np.zeros(len(out_cols))
    transformed_output_probs=np.zeros(len(out_cols))
    for i in range(len(out_cols)):
        transformed_output[i]=np.squeeze(model_list[i].predict(df_input))
        transformed_output_probs[i]=max(np.squeeze(model_list[i].predict_proba(df_input)))

    pred_acts=[]
    confidence=[]
    for i in range(transformed_output.shape[0]):
        if transformed_output[i]:
            pred_acts.append(out_cols[i])
            confidence.append(transformed_output_probs[i])
    return pred_acts,confidence

In [15]:
MultiModelpredict('John Cena',50,-32.0525,115.8878,False,True,0,1,1,1,0)

(['Donate books ',
  'movies and newspaper to library',
  'Help in your neighbors garden',
  'Buy  handmade products',
  'Buy from "nothing group" facebook group',
  'Place a bird feeder',
  'Take a test to check your wellbeing after disaster'],
 [0.58, 0.61, 0.65, 0.64, 0.92, 0.59, 0.6])

In [51]:
#transformed_data=col_transform.fit_transform(data)

In [17]:
#model=RandomForestClassifier()
#model.fit(Xtrain, ytrain)

In [None]:
breakpoint

In [71]:
#!pip install flask  requests

Collecting flask
  Downloading Flask-1.1.2-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 3.4 MB/s  eta 0:00:01
Collecting itsdangerous>=0.24
  Downloading itsdangerous-1.1.0-py2.py3-none-any.whl (16 kB)
Installing collected packages: itsdangerous, flask
Successfully installed flask-1.1.2 itsdangerous-1.1.0


In [1]:
#!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-386.zip

--2021-04-18 21:13:22--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-386.zip
Resolving bin.equinox.io (bin.equinox.io)... 54.226.184.31, 3.212.138.198, 54.175.245.12, ...
Connecting to bin.equinox.io (bin.equinox.io)|54.226.184.31|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 14311173 (14M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-386.zip’


2021-04-18 21:13:26 (5.24 MB/s) - ‘ngrok-stable-linux-386.zip’ saved [14311173/14311173]

