### OCI Data Science - Useful Tips
Everything stored in the <span style="background-color: #d5d8dc ">/home/datascience</span> folder is now stored on your block volume drive. The <span style="background-color: #d5d8dc ">ads-examples</span> folder has been moved outside of your working space. Notebook examples are now accessible through the "Notebook Examples" button on the Launcher tab.
<details>
<summary><font size="2">1. Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">2. OCI Configuration and Key Files Setup</font></summary><p>Follow the instructions in the getting-started notebook. That notebook is accessible via the "Getting Started" button on the Launcher tab.</p>
</details>
<details>
<summary><font size="2">3. Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">4. Typical Cell Imports and Settings</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import MLData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">5. Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [3]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import getpass
import logging
import os
from datetime import datetime
from datetime import timedelta

import cx_Oracle
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [4]:
# Add TNS_ADMIN to the environment so the Oracle client can locate the
# wallet / tnsnames.ora files used to resolve the DSN alias below.
os.environ['TNS_ADMIN'] = "/home/datascience/ADB"

# Credentials must never be committed inside the notebook. They are read from
# the environment instead; the user name and DSN alias keep their previous
# values as defaults, while the password has no default and is prompted for
# interactively when ADW_PASSWORD is not set.
db_user = os.environ.get("ADW_USER", "hackathon")
db_dsn = os.environ.get("ADW_DSN", "adw_low")
db_password = os.environ.get("ADW_PASSWORD") or getpass.getpass(
    "Password for {}@{}: ".format(db_user, db_dsn)
)

# Shared connection object used by every pd.read_sql call in this notebook.
connection = cx_Oracle.connect(db_user, db_password, db_dsn)

In [155]:
# Catalog lookup: list every table in ALL_tables whose upper-cased name contains "AQUI".
aquisicao_tables_sql = "SELECT table_name FROM ALL_tables WHERE UPPER(TABLE_NAME) LIKE '%AQUI%'"
TABELAS_AQUISICAO = pd.read_sql(aquisicao_tables_sql, con=connection)

In [156]:
# Display the table names returned by the ALL_tables lookup.
TABELAS_AQUISICAO

Unnamed: 0,TABLE_NAME
0,EVE_MODELO_ENGAJAMENTO_AQUISICAO_HCKT
1,EVE_MODELO_PERFILPJ_ATC_AQUISICAO_HCKT
2,EVE_PUBLICO_AQUISICAO_HCKT
3,EVE_BUFFER_CCI_AQUISICAO_HCKT
4,EVE_BUFFER_ATC_AQUISICAO_HCKT


In [157]:
# Pull a handful of raw rows (rownum < 10) from EVE_BUFFER_CCI_AQUISICAO_HCKT
# to inspect its columns, then show the first five.
cci_aquisicao_sample_sql = 'SELECT * FROM EVE_BUFFER_CCI_AQUISICAO_HCKT where rownum < 10'
BUFFER_CRF = pd.read_sql(cci_aquisicao_sample_sql, con=connection)
BUFFER_CRF.head(5)

Unnamed: 0,CPF_CRIP,CUP_ID_CUPOM,ICUP_DT_CUPOM,PCU_CD_FORPAGCUPOM,IVA_CD_ITEM,IVA_DS_ITEM,PRF_DS_SUBFAMILIA,PRF_DS_FAMILIA,PRF_DS_GRUPO,PRF_DS_SETOR,...,ICUP_VL_TOTALITEM,ICUP_VL_PAGOCARTAO,ICUP_VL_BONUS,ICUP_VL_BONUSCCI,ICUP_QT_ITEMCOMPD,MERC_DS_EMPRS,MERC_DS_TIPO_ESTABELECIMENTO,MERC_DS_DIVISAO,MERC_DS_NOMELOJA,ICUP_DT_REF
0,1EE1D82CBFC,2230257116,2020-06-22,12,6355870,ETANOL HIDRATADO COMBUSTIVEL COMUM,ALCOOL HIDRATADO,ALCOOL HIDRATADO,COMBUSTIVEIS,POSTO E SERVICOS,...,52.82,0.0,0.0,0.0,18.219,1- CARREFOUR,5-POSTO,POSTO,POSTO AP. GOIANIA,20200622
1,1EE1D82CBFC,2230204603,2020-06-22,12,8258015,KIT 3 MEIAS INV OM002 MASH BCO 39A44,MEIAS KIT ESPORTE,MEIAS KIT ESPORTE,INTIMA MASCULINO,TEXTIL - PERMANENTE,...,17.99,0.0,0.0,0.0,1.0,1- CARREFOUR,4-HIPER,121-HIPER OUTRAS REGIOES,Goiania Sudoeste,20200622
2,1EE1D82CBFC,2233610601,2020-06-29,12,6355862,GASOLINA C ADITIVADA,GASOLINA ADITIVADA,GASOLINA ADITIVADA,COMBUSTIVEIS,POSTO E SERVICOS,...,1.5,0.0,0.0,0.0,0.34,1- CARREFOUR,5-POSTO,POSTO,POSTO AP. GOIANIA,20200629
3,1EE1D82CBFC,2244771330,2020-07-20,12,6355870,ETANOL HIDRATADO COMBUSTIVEL COMUM,ALCOOL HIDRATADO,ALCOOL HIDRATADO,COMBUSTIVEIS,POSTO E SERVICOS,...,29.27,0.0,0.0,0.0,11.05,1- CARREFOUR,5-POSTO,POSTO,POSTO AP. GOIANIA,20200720
4,1EE1D82CBFC,2230204603,2020-06-22,12,5622867,KIT 3 CUECA SLIP ELAS AP POLO WOR 133 GG,CUECA KIT,CUECA KIT,INTIMA MASCULINO,TEXTIL - PERMANENTE,...,15.99,0.0,0.0,0.0,1.0,1- CARREFOUR,4-HIPER,121-HIPER OUTRAS REGIOES,Goiania Sudoeste,20200622


In [158]:
# Distinct CPF_CRIP count per (setor, grupo, familia, subfamilia) combination
# in EVE_BUFFER_CCI_AQUISICAO_HCKT. The aggregation runs in the database, so
# only the grouped result is transferred.
cci_aquisicao_by_category_sql = 'SELECT count(distinct CPF_CRIP),PRF_DS_SETOR,PRF_DS_GRUPO,PRF_DS_FAMILIA,PRF_DS_SUBFAMILIA FROM EVE_BUFFER_CCI_AQUISICAO_HCKT group by PRF_DS_SETOR,PRF_DS_GRUPO,PRF_DS_FAMILIA,PRF_DS_SUBFAMILIA'
BUFFER_CRF = pd.read_sql(cci_aquisicao_by_category_sql, con=connection)
BUFFER_CRF.head(5)

Unnamed: 0,COUNT(DISTINCTCPF_CRIP),PRF_DS_SETOR,PRF_DS_GRUPO,PRF_DS_FAMILIA,PRF_DS_SUBFAMILIA
0,42429,P.A.S.,LEITE,LTE AROMATIZADO ATE 499ML,LTE AROMATIZADO ATE 499ML
1,1306,LIQUIDA,AGUA SABORIZADA,ACIMA DE 601ML REGULAR,ACIMA DE 601ML REGULAR
2,49810,PERFUMARIA,HIGIENE ORAL,CREME DENTAL BRANQUEADOR,CREME DENTAL BRANQUEADOR
3,98232,SECA,SALGADINHOS,BATATA TRADICIONAL,BATATA TRADICIONAL
4,144381,P.A.S.,PAO,PAO FORMA REGULAR COM CASCA,PAO FORMA REGULAR COM CASCA


In [159]:
# Distinct CPF_CRIP count per (departamento, setor, familia) combination in
# EVE_BUFFER_CCI_MANU_HCKT.
cci_manu_by_category_sql = 'SELECT PRF_DS_DEPARTAMENTO,PRF_DS_SETOR,PRF_DS_FAMILIA, count(distinct CPF_CRIP) FROM EVE_BUFFER_CCI_MANU_HCKT  group by PRF_DS_DEPARTAMENTO,PRF_DS_SETOR,PRF_DS_FAMILIA'
BUFFER_CRF = pd.read_sql(cci_manu_by_category_sql, con=connection)
BUFFER_CRF.head(5)

Unnamed: 0,PRF_DS_DEPARTAMENTO,PRF_DS_SETOR,PRF_DS_FAMILIA,COUNT(DISTINCTCPF_CRIP)
0,PGC,D P H,MAQUINA PO,2672
1,PERECIVEIS,SALSICHARIA,MUSSARELA FATIADA,101265
2,PGC,SECA,MILHO LATA,49109
3,PGC,PERFUMARIA,CONDICIONADOR REGULAR,30095
4,PERECIVEIS,F.L.V.,PERAS,32720


In [160]:
# Pull a handful of raw rows (rownum < 10) from EVE_BUFFER_CCI_MANU_HCKT to
# inspect its columns, then show the first five.
cci_manu_sample_sql = 'SELECT * FROM EVE_BUFFER_CCI_MANU_HCKT where rownum < 10 '
BUFFER_CRF = pd.read_sql(cci_manu_sample_sql, con=connection)
BUFFER_CRF.head(5)

Unnamed: 0,CPF_CRIP,TICU_ID_ITEMCUPOM,PRF_DS_DEPARTAMENTO,PRF_DS_SETOR,PRF_DS_FAMILIA,TICU_VL_TOTALITEM,ANOMES
0,7EFA81D66F4,5611166865,PGC,SECA,TABLETE CARNES,2.58,202003
1,AACB24BDAA9,5579249007,PGC,SECA,ORGANICOS ACUCAR,4.39,202003
2,3B6E504EA30,5579334402,PGC,SECA,CHOCOLATE TABLETE MINI,73.91,202003
3,108DF671A9D,5579287964,PGC,P.A.S.,SEM SAL REGULAR,5.29,202003
4,A42CF566521,5579407386,PGC,SECA,ORGANICOS ACUCAR,3.39,202003


In [161]:
# Pull a handful of raw rows (rownum < 10) from EVE_PROBE_MANU_HCKT to inspect
# its columns, then show the first five.
probe_sample_sql = 'SELECT * FROM EVE_PROBE_MANU_HCKT where rownum < 10 '
PROBE_CRF = pd.read_sql(probe_sample_sql, con=connection)
PROBE_CRF.head(5)

Unnamed: 0,NUMBERX_CRIPT,RSAC_DATE_REF,RSAC_CURR_STATUS,RSAC_FL_ATIVO,RSAC_EVENT_COLLECTIONS,RSAC_EVENT_CYCLE_POINT,RSAC_CURRENT_BALANCE,RSAC_TIME_SINC_ACC_OPNED_MMMDD,RSAC_CURRENT_CREDIT_LIMIT,MDA,...,RSAC_TOTAL_BALANCE_CYCLE_4,RSAC_TOTAL_BALANCE_CYCLE_5,RSAC_TOTAL_BALANCE_CYCLE_6,RSAC_TOTAL_PAYMENTS_CYCLE_1,RSAC_TOTAL_PAYMENTS_CYCLE_2,RSAC_VALUE_PAYMENTS,FL_SALDO_OU_PARCELADO_L6M,FL_SALDO_L6M,RSAC_ZIP_CODE,CD_POSICAO
0,C961E9BC1E3,20200114,NORM,1,0,1,42.76,1821,2020,0,...,185.08,113.79,378.34,248.25,215.64,274.51,1,1,71261060,202001
1,F22EAEB935E,20200114,NORM,0,1,0,714.03,8008,2472,1,...,2429.67,2427.93,1063.25,1440.53,673.69,1440.53,1,1,38400694,202001
2,0AFA4C55B0B,20200114,NORM,0,1,0,1365.37,1811,1360,2,...,1305.56,502.93,998.05,898.53,1089.9,898.53,1,1,25220600,202001
3,58DA9429896,20200114,NORM,1,0,1,0.0,1821,3200,0,...,79.9,0.0,0.0,0.0,0.0,0.0,1,1,12630000,202001
4,5D258433303,20200114,NORM,0,1,0,102.38,1815,966,1,...,117.27,117.28,56.89,98.37,101.83,98.37,1,1,53401690,202001


In [162]:
# Pull a handful of raw rows (rownum < 10) from EVE_POSSUISEGUROS_MANU_HCKT to
# inspect its columns, then show the first five.
# NOTE(review): this rebinds PROBE_CRF, which previously held rows from
# EVE_PROBE_MANU_HCKT - consider a dedicated variable name for this table.
seguros_sample_sql = 'SELECT * FROM EVE_POSSUISEGUROS_MANU_HCKT where rownum < 10 '
PROBE_CRF = pd.read_sql(seguros_sample_sql, con=connection)
PROBE_CRF.head(5)

Unnamed: 0,NUMBERX_CRIPT,MESREF,FL_FATURA_PROTEGIDA,FL_SEGURACO,FL_LAR_SEGURO,FL_HOSPITALAR,FL_SORTE_GRANDE,FL_ODONTO,FL_CONTA_PAGA,FL_PERDA_E_ROUBO,FL_PROTECAO_PESSOAL
0,2A35D858E44,202001,0,0,0,0,0,0,1,1,0
1,2A35D858E44,202002,0,0,0,0,0,0,1,1,0
2,2A35D858E44,202003,0,0,0,0,0,0,1,1,0
3,2A35D858E44,202005,0,0,0,0,0,0,1,1,0
4,2A35D858E44,202006,0,0,0,0,0,0,1,1,0


In [163]:
# For MESREF = '202005': number of distinct NUMBERX_CRIPT keys (QTDE) plus the
# sum of each FL_* flag column in EVE_POSSUISEGUROS_MANU_HCKT.
# The result is a single aggregated row, so it is displayed in full.
seguros_totals_sql = '''SELECT COUNT(DISTINCT NUMBERX_CRIPT) AS QTDE
                              ,SUM(FL_FATURA_PROTEGIDA)
                              ,SUM(FL_SEGURACO)
                              ,SUM(FL_LAR_SEGURO)
                              ,SUM(FL_HOSPITALAR)
                              ,SUM(FL_SORTE_GRANDE)
                              ,SUM(FL_ODONTO)
                              ,SUM(FL_CONTA_PAGA)
                              ,SUM(FL_PERDA_E_ROUBO)
                              ,SUM(FL_PROTECAO_PESSOAL)
                          FROM EVE_POSSUISEGUROS_MANU_HCKT 
                          WHERE MESREF = '202005' '''
PROBE_CRF = pd.read_sql(seguros_totals_sql, con=connection)
PROBE_CRF

Unnamed: 0,QTDE,SUM(FL_FATURA_PROTEGIDA),SUM(FL_SEGURACO),SUM(FL_LAR_SEGURO),SUM(FL_HOSPITALAR),SUM(FL_SORTE_GRANDE),SUM(FL_ODONTO),SUM(FL_CONTA_PAGA),SUM(FL_PERDA_E_ROUBO),SUM(FL_PROTECAO_PESSOAL)
0,666100,312893,308727,33556,6305,87419,1520,66058,34185,2459


In [164]:
# Pull a handful of raw rows (rownum < 10) from EVE_PRODUTOS_MANU_HCKT to
# inspect its columns, then show the first five.
produtos_sample_sql = 'SELECT * FROM EVE_PRODUTOS_MANU_HCKT where rownum < 10'
PRODUCTS = pd.read_sql(produtos_sample_sql, con=connection)
PRODUCTS.head(5)

Unnamed: 0,CACCSERNO_CRIPT,CD_POSICAO,VL_COMPRA_ON_PARC_CJ,VL_COMPRA_ON_PARC_SJ,VL_COMPRA_OFF_PARC_CJ,VL_COMPRA_OFF_PARC_SJ,VL_COMPRA_PARC_CJ,VL_COMPRA_PARC_SJ,VL_EMPRESTIMO,VL_PAGCONTAS,VL_PARCELA_PRONTA,VL_PARCELE,VL_SAQUE,VL_SAQUE_PARC,VL_SAQUE_ROT
0,2A232F,202001,0.0,0.0,0.0,369.67,0.0,369.67,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,B79BE6,202001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1890.34,0.0,0.0,0.0
2,35E713,202001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,203.87,0.0,0.0,0.0,0.0
3,5B183B,202001,0.0,0.0,0.0,292.22,0.0,292.22,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,B31378,202001,0.0,0.0,0.0,183.32,0.0,183.32,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [165]:
# For CD_POSICAO = '202008': count distinct CACCSERNO_CRIPT keys for every
# combination of "value > 0" indicators over five VL_* columns. Each CASE has
# no ELSE branch, so a non-matching row yields NULL for that indicator.
# NOTE(review): the VL_* columns are compared against the string literal '0';
# confirm the column types make this behave as a numeric comparison.
produtos_combinations_sql = '''SELECT COUNT(DISTINCT CACCSERNO_CRIPT)QTDE
                                ,CASE WHEN VL_EMPRESTIMO > '0' THEN '1' end VL_EMPRESTIMO
                                ,CASE WHEN VL_PAGCONTAS > '0' THEN '1' end VL_PAGCONTAS
                                ,CASE WHEN VL_PARCELA_PRONTA > '0' THEN '1' end VL_PARCELA_PRONTA 
                                ,CASE WHEN VL_PARCELE > '0' THEN '1' end VL_PARCELE
                                ,CASE WHEN VL_SAQUE > '0' THEN '1' end VL_SAQUE
                           FROM EVE_PRODUTOS_MANU_HCKT
                          where CD_POSICAO ='202008'
                       group by  CASE WHEN VL_EMPRESTIMO > '0' THEN '1' end
                                ,CASE WHEN VL_PAGCONTAS > '0' THEN '1' end
                                ,CASE WHEN VL_PARCELA_PRONTA > '0' THEN '1'end
                                ,CASE WHEN VL_PARCELE > '0' THEN '1'end
                                ,CASE WHEN VL_SAQUE > '0' THEN '1'end
                         '''
PRODUCTS = pd.read_sql(produtos_combinations_sql, con=connection)
PRODUCTS.head(5)

Unnamed: 0,QTDE,VL_EMPRESTIMO,VL_PAGCONTAS,VL_PARCELA_PRONTA,VL_PARCELE,VL_SAQUE
0,34508,,,,,
1,2,1.0,1.0,,,
2,564,,,,1.0,
3,26,1.0,,,,
4,8,,,1.0,1.0,


In [22]:
# Identify which financial products and services the customers hold.
#
# Base set "a": EVE_SUMARIOCONTA_MANU_HCKT rows with date_ref = '202009',
# inner-joined to EVE_CHAVES_MANU_HCKT to obtain NUMBERX_CRIPT for each
# CACCSERNO_CRIPT. Each LEFT JOIN (b..f) attaches the distinct accounts whose
# corresponding VL_* column is positive in EVE_PRODUTOS_MANU_HCKT for
# CD_POSICAO = '202009'; "G" attaches the distinct NUMBERX_CRIPT keys present
# in EVE_POSSUISEGUROS_MANU_HCKT for MESREF = '202009'. Every SUM counts the
# base rows that found a match in that join.
# NOTE(review): TOTAL is a plain count(), not COUNT(DISTINCT), so repeated
# keys in the base set are counted once per row - confirm this is intended.
# NOTE(review): VL_EMPRESTIMO is compared to the number 0 while the other
# VL_* filters compare to the string '0'; confirm both forms are equivalent
# for these column types.
product_ownership_sql = '''SELECT count(a.CACCSERNO_CRIPT) AS TOTAL
                                ,SUM((CASE WHEN B.CACCSERNO_CRIPT IS NOT NULL THEN 1 else 0 END)) AS EMPRESTIMO
                                ,SUM((CASE WHEN C.CACCSERNO_CRIPT IS NOT NULL THEN 1 else 0 END)) AS VL_PAGCONTAS
                                ,SUM((CASE WHEN D.CACCSERNO_CRIPT IS NOT NULL THEN 1 else 0 END)) AS VL_PARCELA_PRONTA
                                ,SUM((CASE WHEN E.CACCSERNO_CRIPT IS NOT NULL THEN 1 else 0 END)) AS VL_PARCELE
                                ,SUM((CASE WHEN F.CACCSERNO_CRIPT IS NOT NULL THEN 1 else 0 END)) AS VL_SAQUE
                                ,SUM((CASE WHEN G.NUMBERX_CRIPT IS NOT NULL THEN 1 else 0 END)) AS SEGURO
                            FROM (select a.CACCSERNO_CRIPT,b.NUMBERX_CRIPT
                                    from EVE_SUMARIOCONTA_MANU_HCKT a 
                              inner join EVE_CHAVES_MANU_HCKT b on a.CACCSERNO_CRIPT = b.CACCSERNO_CRIPT 
                                   where date_ref ='202009' ) a
                            left join (SELECT DISTINCT CACCSERNO_CRIPT
                                         FROM EVE_PRODUTOS_MANU_HCKT
                                        where CD_POSICAO ='202009' 
                                          AND VL_EMPRESTIMO >0 ) b on  A.CACCSERNO_CRIPT = b.CACCSERNO_CRIPT
                           LEFT JOIN (SELECT DISTINCT CACCSERNO_CRIPT
                                     FROM EVE_PRODUTOS_MANU_HCKT
                                    where CD_POSICAO ='202009'
                                      AND VL_PAGCONTAS > '0') c ON A.CACCSERNO_CRIPT = c.CACCSERNO_CRIPT
                          LEFT JOIN (SELECT DISTINCT CACCSERNO_CRIPT
                                     FROM EVE_PRODUTOS_MANU_HCKT
                                     where CD_POSICAO ='202009'
                                      AND VL_PARCELA_PRONTA > '0') d ON A.CACCSERNO_CRIPT = d.CACCSERNO_CRIPT
                                 LEFT JOIN (SELECT DISTINCT CACCSERNO_CRIPT
                                     FROM EVE_PRODUTOS_MANU_HCKT
                                    where CD_POSICAO ='202009'
                                     AND VL_PARCELE > '0') e ON A.CACCSERNO_CRIPT = e.CACCSERNO_CRIPT
                             LEFT JOIN (SELECT DISTINCT CACCSERNO_CRIPT
                                     FROM EVE_PRODUTOS_MANU_HCKT
                                    where CD_POSICAO ='202009'
                                     AND VL_SAQUE > '0') f ON A.CACCSERNO_CRIPT = f.CACCSERNO_CRIPT 
                             LEFT JOIN (SELECT DISTINCT NUMBERX_CRIPT
                                          FROM EVE_POSSUISEGUROS_MANU_HCKT 
                                          WHERE MESREF = '202009') G ON A.NUMBERX_CRIPT = G.NUMBERX_CRIPT 
                              '''
PRODUCTS = pd.read_sql(product_ownership_sql, con=connection)
PRODUCTS

Unnamed: 0,TOTAL,EMPRESTIMO,VL_PAGCONTAS,VL_PARCELA_PRONTA,VL_PARCELE,VL_SAQUE,SEGURO
0,122670,66,857,1780,1202,0,45205


In [190]:
# Pull a handful of raw rows (rownum < 10) from EVE_SUMARIOCONTA_MANU_HCKT to
# inspect its columns, then show the first five.
sumario_conta_sample_sql = 'SELECT * FROM EVE_SUMARIOCONTA_MANU_HCKT where rownum < 10'
CONTA = pd.read_sql(sumario_conta_sample_sql, con=connection)
CONTA.head(5)

Unnamed: 0,CACCSERNO_CRIPT,PRAM_NU_ANOMESSAFRA,DMSC_NU_MOB,DMSC_CD_FAIXABHVRSCORE,DMSC_FG_CONTAATIVDA,DMSC_FG_CONTAATIV,DMSC_FG_CONTAAPTA,DMSC_FG_RECUP,DMSC_FG_OVER30,DMSC_FG_OVER60,...,DMSC_PC_OVERLIM,DMSC_VL_COMPAVISTAON,DMSC_VL_COMPAVISTAOFF,DMSC_VL_COMPPRCLAON,DMSC_VL_COMPPRCLAOFF,DMSC_QT_COMPAVISTAON,DMSC_QT_COMPAVISTAOFF,DMSC_QT_COMPPRCLAON,DMSC_QT_COMPPRCLAOFF,DATE_REF
0,C5AD7D,200910,131,18,1,1,1,0,0,0,...,50.0,0.0,1325.87,0.0,0.0,0,20,0,0,202009
1,C5AD7D,200910,124,15,1,1,1,0,0,0,...,50.0,668.18,3012.74,0.0,0.0,0,44,0,0,202002
2,C5AD7D,200910,126,18,1,1,1,0,0,0,...,0.0,0.0,2325.63,0.0,0.0,0,45,0,0,202004
3,C5AD7D,200910,125,15,1,1,1,0,0,0,...,0.0,103.69,2262.77,0.0,0.0,0,31,0,0,202003
4,C5AD7D,200910,128,15,1,1,1,0,0,0,...,50.0,966.47,2865.54,0.0,0.0,0,46,0,0,202006


In [10]:
# Show that the account table contains duplicated encrypted keys, i.e. the
# data is not 100% clean.
# Rows for date_ref = '202009' are numbered within each CACCSERNO_CRIPT
# partition; keeping rap > 1 returns the second and later occurrences, so any
# key listed here appears more than once for that reference month.
duplicated_accounts_sql = ''' select * 
                        from (SELECT CACCSERNO_CRIPT,row_number()  over (partition by CACCSERNO_CRIPT  order by CACCSERNO_CRIPT) as rap
                               from EVE_SUMARIOCONTA_MANU_HCKT where date_ref ='202009') 
                         where rap > 1
                          '''
CONTA = pd.read_sql(duplicated_accounts_sql, con=connection)
CONTA

Unnamed: 0,CACCSERNO_CRIPT,RAP
0,04C92F,2
1,04CB7B,2
2,05D4E9,2
3,091D18,2
4,0A1EA7,2
5,0B43BD,2
6,0D2BBF,2
7,0DC090,2
8,0EBFBB,2
9,0F08DB,2
