# BANK MARKETING

<br><br>
Membros:
- Anderson Jesus
- Caio Viera
- Pedro Correia



> UTILIZAÇÃO DE MODELO PREDITIVO TREINADO ANTERIORMENTE

#### Inicializando sessão do Spark

In [1]:
import findspark
# Initialise findspark with the directory of the local Spark installation.
spark_home = '/home/pfcor/spark-2.1.0-bin-hadoop2.7'
findspark.init(spark_home)

In [2]:
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
# Configure a session builder for the application named 'bank', then obtain
# the session from it (getOrCreate returns an existing session if there is one).
session_builder = SparkSession.builder.appName('bank')
spark = session_builder.getOrCreate()

#### Carregando Modelo

In [4]:
from pyspark.ml import PipelineModel

# Load the saved pipeline model from its directory on disk.
model_dir = 'model/bank-pipeline-model-res/'
pipelineModel = PipelineModel.load(model_dir)

#### Carregando os Dados

In [5]:
# Read the new observations from a semicolon-separated CSV file. The first
# row supplies the column names and Spark infers the column types.
data = spark.read.csv(
    'data/new-data.csv',
    sep=';',
    header=True,
    inferSchema=True
)

# Replace the dots in column names with underscores. toDF renames the columns
# by position, so no SQL expression string has to be assembled and parsed;
# this keeps working for names that are not bare SQL identifiers.
data = data.toDF(*[name.replace('.', '_') for name in data.columns])

#### Realizando as previsões

In [6]:
# Run the loaded pipeline over the new data; the result keeps the input
# columns and adds the pipeline's output columns (including 'prediction').
predictions = pipelineModel.transform(data)

Este notebook simula a entrada de novos dados, nunca vistos pelo modelo. Neste caso, porém, temos os labels e podemos medir o desempenho diretamente.

In [7]:
# Expose only the 'label' and 'prediction' columns as a temporary SQL view
# named 'predictions' so they can be queried with spark.sql.
predictions.select('label', 'prediction').createOrReplaceTempView('predictions')

# Compute accuracy, precision and recall in three nested queries:
#   1. innermost: flag each row as tn / tp / fn / fp (1 or 0) by comparing
#      label and prediction;
#   2. middle: sum each flag to obtain the confusion-matrix counts;
#   3. outer: derive the three metrics from the counts, rounded to 4 decimals.
# NOTE(review): if no row is predicted positive (tp + fp = 0), or no row is
# labelled positive (tp + fn = 0), the denominator is zero -- confirm how the
# Spark SQL configuration in use handles that (NULL result vs. error).
spark.sql("""
SELECT
    round((tp+tn)/(tp+tn+fp+fn), 4) as accuracy,
    round(tp/(tp+fp), 4) as precision,
    round(tp/(tp+fn), 4) as recall
FROM (
    SELECT
        sum(tn) as tn,
        sum(tp) as tp,
        sum(fn) as fn,
        sum(fp) as fp
    FROM (
        SELECT
            case when label = 0 and prediction = 0 then 1 else 0 end as tn,
            case when label = 1 and prediction = 1 then 1 else 0 end as tp,
            case when label = 1 and prediction = 0 then 1 else 0 end as fn,
            case when label = 0 and prediction = 1 then 1 else 0 end as fp
        FROM
            predictions
    )
)
""").show()

+--------+---------+------+
|accuracy|precision|recall|
+--------+---------+------+
|  0.8558|    0.426|0.5685|
+--------+---------+------+



<hr>

In [9]:
import datetime as dt

In [15]:
# Current local date formatted as YYYY-MM-DD.
timestamp = dt.datetime.now().strftime('%Y-%m-%d')

'2018-08-03'

In [18]:
# Display the column names of the transformed DataFrame.
predictions.columns

['age',
 'job',
 'marital',
 'education',
 'default',
 'housing',
 'loan',
 'contact',
 'month',
 'day_of_week',
 'duration',
 'campaign',
 'pdays',
 'previous',
 'poutcome',
 'emp_var_rate',
 'cons_price_idx',
 'cons_conf_idx',
 'euribor3m',
 'nr_employed',
 'y',
 'y_numeric',
 'label',
 'job_index',
 'job_class_vec',
 'marital_index',
 'marital_class_vec',
 'education_index',
 'education_class_vec',
 'default_index',
 'default_class_vec',
 'housing_index',
 'housing_class_vec',
 'loan_index',
 'loan_class_vec',
 'contact_index',
 'contact_class_vec',
 'month_index',
 'month_class_vec',
 'day_of_week_index',
 'day_of_week_class_vec',
 'poutcome_index',
 'poutcome_class_vec',
 'features',
 'prediction']

In [22]:
# Project the label, the prediction and the feature column cast to a string,
# then print the schema of that projection.
features_as_text = predictions['features'].cast('string')
predictions.select('label', 'prediction', features_as_text).printSchema()

root
 |-- label: double (nullable = true)
 |-- prediction: double (nullable = true)
 |-- features: string (nullable = true)

