# Notebook de generación de un modelo sencillo

## Imports

In [1]:
import os
import sys
import json
import pickle
import sklearn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from onesaitplatform.iotbroker import IotBrokerClient

## Variables

In [2]:
PROJECT_FOLDER = "D:\EBUSTOS\DEVELOP\INDRA\model-microservice-example" #"path/to/folder"
simulator_project_folder = os.path.join(PROJECT_FOLDER, "simulation")
data_project_folder = os.path.join(PROJECT_FOLDER, "data")
scaler_file = os.path.join(data_project_folder, "precipitation_scaler.pkl")
model_file = os.path.join(data_project_folder, "precipitation_model.pkl")
label_file = os.path.join(data_project_folder, "labels.json")

In [3]:
if simulator_project_folder not in sys.path:
    sys.path.insert(0, simulator_project_folder)
from datasimulator import Simulator

In [4]:
client = IotBrokerClient(host="industryonesait.cwbyminsait.com",
                        iot_client="predictionclient", 
                         iot_client_token="1b5e4bf62cb94812a838e215b20cdcdd")
client.protocol = "https"
client.avoid_ssl_certificate = True
print(client)

Info - IotBrokerClient will be soon deprecated, please use DigitalClient instead
Client.IotBrokerClient(host=industryonesait.cwbyminsait.com, port=None, _Client__protocol=https, _Client__avoid_ssl_certificate=True, is_connected=False, iot_client=predictionclient, iot_clientId=predictionclient:PythonClient, iot_client_token=1b5e4bf62cb94812a838e215b20cdcdd, session_key=None)


In [5]:
client.join()



(True, {'sessionKey': 'da2cc152-6243-451f-bbb2-71f14391a9c3'})

In [6]:
# Para no sobrepasar las limitaciones de queries que puedan estar configuradas desde plataforma, 
# se puede realizar una query_batch() que realiza varias peticiones y no sobrecarga la red
ok_query, res_query = client.query_batch("predictionmodel", "db.predictionmodel.find()", "NATIVE", batch_size = 500)
len(res_query)









50000

In [7]:
client.leave()



(True, {'message': 'Disconnected'})

In [8]:
data_formated = [d["predictionmodel"] for d in res_query]
data_formated[0]

{'tmp0': 39.89, 'tmp1': 51.48, 'hPa': 1092, 'hum': 0.93, 'pp': 0}

In [9]:
df = pd.DataFrame(data_formated)

In [10]:
df.head()

Unnamed: 0,hPa,hum,pp,tmp0,tmp1
0,1092,0.93,0,39.89,51.48
1,936,0.57,0,7.2,7.01
2,981,0.66,0,18.55,20.12
3,1053,0.61,0,16.77,16.77
4,1018,0.14,0,41.3,35.17


In [11]:
df["pp_target"] = df["pp"] > 0
df_processed = df.drop("pp", axis=1)
df = None
df_processed.head()

Unnamed: 0,hPa,hum,tmp0,tmp1,pp_target
0,1092,0.93,39.89,51.48,False
1,936,0.57,7.2,7.01,False
2,981,0.66,18.55,20.12,False
3,1053,0.61,16.77,16.77,False
4,1018,0.14,41.3,35.17,False


In [12]:
train, test = train_test_split(df_processed, test_size=0.2)

In [13]:
col_target = "pp_target"
cols_features = [c for c in list(df_processed.columns) if c != col_target]
cols_features

['hPa', 'hum', 'tmp0', 'tmp1']

In [14]:
X_train = np.array(train[cols_features])
y_train = np.array(train[[col_target]])
print("Training data:")
print(X_train.shape, y_train.shape)

X_test = np.array(test[cols_features])
y_test = np.array(test[[col_target]])
print("Test data:")
print(X_test.shape, y_test.shape)

Training data:
(800, 4) (800, 1)
Test data:
(200, 4) (200, 1)


In [15]:
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [16]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
model =  KNeighborsClassifier(2)

In [18]:
model.fit(X_train, y_train)

  """Entry point for launching an IPython kernel.


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=2, p=2,
           weights='uniform')

In [19]:
score = model.score(X_test, y_test)
score

0.945

In [20]:
def simulate_one(cols_features, col_target):
    new_feature = Simulator.simulate(1)[0]
    feature = np.array([new_feature[col] for col in cols_features if col != col_target])
    return feature.reshape(1, 4)

one = simulate_one(cols_features, col_target)
one.shape

(1, 4)

In [21]:
model.predict(scaler.transform(one))

array([False])

In [22]:
# save the model to disk
pickle.dump(model, open(model_file, 'wb'))

In [23]:
# save the scaler to disk
pickle.dump(scaler, open(scaler_file, 'wb'))

In [24]:
# save labels to disk
labels = {"1":"precipitation", "0": "No precipitation"}
with open(label_file, 'w') as outfile:  
    json.dump(labels, outfile)