In [1]:
# Modules Functions
# ======================================================================
from utils.years_functions import *
from src.database.database_functions import *
from src.models.database_models import Model

# Kafka Consumer
# ======================================================================
from kafka import KafkaConsumer

# Serialization and Deserialization
# ======================================================================
import json

# Logging and Event Handling
# ======================================================================
import logging as log

# Data Handling
# ======================================================================
import pandas as pd

# Machine Learning
# ======================================================================
import joblib
from sklearn.metrics import r2_score


In [2]:
# Configure the root logger at INFO level so the log.info(...) progress
# messages emitted by the following cells are shown in the cell output.
log.basicConfig(level=log.INFO)

In [3]:
# Drain the 'prueba-2' topic into memory.
#
# * auto_offset_reset='earliest' starts reading from the beginning of the topic.
# * consumer_timeout_ms=10000 makes the iterator stop after 10 s without a new
#   message, so the loop below ends on its own instead of blocking forever.
# * NOTE(review): no group_id is passed; per the kafka-python documentation
#   offset commits are disabled in that case, so enable_auto_commit=True has
#   no effect here — confirm this is intended.
log.info('Starting consumer...')
consumer = KafkaConsumer('prueba-2',
                         bootstrap_servers='localhost:9092',
                         value_deserializer=lambda m: json.loads(m.decode('utf-8')),
                         consumer_timeout_ms=10000,
                         auto_offset_reset='earliest',
                         enable_auto_commit=True)

# One entry per consumed message, in arrival order.
captured_data = []

try:
    for message in consumer:
        captured_data.append(message.value)
        log.info(f'Message received successfully in offset: {message.offset}')

    # Reached only once the timeout elapsed without further messages.
    log.info('Waiting for more messages...')
finally:
    # Release the broker connection even if iteration fails or the cell is
    # interrupted; previously the consumer was announced as closed but never
    # actually closed.
    log.info('Closing consumer')
    consumer.close()

INFO:root:Starting consumer...
INFO:kafka.conn:<BrokerConnection node_id=bootstrap-0 host=localhost:9092 <connecting> [IPv6 ('::1', 9092, 0, 0)]>: connecting to localhost:9092 [('::1', 9092, 0, 0) IPv6]
INFO:kafka.conn:Probing node bootstrap-0 broker version
INFO:kafka.conn:<BrokerConnection node_id=bootstrap-0 host=localhost:9092 <connecting> [IPv6 ('::1', 9092, 0, 0)]>: Connection complete.
INFO:kafka.conn:Broker version identified as 2.5.0
INFO:kafka.conn:Set configuration api_version=(2, 5, 0) to skip auto check_version requests on startup
INFO:kafka.consumer.subscription_state:Updating subscribed topics to: ('prueba-2',)
INFO:kafka.consumer.subscription_state:Updated partition assignment: [TopicPartition(topic='prueba-2', partition=0)]
INFO:kafka.conn:<BrokerConnection node_id=1 host=localhost:9092 <connecting> [IPv6 ('::1', 9092, 0, 0)]>: connecting to localhost:9092 [('::1', 9092, 0, 0) IPv6]
INFO:kafka.conn:<BrokerConnection node_id=1 host=localhost:9092 <connecting> [IPv6 ('::

In [4]:
# The consumer's value_deserializer already applied json.loads once; the values
# that reach this cell are themselves JSON text, so a second decode is needed to
# obtain the record dicts.
#
# Only elements that are still text/bytes are decoded. That keeps the cell
# idempotent: re-running it after captured_data has already been converted to
# dicts no longer raises a TypeError from json.loads.
captured_data = [
    json.loads(item) if isinstance(item, (str, bytes, bytearray)) else item
    for item in captured_data
]

# Flatten the list of record dicts into a tabular frame (one row per message).
df = pd.json_normalize(captured_data)

In [5]:
# Load the previously trained estimator from disk; it is used below via
# model.predict(...).
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code — only load model files from a trusted source.
model = joblib.load('ml_model/gbr_model.pkl')

In [6]:
# Build the model input: every column except the observed target
# ('Happiness_Score') and the row identifier ('id').
#
# 'Predicted_Happiness_Score' is also removed when present so that re-running
# this cell (after df already received the prediction column) does not feed the
# previous predictions back into the model as an extra feature. errors='ignore'
# is limited to that optional column; a missing 'Happiness_Score' or 'id' still
# raises KeyError as before.
# NOTE(review): the column order follows the incoming messages — confirm it
# matches the order the model was trained with.
df_model = (
    df
    .drop(columns=['Happiness_Score', 'id'])
    .drop(columns=['Predicted_Happiness_Score'], errors='ignore')
)

predictions = model.predict(df_model)

# Store the predictions next to the observed values, one per row.
df['Predicted_Happiness_Score'] = predictions

In [7]:
# Final column layout: the row identifier first, then the model input columns,
# and finally the observed score next to the predicted one.
column_order = [
    'id',
    'Social_Support',
    'Year',
    'Trust',
    'Generosity',
    'Health',
    'Economy',
    'Freedom',
    'Continent_Africa',
    'Continent_Asia',
    'Continent_Europe',
    'Continent_North_America',
    'Continent_Oceania',
    'Continent_South_America',
    'Economy_Health',
    'Trust_Freedom',
    'Economy_Trust',
    'Trust_Health',
    'Happiness_Score',
    'Predicted_Happiness_Score',
]

# Select (and thereby reorder) exactly these columns; a missing name raises KeyError.
df = df.loc[:, column_order]

In [8]:
# Sanity check of the scored frame before it is written to the database.
# length_dataset is not defined in this notebook (presumably it comes from one
# of the star imports at the top); per the recorded output it prints the number
# of rows and columns.
length_dataset(df)
# Last expression of the cell: render the first five rows.
df.head(5)

Number of Row : 235
Number of Columns : 20
-------------------------Structure of the DataFrame------------------------


Unnamed: 0,id,Social_Support,Year,Trust,Generosity,Health,Economy,Freedom,Continent_Africa,Continent_Asia,Continent_Europe,Continent_North_America,Continent_Oceania,Continent_South_America,Economy_Health,Trust_Freedom,Economy_Trust,Trust_Health,Happiness_Score,Predicted_Happiness_Score
0,673.0,1.232,2019.0,0.005,0.083,0.825,1.162,0.462,0.0,0.0,1.0,0.0,0.0,0.0,0.95865,0.00231,0.00581,0.004125,6.07,5.738775
1,736.0,1.134,2019.0,0.072,0.153,0.571,0.45,0.292,1.0,0.0,0.0,0.0,0.0,0.0,0.25695,0.021024,0.0324,0.041112,4.681,4.473358
2,151.0,0.77115,2015.0,0.17922,0.20165,0.15185,0.46534,0.46866,1.0,0.0,0.0,0.0,0.0,0.0,0.070662,0.083993,0.083398,0.027215,3.655,4.35764
3,318.0,1.610574,2017.0,0.153527,0.47554,0.833552,1.480633,0.627163,0.0,0.0,1.0,0.0,0.0,0.0,1.234185,0.096286,0.227316,0.127972,7.504,7.161976
4,229.0,0.72803,2016.0,0.00679,0.12889,0.67602,1.1697,0.36712,0.0,0.0,1.0,0.0,0.0,0.0,0.790741,0.002493,0.007942,0.00459,5.528,5.401903


In [10]:
# Persist the scored rows in the database.
# According to the output recorded for this cell, create_table drops an
# existing table before creating it again, so every run replaces the contents.
table_name = 'ml_model'
connection = get_engine()

create_table(connection, Model, table_name)
insert_data(df, table_name, connection)

INFO:root:Conected successfully to database!
INFO:root:Table dropped successfully.
INFO:root:Table created successfully.
INFO:root:Data uploaded


In [11]:
# Read the rows back through the Model mapping using a session bound to the
# connection opened above.
# NOTE(review): the session is never closed in the visible cells — confirm
# whether query_table releases it or whether it should be closed here.
session = create_session(connection)

# NOTE(review): this rebinds `df`, replacing the scored frame built earlier
# with whatever query_table returns; a distinct name would keep both available.
df = query_table(Model, connection, session)