# Retrieve dataset from HDFS

In [68]:
import pyspark 
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType,StructField,FloatType

import numpy as np
import pandas as pd 

# Local Spark session used to read the measurements file from HDFS.
spark = (
    SparkSession.builder
    .master("local")
    .appName("hdfs_test")
    .getOrCreate()
)
core_site = "hdfs://localhost:9000/"
data_fold = "electric_motor/measures.csv"

# Every column is declared as a nullable float; the order of the names below
# is the column order applied to the CSV.
elec_schema = StructType([
    StructField(column_name, FloatType(), True)
    for column_name in (
        "u_q",
        "coolant",
        "stator_winding",
        "u_d",
        "stator_tooth",
        "motor_speed",
        "i_d",
        "i_q",
        "pm",
        "stator_yoke",
        "ambient",
        "torque",
        "profile_id",
    )
])

elec_data = spark.read.csv(f"{core_site}{data_fold}", schema=elec_schema)
elec_data.show(5)

+-----------+---------+--------------+-----------+------------+------------+------------+-------------+---------+-----------+---------+----------+----------+
|        u_q|  coolant|stator_winding|        u_d|stator_tooth| motor_speed|         i_d|          i_q|       pm|stator_yoke|  ambient|    torque|profile_id|
+-----------+---------+--------------+-----------+------------+------------+------------+-------------+---------+-----------+---------+----------+----------+
| -0.4506815|18.805172|      19.08667| -0.3500546|   18.293219|0.0028655678| 0.004419137| 3.2810218E-4|24.554214|  18.316547| 19.85069| 0.1871008|      17.0|
|  -0.325737|18.818571|      19.09239|  -0.305803|   18.294807|2.5678167E-4|6.0587237E-4|-7.8535266E-4|24.538078|  18.314955|19.850672|0.24541749|      17.0|
|-0.44086403| 18.82877|      19.08938|-0.37250262|   18.294094|0.0023549714|0.0012895871|  3.864682E-4|24.544693|  18.326307|19.850657|0.17661534|      17.0|
|-0.32702568|18.835567|      19.08303| -0.3161987|  

# Split data into train and test sets

In [69]:
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score 

# Columns the models predict; every remaining column is used as a feature.
target_cols = ('pm','torque')

# Collect the Spark DataFrame to pandas once and derive both frames from it:
# features and targets are then row-aligned by construction, and the Spark
# job runs a single time instead of once per toPandas() call.
elec_pdf = elec_data.toPandas()
x = elec_pdf.drop(columns=list(target_cols))
y = elec_pdf[list(target_cols)]

# 80/20 split with a fixed seed so the partition is reproducible.
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=0.8,random_state=42)

# Model building & training

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Input,Dense
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.regularizers import l1,l2

def create_model(name, n_features=11):
    """Build and compile a small fully connected regression network.

    Architecture: Input -> Dense(128) -> Dense(64) -> Dense(32) -> Dense(1).
    Hidden layers use ReLU; the 64- and 32-unit layers additionally carry an
    L1 kernel regularizer created with its default factor. The single output
    unit is linear.

    Args:
        name: Name given to the Keras model and to its output layer.
        n_features: Number of input features per sample. Defaults to 11,
            the value that was previously hard-coded.

    Returns:
        A Keras ``Model`` compiled with mean-squared-error loss and the
        Adam optimizer.
    """
    layer_neurons = [64,32]

    # `shape` is expected to be a tuple; `(11)` is just the int 11, so pass
    # an explicit one-element tuple.
    ip = Input(shape=(n_features,))
    dense = Dense(128, activation='relu')(ip)
    for n in layer_neurons:
        dense = Dense(n, activation='relu', kernel_regularizer=l1())(dense)

    # Name the output layer after the model so that a model built for another
    # target (e.g. "torque") does not end up with an output labelled 'pm'.
    op = Dense(1, activation='linear', name=name)(dense)

    model = Model(inputs=ip, outputs=op, name=name)
    model.compile(loss='mean_squared_error', optimizer='adam')

    return model

Model for rotor temperature

In [None]:
# Build the 'pm' regressor, print its architecture, then fit it on the 'pm'
# target for one epoch, holding out 20% of the training rows for validation.
pm_model = create_model(name="pm")
pm_model.summary()
pm_model.fit(
    x=x_train,
    y=y_train['pm'],
    epochs=1,
    verbose=1,
    validation_split=0.2,
)

Model for rotor torque

In [None]:
# Build the 'torque' regressor, print its architecture, then fit it on the
# 'torque' target for one epoch, holding out 20% of the training rows for
# validation.
torque_model = create_model(name="torque")
torque_model.summary()
torque_model.fit(
    x=x_train,
    y=y_train['torque'],
    epochs=1,
    verbose=1,
    validation_split=0.2,
)

# Getting inferences & printing final results

In [None]:
# Predict the 'pm' target for the held-out rows and report R^2 against the
# true 'pm' values.
y_pm_pred_test = pm_model.predict(x=x_test)
print(
    'R2 score: ',
    r2_score(y_true=y_test['pm'].values, y_pred=y_pm_pred_test),
)

In [None]:
# Predict the 'torque' target for the held-out rows and report R^2.
# The reference values must be y_test['torque']: scoring torque predictions
# against the 'pm' column would report a meaningless number.
y_torque_pred_test = torque_model.predict(x_test)
print('R2 score: ',r2_score( y_test['torque'].values, y_torque_pred_test ) )

# Saving inferences

In [None]:
TRAIN_CSV_PATH = "/home/hdoop/electric_motor/elec_train.csv"
TEST_CSV_PATH = "/home/hdoop/electric_motor/elec_test.csv"

# Each model ends in a single-unit Dense layer, so predict() yields one column
# per sample in a 2-D array; DataFrame columns must be 1-D, hence ravel().
# The predictions are given x_test's index so that concat(axis=1), which
# aligns on index labels, pairs every prediction with its own test row
# (x_test/y_test keep the shuffled original row labels from the split).
pred_df = pd.DataFrame(
    {
        "pm_pred": y_pm_pred_test.ravel(),
        # Named *_pred (not *_test) to match "pm_pred": these are predictions.
        "torque_pred": y_torque_pred_test.ravel(),
    },
    index=x_test.index,
)
test_df = pd.concat([x_test, y_test, pred_df], axis=1)
test_df.to_csv(TEST_CSV_PATH, index=False)

# Saving weights & plotting model

In [None]:
# Write the trained 'pm' model to an HDF5 file, then render its layer graph
# (with tensor shapes) as the cell output.
pm_model.save(filepath="/home/hdoop/electric_motor/pm_model.h5")
tf.keras.utils.plot_model(model=pm_model, show_shapes=True)

In [None]:
# Write the trained 'torque' model to an HDF5 file, then render its layer
# graph (with tensor shapes) as the cell output.
torque_model.save(filepath="/home/hdoop/electric_motor/torque_model.h5")
tf.keras.utils.plot_model(model=torque_model, show_shapes=True)