# Problem Statement:
Predicting temperature of the Permanent Magnet Synchronous pmsm(PMSM) given other
sensor measurements during operation..

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor


from sklearn.model_selection import GridSearchCV

### Import Data

In [2]:
pmsm = pd.read_csv("/home/a/Desktop/lib/temperature_data.csv")

pmsm

Unnamed: 0,ambient,coolant,u_d,u_q,motor_speed,torque,i_d,i_q,pm,stator_yoke,stator_tooth,stator_winding,profile_id
0,-0.752143,-1.118446,0.327935,-1.297858,-1.222428,-0.250182,1.029572,-0.245860,-2.522071,-1.831422,-2.066143,-2.018033,4
1,-0.771263,-1.117021,0.329665,-1.297686,-1.222429,-0.249133,1.029509,-0.245832,-2.522418,-1.830969,-2.064859,-2.017631,4
2,-0.782892,-1.116681,0.332771,-1.301822,-1.222428,-0.249431,1.029448,-0.245818,-2.522673,-1.830400,-2.064073,-2.017343,4
3,-0.780935,-1.116764,0.333700,-1.301852,-1.222430,-0.248636,1.032845,-0.246955,-2.521639,-1.830333,-2.063137,-2.017632,4
4,-0.774043,-1.116775,0.335206,-1.303118,-1.222429,-0.248701,1.031807,-0.246610,-2.521900,-1.830498,-2.062795,-2.018145,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
998065,-0.047497,0.341638,0.331475,-1.246114,-1.222428,-0.255640,1.029142,-0.245723,0.429853,1.018568,0.836084,0.494725,72
998066,-0.048839,0.320022,0.331701,-1.250655,-1.222437,-0.255640,1.029148,-0.245736,0.429751,1.013416,0.834438,0.494279,72
998067,-0.042350,0.307415,0.330946,-1.246852,-1.222430,-0.255640,1.029191,-0.245701,0.429439,1.002906,0.833936,0.492666,72
998068,-0.039433,0.302082,0.330987,-1.249505,-1.222432,-0.255640,1.029147,-0.245727,0.429558,0.999157,0.830504,0.490581,72


##### Taking sample for sample training and testing

In [3]:
pmsm.columns

Index(['ambient', 'coolant', 'u_d', 'u_q', 'motor_speed', 'torque', 'i_d',
       'i_q', 'pm', 'stator_yoke', 'stator_tooth', 'stator_winding',
       'profile_id'],
      dtype='object')

In [4]:
pmsm_sampled = pmsm.sample(n=100000, random_state=12, )

x_sample = pmsm_sampled.drop(labels=["pm","profile_id","stator_yoke","stator_winding","torque"], axis=1)
y_sample = pmsm_sampled["pm"]

In [5]:
from sklearn.model_selection import train_test_split
x_train_sample, x_test_sample, y_train_sample, y_test_sample = train_test_split(x_sample, y_sample, test_size=.20, random_state=12)

##### Create sample model

In [6]:
rf_model_sample = RandomForestRegressor(random_state=12, n_jobs = -1)

In [7]:
rf_model_sample.fit(x_train_sample, y_train_sample)

RandomForestRegressor(n_jobs=-1, random_state=12)

In [8]:
print(rf_model_sample.score(x_train_sample, y_train_sample))
print(rf_model_sample.score(x_test_sample, y_test_sample))

0.9980808101236954
0.9857633031377877


In [10]:
grid_search_sample = GridSearchCV(estimator=rf_model_sample, param_grid={"max_depth" : [24,26]})

In [11]:
grid_search_sample.fit(x_train_sample, y_train_sample)

GridSearchCV(estimator=RandomForestRegressor(n_jobs=-1, random_state=12),
             param_grid={'max_depth': [24, 26]})

In [12]:
grid_search_sample.best_params_

{'max_depth': 26}

In [13]:
rf_model_sample.score(x_test_sample, y_test_sample)

0.9857633031377877

#### As we observed in grid_search , max depth crossing 26 so , in our model will not use max_deprh limitation

##### create dependent and independent features

In [4]:
x = pmsm.drop(labels=["pm","profile_id","stator_yoke","stator_winding","torque"], axis=1)
y = pmsm["pm"]

In [5]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.20, random_state=12)

In [5]:
rf_model = RandomForestRegressor(n_estimators=20, random_state=12, n_jobs = -1, verbose=True)

In [6]:
rf_model.fit(x_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:  1.4min finished


RandomForestRegressor(n_estimators=20, n_jobs=-1, random_state=12, verbose=True)

In [7]:
y_test_pred = rf_model.predict(x_test)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  20 out of  20 | elapsed:    0.9s finished


In [8]:
np.mean((y_test_pred - y_test)**2)

0.0016802536402109793

In [9]:
rf_model.score(x_test, y_test)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  20 out of  20 | elapsed:    1.0s finished


0.9983035475152867

In [10]:
x_train.shape, x_test.shape

((798456, 8), (199614, 8))

In [3]:
import pickle

In [None]:
with open('model_rf.pkl', 'wb') as f:
    pickle.dump(rf_model, f)

In [6]:
with open('model_rf.pkl','rb') as f:
    mp = pickle.load(f)

In [7]:
y_test_pred_pkl = mp.predict(x_test)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.6s finished


In [8]:
np.mean((y_test_pred_pkl - y_test)**2)

0.0018912663614853399