# Problem Statement:
Predicting temperature of the Permanent Magnet Synchronous pmsm(PMSM) given other
sensor measurements during operation..

In [5]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import MinMaxScaler

import xgboost as xg
from sklearn.metrics import mean_squared_error as mse

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

### Import Data

In [2]:
pmsm = pd.read_csv("/home/a/Desktop/lib/temperature_data.csv")

pmsm


Unnamed: 0,ambient,coolant,u_d,u_q,motor_speed,torque,i_d,i_q,pm,stator_yoke,stator_tooth,stator_winding,profile_id
0,-0.752143,-1.118446,0.327935,-1.297858,-1.222428,-0.250182,1.029572,-0.245860,-2.522071,-1.831422,-2.066143,-2.018033,4
1,-0.771263,-1.117021,0.329665,-1.297686,-1.222429,-0.249133,1.029509,-0.245832,-2.522418,-1.830969,-2.064859,-2.017631,4
2,-0.782892,-1.116681,0.332771,-1.301822,-1.222428,-0.249431,1.029448,-0.245818,-2.522673,-1.830400,-2.064073,-2.017343,4
3,-0.780935,-1.116764,0.333700,-1.301852,-1.222430,-0.248636,1.032845,-0.246955,-2.521639,-1.830333,-2.063137,-2.017632,4
4,-0.774043,-1.116775,0.335206,-1.303118,-1.222429,-0.248701,1.031807,-0.246610,-2.521900,-1.830498,-2.062795,-2.018145,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
998065,-0.047497,0.341638,0.331475,-1.246114,-1.222428,-0.255640,1.029142,-0.245723,0.429853,1.018568,0.836084,0.494725,72
998066,-0.048839,0.320022,0.331701,-1.250655,-1.222437,-0.255640,1.029148,-0.245736,0.429751,1.013416,0.834438,0.494279,72
998067,-0.042350,0.307415,0.330946,-1.246852,-1.222430,-0.255640,1.029191,-0.245701,0.429439,1.002906,0.833936,0.492666,72
998068,-0.039433,0.302082,0.330987,-1.249505,-1.222432,-0.255640,1.029147,-0.245727,0.429558,0.999157,0.830504,0.490581,72


##### taking sample of 100000 to test with model

In [3]:
pmsm_sampled = pmsm.sample(n=100000, random_state=12, )

In [4]:
x_sample = pmsm_sampled.drop(labels=["pm","profile_id"], axis=1)
y_sample = pmsm_sampled["pm"]

In [5]:


x_train_sample, x_test_sample, y_train_sample, y_test_sample = train_test_split(x_sample, y_sample, test_size=.20, random_state=12)

In [6]:
xgb_model_sample = xg.XGBRegressor(random_state = 12)

xgb_model_sample.fit(x_train_sample, y_train_sample)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=4,
             num_parallel_tree=1, predictor='auto', random_state=12,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [None]:
grid_search = GridSearchCV(estimator=xgb_model_sample, 
                           param_grid={"max_depth":[15,16],
#                                      "subsample":[0.5,1],
#                                      "learning_rate" : [0.1,0.2,0.3]                
                                                          }, 
                           n_jobs=-1, verbose=1, cv =2)




grid_search.fit(x_train_sample,y_train_sample)

Fitting 2 folds for each of 2 candidates, totalling 4 fits


In [8]:
grid_search.best_params_

{'max_depth': 15}

In [9]:
grid_search.best_score_

0.9802338661752561

In [27]:
xgb_model_sample2 = xg.XGBRegressor(max_depth = 15, learning_rate = 0.3, n_jobs = -1,
                                    subsample = 1, random_state = 12)

xgb_model_sample2.fit(x_train_sample, y_train_sample)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.3, max_delta_step=0,
             max_depth=15, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=-1,
             num_parallel_tree=1, predictor='auto', random_state=12,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [28]:
xgb_model_sample2.score(x_test_sample, y_test_sample)

0.9882607956095762

## Building full model

##### we got good result in max_depth as 15 lets build full model on this

In [6]:
x = pmsm.drop(labels=["pm","profile_id"], axis=1)
y = pmsm["pm"]

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.20, random_state=12)

In [15]:
xgb_model_full = xg.XGBRegressor(n_estimators = 15,max_depth = 15, learning_rate = 0.3, n_jobs = -1,
                                    subsample = 1, random_state = 12, verbose = True)

xgb_model_full.fit(x_train, y_train)

Parameters: { "verbose" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.3, max_delta_step=0,
             max_depth=15, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=15, n_jobs=-1,
             num_parallel_tree=1, predictor='auto', random_state=12,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbose=True,
             verbosity=None)

In [16]:
xgb_model_full.score(x_train, y_train)

0.9982839422380287

In [17]:
xgb_model_full.score(x_test, y_test)

0.997213028676991