### 웹에 들어갈 데이터프레임 만들기

In [87]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [88]:
# Load the data
data = pd.read_csv('../data/test.csv')
data

Unnamed: 0,c_temp_pv,k_rpm_pv,n_temp_pv,scale_pv,s_temp_pv
0,69.4,180,69.5,0.0,70.3
1,69.4,179,69.4,0.0,70.4
2,69.5,176,69.4,0.0,70.6
3,69.4,173,69.6,0.0,70.7
4,69.5,173,69.4,0.0,70.8
...,...,...,...,...,...
29404,69.7,191,67.6,0.0,67.3
29405,69.8,191,67.5,0.0,67.0
29406,69.7,191,67.4,0.0,66.8
29407,69.7,191,67.3,0.0,66.7


In [89]:
# scale_pv가 2 초과 4 미만인 데이터만 추출
data = data[(data['scale_pv'] > 2) & (data['scale_pv'] < 4)]
data.describe()

Unnamed: 0,c_temp_pv,k_rpm_pv,n_temp_pv,scale_pv,s_temp_pv
count,1395.0,1395.0,1395.0,1395.0,1395.0
mean,69.764158,187.535484,67.524875,3.046989,67.400215
std,0.351782,4.853152,0.91441,0.031522,1.108196
min,68.6,163.0,66.1,2.85,65.4
25%,69.6,187.0,66.8,3.03,66.5
50%,69.7,188.0,67.3,3.05,67.3
75%,70.0,189.0,67.9,3.06,68.2
max,70.7,202.0,70.6,3.28,71.1


In [90]:
# [ 구상 ]
# 1. 3가지 temp와 초기 RPM으로 모델1을 통해 pred_scale_pv를 예측
# 2. +0.05 scale_pv 당 -1 rpm의 비율로 rpm을 조정값을 계산
# 3. 다음 인스턴스에 temp와 조정된 rpm 반영

# 1. 모델1
import joblib

lr_model = joblib.load('../model/lr_model.pkl')
scaler = joblib.load('../model/scaler.pkl')
print(lr_model, scaler)

LinearRegression() StandardScaler()


In [91]:
# 2. 모델1을 통해 pred_scale_pv를 예측
X = data.drop(['scale_pv'], axis=1)
X_scaled = scaler.transform(X)
y_pred = lr_model.predict(X_scaled)
print(y_pred)

# mae, mape
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

mae = mean_absolute_error(data['scale_pv'], y_pred)
mape = mean_absolute_percentage_error(data['scale_pv'], y_pred)
print('mae:', mae, 'mape:', mape*100)
loss = data['scale_pv'] - y_pred
print('loss:', loss.describe())

[3.09858901 3.09930117 3.1010075  ... 3.07160849 3.07397311 3.07391456]
mae: 0.03646269968913092 mape: 1.2019539765558938
loss: count    1395.000000
mean       -0.031906
std         0.031021
min        -0.236146
25%        -0.050088
50%        -0.031794
75%        -0.015268
max         0.186770
Name: scale_pv, dtype: float64




In [92]:
# 3. +0.05 scale_pv 당 -1 rpm의 비율로 rpm을 조정값을 계산
def adjust_rpm(scale_pv, pred_scale_pv, rpm):
    scale_dif = scale_pv - pred_scale_pv
    adjust = scale_dif // 0.05
    rpm_dif = rpm - adjust
    return scale_dif, adjust, rpm_dif

data['pred_scale_pv'] = y_pred.copy()
data['scale_dif'], data['adjust'], data['rpm_dif'] = zip(*data.apply(lambda x: adjust_rpm(x['scale_pv'], x['pred_scale_pv'], x['k_rpm_pv']), axis=1))
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pred_scale_pv'] = y_pred.copy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['scale_dif'], data['adjust'], data['rpm_dif'] = zip(*data.apply(lambda x: adjust_rpm(x['scale_pv'], x['pred_scale_pv'], x['k_rpm_pv']), axis=1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['scale_dif']

Unnamed: 0,c_temp_pv,k_rpm_pv,n_temp_pv,scale_pv,s_temp_pv,pred_scale_pv,scale_dif,adjust,rpm_dif
33,69.6,168,70.1,3.06,68.0,3.098589,-0.038589,-1.0,169.0
34,69.5,169,70.1,3.06,68.0,3.099301,-0.039301,-1.0,170.0
47,69.6,169,70.6,3.16,69.4,3.101007,0.058993,1.0,168.0
48,69.6,173,70.5,3.16,69.5,3.099951,0.060049,1.0,172.0
87,69.6,180,69.3,3.17,68.0,3.092272,0.077728,1.0,179.0
...,...,...,...,...,...,...,...,...,...
29354,69.8,191,66.5,3.05,66.2,3.072214,-0.022214,-1.0,192.0
29355,69.8,191,66.4,3.05,66.0,3.071667,-0.021667,-1.0,192.0
29356,69.8,191,66.4,3.05,66.1,3.071608,-0.021608,-1.0,192.0
29371,69.8,191,66.8,3.09,66.6,3.073973,0.016027,0.0,191.0


In [93]:
# 4. 다음 인스턴스에 temp와 조정된 rpm 반영
# - 열 : c_temp_pv, n_temp_pv, s_temp_pv, k_rpm_pv, scale_pv, rpm_dif, scale_dif, loss
# - loss : scale_pv가 3 이상일 때 scale_pv - 3
data['loss'] = data['pred_scale_pv'].apply(lambda x: x-3 if x>3 else 0)
data['c_temp_sv'], data['n_temp_sv'], data['s_temp_sv'], data['k_rpm_sv'] = 70, 70, 70, 180
data = data[['c_temp_pv', 'n_temp_pv', 's_temp_pv', 'c_temp_sv', 'n_temp_sv', 's_temp_sv', 'k_rpm_pv', 'k_rpm_sv', 'scale_pv', 'rpm_dif', 'scale_dif', 'loss']]
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['loss'] = data['pred_scale_pv'].apply(lambda x: x-3 if x>3 else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['c_temp_sv'], data['n_temp_sv'], data['s_temp_sv'], data['k_rpm_sv'] = 70, 70, 70, 180
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['c_temp_sv'], data['n_temp_sv'], dat

Unnamed: 0,c_temp_pv,n_temp_pv,s_temp_pv,c_temp_sv,n_temp_sv,s_temp_sv,k_rpm_pv,k_rpm_sv,scale_pv,rpm_dif,scale_dif,loss
33,69.6,70.1,68.0,70,70,70,168,180,3.06,169.0,-0.038589,0.098589
34,69.5,70.1,68.0,70,70,70,169,180,3.06,170.0,-0.039301,0.099301
47,69.6,70.6,69.4,70,70,70,169,180,3.16,168.0,0.058993,0.101007
48,69.6,70.5,69.5,70,70,70,173,180,3.16,172.0,0.060049,0.099951
87,69.6,69.3,68.0,70,70,70,180,180,3.17,179.0,0.077728,0.092272
...,...,...,...,...,...,...,...,...,...,...,...,...
29354,69.8,66.5,66.2,70,70,70,191,180,3.05,192.0,-0.022214,0.072214
29355,69.8,66.4,66.0,70,70,70,191,180,3.05,192.0,-0.021667,0.071667
29356,69.8,66.4,66.1,70,70,70,191,180,3.05,192.0,-0.021608,0.071608
29371,69.8,66.8,66.6,70,70,70,191,180,3.09,191.0,0.016027,0.073973


In [94]:
# save data
data.to_csv('../data/sample_data.csv', index=False)

### 프로세스 설계
1. 첫 열 : c n s k
2. model : pred_scale_pv 예측
3. scale_dev = 3 - pred_scale_pv
4. rpm_dif = scale_dev가 0.025 이상일때 1, -0.025 이하일때 -1

In [114]:
# data에서 첫 번째 행을 추출, 3가지 temp와 rpm만
first_row = data.iloc[0, :4]
first_row

# Temp 값만 추출
temp_df = data[['c_temp_pv', 'n_temp_pv', 's_temp_pv']]
temp_df.reset_index(drop=True, inplace=True)
temp_df

# 첫 행만 k_rpm_pv = 168인 k_rpm_pv 열 생성
rpm_df = pd.DataFrame([168], columns=['k_rpm_pv'])
temp_df = pd.concat([temp_df, rpm_df], axis=1)
temp_df

Unnamed: 0,c_temp_pv,n_temp_pv,s_temp_pv,k_rpm_pv
0,69.6,70.1,68.0,168.0
1,69.5,70.1,68.0,
2,69.6,70.6,69.4,
3,69.6,70.5,69.5,
4,69.6,69.3,68.0,
...,...,...,...,...
1390,69.8,66.5,66.2,
1391,69.8,66.4,66.0,
1392,69.8,66.4,66.1,
1393,69.8,66.8,66.6,


## pred_scale_pv

In [120]:
# scale_pv 예측 함수
def scale_predict(row):
    """row를 받아서 scale_pv를 예측하는 함수

    Args:
        row (DataFrame): c_temp_pv, k_rpm_pv, n_temp_pv, s_temp_pv로 이루어진 DataFrame

    Returns:
        y_pred (float): 예측된 scale_pv
    """
    
    # columns 수정 : c_temp_pv  k_rpm_pv  n_temp_pv  s_temp_pv
    row = row[['c_temp_pv', 'k_rpm_pv', 'n_temp_pv', 's_temp_pv']]
    X_scaled = scaler.transform(row)
    pred_scale_pv = lr_model.predict(X_scaled)
    return pred_scale_pv

# 첫 행의 scale_pv 예측
row_num = 0
pred_scale_pv = scale_predict(temp_df.iloc[row_num].to_frame().T)
pred_scale_pv




array([3.09858901])

## scale_dev, rpm_dif, rpm_dev

In [None]:

# adjust rpm : 0.05 scale_pv 당 -1 rpm  (초당 최대 +- 2, 행당 +- 1)
# - scale_dev : 3과의 차이
scale_dev = 3 - y_pred  # 3.09 -> 0.09, 반올림하여 0.1, 0.1 / 0.05 = 2, -2

# - rpm_dif : RPM 변동값
# - if scale_dev가 0.025 이상일때 1, -0.025 이하일때 -1
ini_rpm = first_row['k_rpm_pv'].values[0]
if scale_dev > 0.025:
    rpm_dif = 1
elif scale_dev < -0.025:
    rpm_dif = - 1
else:
    rpm_dif = 0
next_rpm = ini_rpm + rpm_dif
print(scale_dev, rpm_dif, next_rpm)

---
## 행 인덱스 기준으로 정리