<h3>准备数据</h3>

<h4>读取数据</h4>

In [300]:
from time import localtime, strftime
import requests
import json
import pandas as pd
from pandas import DataFrame
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU, Flatten, BatchNormalization
from tensorflow.keras.losses import Huber
from tensorflow.keras.regularizers import L1L2, L1, L2
from tensorflow.keras.callbacks import History, TensorBoard
from tensorflow.keras.backend import clear_session
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

<h4>设置tensorflow</h4>

In [301]:
for gpu in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)

<h3>处理数据</h3>

<h4>读取数据</h4>

In [302]:
data = pd.read_csv("./data/adjustment.tsv", delimiter="\t", skipinitialspace=True)
data = pd.read_csv("./data/adjustment.csv")
data

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w18,c1,c2,c3,c4,c5,c6,c7,c8,score
0,0.457,0.045,0.452,0.175,0.820,0.352,0.841,0.984,0.655,0.508,...,0.905,1.840,1.607,0.481,0.319,0.954,1.635,0.345,1.525,85.752
1,0.675,0.418,0.315,0.671,0.721,0.867,0.012,0.211,0.233,0.496,...,0.883,-0.034,0.272,-0.495,0.389,-0.289,0.359,0.343,0.238,86.408
2,0.628,0.272,0.788,0.971,0.383,0.057,0.916,0.541,0.795,0.396,...,0.510,0.113,0.329,-0.552,-0.253,0.054,0.355,-0.069,0.047,79.913
3,0.628,0.272,0.788,0.971,0.383,0.057,0.916,0.541,0.795,0.396,...,0.510,0.594,1.761,0.838,0.050,0.439,1.398,1.706,1.513,90.280
4,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,1.322,1.874,0.560,0.494,0.875,0.496,1.639,0.498,88.829
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49534,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,0.080,-0.331,-0.047,0.134,-0.078,0.167,0.144,0.379,88.734
49535,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.544,-0.086,0.245,-0.195,-0.091,-0.595,-0.191,0.030,81.514
49536,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.429,0.229,-0.261,-0.048,-0.021,-0.078,-0.597,0.388,78.514
49537,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,1.627,1.874,0.183,0.878,0.208,0.798,1.026,1.872,90.237


<h4>处理缺失值</h4>

In [303]:
data.dropna(axis=0, inplace=True)
data.reset_index(drop=True, inplace=True)
data

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w18,c1,c2,c3,c4,c5,c6,c7,c8,score
0,0.457,0.045,0.452,0.175,0.820,0.352,0.841,0.984,0.655,0.508,...,0.905,1.840,1.607,0.481,0.319,0.954,1.635,0.345,1.525,85.752
1,0.675,0.418,0.315,0.671,0.721,0.867,0.012,0.211,0.233,0.496,...,0.883,-0.034,0.272,-0.495,0.389,-0.289,0.359,0.343,0.238,86.408
2,0.628,0.272,0.788,0.971,0.383,0.057,0.916,0.541,0.795,0.396,...,0.510,0.113,0.329,-0.552,-0.253,0.054,0.355,-0.069,0.047,79.913
3,0.628,0.272,0.788,0.971,0.383,0.057,0.916,0.541,0.795,0.396,...,0.510,0.594,1.761,0.838,0.050,0.439,1.398,1.706,1.513,90.280
4,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,1.322,1.874,0.560,0.494,0.875,0.496,1.639,0.498,88.829
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48544,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,0.080,-0.331,-0.047,0.134,-0.078,0.167,0.144,0.379,88.734
48545,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.544,-0.086,0.245,-0.195,-0.091,-0.595,-0.191,0.030,81.514
48546,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.429,0.229,-0.261,-0.048,-0.021,-0.078,-0.597,0.388,78.514
48547,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,1.627,1.874,0.183,0.878,0.208,0.798,1.026,1.872,90.237


In [304]:
# 剔除c1-c8列存在值大于1的行
data = data[data.loc[:, "c1":"c8"].apply(lambda x: x < 1).all(axis=1)]
data.reset_index(drop=True, inplace=True)
data

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w18,c1,c2,c3,c4,c5,c6,c7,c8,score
0,0.675,0.418,0.315,0.671,0.721,0.867,0.012,0.211,0.233,0.496,...,0.883,-0.034,0.272,-0.495,0.389,-0.289,0.359,0.343,0.238,86.408
1,0.628,0.272,0.788,0.971,0.383,0.057,0.916,0.541,0.795,0.396,...,0.510,0.113,0.329,-0.552,-0.253,0.054,0.355,-0.069,0.047,79.913
2,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,0.015,-0.263,-0.483,-0.026,-0.004,0.096,-0.154,0.054,82.644
3,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,-0.182,-0.172,-0.525,0.197,0.044,-0.138,0.331,0.020,81.480
4,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,-0.506,-0.321,0.206,-0.365,0.188,-0.048,-0.041,0.087,86.050
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19475,0.020,0.147,0.127,0.075,0.534,0.400,0.035,0.487,0.823,0.476,...,0.002,-0.054,-0.058,-0.146,0.364,-0.211,-0.324,0.048,0.143,85.833
19476,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,0.080,-0.331,-0.047,0.134,-0.078,0.167,0.144,0.379,88.734
19477,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.544,-0.086,0.245,-0.195,-0.091,-0.595,-0.191,0.030,81.514
19478,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.429,0.229,-0.261,-0.048,-0.021,-0.078,-0.597,0.388,78.514


In [305]:
# 分数列除以100
data['score'] = data['score'] / 100
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['score'] = data['score'] / 100


Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w18,c1,c2,c3,c4,c5,c6,c7,c8,score
0,0.675,0.418,0.315,0.671,0.721,0.867,0.012,0.211,0.233,0.496,...,0.883,-0.034,0.272,-0.495,0.389,-0.289,0.359,0.343,0.238,0.86408
1,0.628,0.272,0.788,0.971,0.383,0.057,0.916,0.541,0.795,0.396,...,0.510,0.113,0.329,-0.552,-0.253,0.054,0.355,-0.069,0.047,0.79913
2,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,0.015,-0.263,-0.483,-0.026,-0.004,0.096,-0.154,0.054,0.82644
3,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,-0.182,-0.172,-0.525,0.197,0.044,-0.138,0.331,0.020,0.81480
4,0.590,0.521,0.430,0.613,0.164,0.042,0.899,0.657,0.249,0.182,...,0.524,-0.506,-0.321,0.206,-0.365,0.188,-0.048,-0.041,0.087,0.86050
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19475,0.020,0.147,0.127,0.075,0.534,0.400,0.035,0.487,0.823,0.476,...,0.002,-0.054,-0.058,-0.146,0.364,-0.211,-0.324,0.048,0.143,0.85833
19476,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,0.080,-0.331,-0.047,0.134,-0.078,0.167,0.144,0.379,0.88734
19477,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.544,-0.086,0.245,-0.195,-0.091,-0.595,-0.191,0.030,0.81514
19478,0.447,0.870,0.689,0.297,0.178,0.411,0.754,0.542,0.721,0.915,...,0.966,-0.429,0.229,-0.261,-0.048,-0.021,-0.078,-0.597,0.388,0.78514


In [306]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.callbacks import LearningRateScheduler
from time import strftime, localtime
import numpy as np

train_features = data.iloc[:, :18]
train_labels = data.iloc[:, 18:]  # c1 to c2
train_scores = data.iloc[:, -1]

# # Standardize features
# scaler = MinMaxScaler()
# features_scaled = scaler.fit_transform(train_features)

# # 使用在训练集上fit_transform好的scaler对象，对测试集进行transform
# # 这里要使用transform，不能使用fit_transform，否则测试集的分布就会发生改变
# real_test_features_scaled = scaler.transform(real_test_features)
# real_test_features_scaled, real_test_features

In [298]:
from sklearn.ensemble import RandomForestRegressor

regr = RandomForestRegressor(max_depth=3, random_state=42)
regr.fit(train_features, train_labels, sample_weight=train_scores)

In [299]:
y_pred = regr.predict(train_features)
print(mean_squared_error(train_labels, y_pred))

827.9930690144995


In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    train_features, train_labels, test_size=0.2, random_state=42
)

model = Sequential([
    BatchNormalization(input_shape=(X_train.shape[1], ), name="bn_0"),
    Dense(units=128, activation=LeakyReLU(), name="dense_1"),
    Dense(units=128, activation=LeakyReLU(), name="dense_2"),
    Dense(units=128, activation=LeakyReLU(), name="dense_3"),
    Dropout(0.1, name="dropout_1"),
    Dense(units=y_train.shape[1], activation=LeakyReLU(), name="dense_8")
], name="adjustment")

# 定义阶梯式学习率函数
def step_decay(epoch):
    initial_lr = 1e-3
    drop = 0.5
    epochs_drop = 50
    lr = initial_lr * (drop ** (np.floor((1 + epoch) / epochs_drop)))
    return lr

# 使用LearningRateScheduler回调
lr_scheduler = LearningRateScheduler(step_decay, verbose=1)

# 定义损失函数（结合得分信息）
def custom_loss(y_true, y_pred):
    pred_rmse = tf.keras.losses.mean_squared_error(y_true, y_pred)
    y_true_score = y_true[:, -1]
    y_pred_score = y_pred[:, -1]
    score_loss = tf.keras.losses.mean_squared_error(y_true_score, y_pred_score)  # 假设y_pred中包含得分信息
    total_loss = 0.2 * pred_rmse + 0.8 * (1.1 - score_loss)
    # total_loss = pred_rmse
    return total_loss

# 编译模型
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), 
                loss=Huber(),
                metrics=["mse"])

print(model.summary())

history: History = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=200,
    verbose=1,
    workers=-1,
    use_multiprocessing=True,
    callbacks=[
        TensorBoard(log_dir="./logs/" + strftime("%Y%m%d-%H%M%S", localtime())),
        lr_scheduler
    ]
)
tf.keras.models.save_model(model, "/home/wxy/itsc2023/models/error_fix/2056")

In [None]:
!tensorboard --logdir ./logs --bind_all

In [None]:
# 评估模型
from tensorflow.keras.models import load_model
model_path = '/home/wxy/itsc2023/models/error_fix/2056'
loaded_model = load_model(model_path, custom_objects={'custom_loss': custom_loss})
y_pred = model.predict(X_test)
print(mean_squared_error(y_test, y_pred))

<h3>模型部署</h3>

<h4>部署服务</h4>

In [None]:
!sudo docker run -t \
    -p 9989:9989 \
    -v "/home/wxy/itsc2023/models/error_fix/2056:/models/error_fix/2056" \
    -e MODEL_NAME=adjustment \
    tensorflow/serving:2.14.0-gpu

<h3>测试数据</h3>

<h4>读取数据并测试</h4>

In [None]:
data = X_test.values.tolist()
headers = {"content-type": "application/json"}

In [None]:
import subprocess

result = subprocess.run("echo 123|sudo -S docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' 2615fe3d76fd" ,shell=True, stdout=subprocess.PIPE,stderr=subprocess.PIPE)
print(result.stdout) # 输出为1

In [None]:
response = requests.post(
    url="http://172.17.0.3:8501/v1/models/adjustment:predict",
    data=json.dumps({"instances": data}),
    headers=headers
)
if response.status_code != 200:
    raise ValueError(response.text)

In [None]:
import numpy as np
y_pred = np.array(json.loads(response.text)["predictions"])

In [None]:
print(mean_squared_error(y_test, y_pred))