In [80]:
# import packages
import os
import warnings 
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Plot defaults: select the sans-serif font and switch off the Unicode minus glyph
# (presumably so non-ASCII labels and negative tick labels render with this font).
plt.rcParams.update({
    'font.sans-serif': 'Arial Unicode MS',
    'axes.unicode_minus': False,
})

In [81]:
# Both input CSVs are expected under "<current working directory>/input".
directory = os.getcwd()
input_dir = os.path.join(directory, 'input')

# OT export file (an alternative file name noted by the author: "energy.csv")
ot_filename = "6Para_0827-0828.csv"
ot_file_path = os.path.join(input_dir, ot_filename)

# Recommendation log file
recommendation_filename = "yng_recommend_weight_data_log.csv"
recommendation_file_path = os.path.join(input_dir, recommendation_filename)

# Load both files into DataFrames
ot_data, recommendation_data = (
    pd.read_csv(csv_path) for csv_path in (ot_file_path, recommendation_file_path)
)

In [82]:
def format_datetime_columns(df, column_names):
    """Normalize the given columns of ``df`` to naive, second-resolution datetimes.

    Each listed column is parsed with ``pd.to_datetime(..., utc=True)``, the
    timezone information is then stripped (leaving the UTC wall-clock time),
    and the result is floored to whole seconds.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. The columns are overwritten in place.
    column_names : iterable of str
        Names of the columns to convert.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for name in column_names:
        as_utc = pd.to_datetime(df[name], utc=True)
        # Drop the tz marker first, then truncate sub-second precision.
        df[name] = as_utc.dt.tz_localize(None).dt.floor('s')
    return df

# Normalize the OT timestamp column
ot_data = format_datetime_columns(ot_data, ['TS'])

# Tags of interest; rows for every other tag are dropped below
key_tag_list = [
    'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap1stSizing.rActualPosition_inches',
    'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap2ndSizing.rActualPosition_inches',
    'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap3rdSizing.rActualPosition_inches',
    'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_GapFinalSizing.rActualPosition_inches',
    'CG_Sheeting.CG_Sheeting.dbHMI.Scoring.SRV_CrossScore.rSetpoint_Ratio',
    'SFBMix.PLC_BOSCH EXTRUDER.DB_Data_Exchange.EXT_PH_Temp_SP',
]

# Keep only the rows whose 'Tag' is one of the key tags, with a fresh 0..n-1 index
is_key_tag = ot_data['Tag'].isin(key_tag_list)
df_ot = ot_data.loc[is_key_tag].reset_index(drop=True)

In [83]:
# Order rows by tag and then by time so each row's predecessor is the previous sample
df_ot = df_ot.sort_values(by=['Tag','TS'])
df_ot['Value'] = df_ot['Value'].round(4)

# A row is kept when its value moved by more than `diff_percent` (relative to the
# previous row's value), or when it is the first row of a new tag.
diff_percent = 0.01
previous_value = df_ot['Value'].shift()
moved_enough = (df_ot['Value'] - previous_value).abs() > diff_percent * previous_value.abs()
starts_new_tag = df_ot['Tag'] != df_ot['Tag'].shift()
mask = moved_enough | starts_new_tag

# Keep only the change points and renumber the index
df_ot = df_ot[mask].reset_index(drop=True)

# Previous kept value of the same tag; NaN on the first row of each tag
continues_tag = df_ot['Tag'] == df_ot['Tag'].shift()
df_ot['Last_Value'] = df_ot['Value'].shift().where(continues_tag)

df_ot

Unnamed: 0,Tag,TS,Value,Last_Value
0,CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap...,2024-08-27 21:00:00,0.1105,
1,CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap...,2024-08-27 21:25:45,0.1201,0.1105
2,CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap...,2024-08-27 21:25:46,0.1343,0.1201
3,CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap...,2024-08-27 21:25:47,0.1587,0.1343
4,CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap...,2024-08-27 21:25:48,0.1801,0.1587
...,...,...,...,...
340,SFBMix.PLC_BOSCH EXTRUDER.DB_Data_Exchange.EXT...,2024-08-28 01:34:59,60.0000,65.0000
341,SFBMix.PLC_BOSCH EXTRUDER.DB_Data_Exchange.EXT...,2024-08-28 03:26:02,65.0000,60.0000
342,SFBMix.PLC_BOSCH EXTRUDER.DB_Data_Exchange.EXT...,2024-08-28 03:28:35,60.0000,65.0000
343,SFBMix.PLC_BOSCH EXTRUDER.DB_Data_Exchange.EXT...,2024-08-28 04:10:06,65.0000,60.0000


In [84]:
# Make sure the target folder exists; to_excel does not create missing directories
# and would otherwise fail on a fresh checkout.
os.makedirs('output', exist_ok=True)
df_ot.to_excel('output/sorted_ot_data.xlsx')

In [85]:
# Normalize both timestamp columns of the recommendation log
recommendation_data = format_datetime_columns(
    recommendation_data,
    ['data_time', 'weight_ts'],
)

# Maps each recommendation column name to the OT tag it is compared against
parameter_dict = {
    'recommend_1_roller_gap': 'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap1stSizing.rActualPosition_inches',
    'recommend_2_roller_gap': 'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap2ndSizing.rActualPosition_inches',
    'recommend_3_roller_gap': 'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_Gap3rdSizing.rActualPosition_inches',
    'recommend_forming_roller_gap': 'CG_Sheeting.CG_Sheeting.dbHMI.Sheeting.SRV_GapFinalSizing.rActualPosition_inches',
    'recommend_extruder_temperature': 'SFBMix.PLC_BOSCH EXTRUDER.DB_Data_Exchange.EXT_PH_Temp_SP',
    'recommend_cross_cutter_speed': 'CG_Sheeting.CG_Sheeting.dbHMI.Scoring.SRV_CrossScore.rSetpoint_Ratio',
}

# Define the function to check for OT changes in a +/- window_size window around each row
def check_ot_changes(df, ot_data, para_names, ts_column, window_size=180, tag_map=None):
    """Annotate each row of ``df`` with the OT values seen around its timestamp.

    For every name in ``para_names`` a column ``'<name>_change'`` is added to
    ``df``. For each row, the OT rows of the mapped tag whose ``'TS'`` lies in
    ``[ts - window_size, ts + window_size]`` (both ends inclusive) are
    collected. If they contain more than one distinct ``'Value'``, the distinct
    values are joined with ``'->'`` in order of first appearance in
    ``ot_data``; otherwise the cell is the empty string.

    Note that only *distinct* values are reported, so a value that is left and
    later returned to within the window appears once.

    Parameters
    ----------
    df : pandas.DataFrame
        Recommendation rows; must contain ``ts_column``. Modified in place.
    ot_data : pandas.DataFrame
        OT samples with columns ``'Tag'``, ``'TS'`` and ``'Value'``.
    para_names : iterable of str
        Parameter names to check; each is looked up in ``tag_map``.
    ts_column : str
        Name of the timestamp column in ``df`` that centers the window.
    window_size : int or float, default 180
        Half-width of the window in seconds (the full window spans
        ``2 * window_size`` seconds).
    tag_map : mapping, optional
        Maps parameter name -> OT tag. Defaults to the notebook-level
        ``parameter_dict``. A name missing from the mapping matches no OT
        rows, so its change column is all empty strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new ``'<name>_change'`` columns.
    """
    if tag_map is None:
        tag_map = parameter_dict

    half_window = pd.Timedelta(seconds=window_size)

    for para_name in para_names:
        para_tag = tag_map.get(para_name)
        print(f'Checking for {para_name}')

        # Select this tag's samples once, instead of re-filtering the whole
        # OT frame by tag for every recommendation row.
        tag_rows = ot_data.loc[ot_data['Tag'] == para_tag, ['TS', 'Value']]
        tag_ts = tag_rows['TS']
        tag_values = tag_rows['Value']

        changes = []
        for ts in df[ts_column]:
            start_time = ts - half_window
            end_time = ts + half_window

            in_window = (tag_ts >= start_time) & (tag_ts <= end_time)
            unique_values = tag_values[in_window].unique()

            if len(unique_values) > 1:
                # More than one distinct value: the parameter moved in the window
                changes.append('->'.join(map(str, unique_values)))
            else:
                changes.append('')  # No change

        df[f'{para_name}_change'] = changes

    return df

# Parameters to check against the OT data.
# 'recommend_extruder_temperature' and 'recommend_cross_cutter_speed' are
# currently left out of the check.
para_names = [
    'recommend_1_roller_gap',
    'recommend_2_roller_gap',
    'recommend_3_roller_gap',
    'recommend_forming_roller_gap',
]

# Add one '<name>_change' column per parameter, windowed around 'weight_ts'
recommendation_data = check_ot_changes(
    recommendation_data, df_ot, para_names, ts_column='weight_ts'
)

Checking for recommend_1_roller_gap
Checking for recommend_2_roller_gap
Checking for recommend_3_roller_gap
Checking for recommend_forming_roller_gap


In [86]:
# Show the recommendation rows for which a 2nd-roller-gap change was recorded
gap2_changed = recommendation_data['recommend_2_roller_gap_change'].ne('')
recommendation_data.loc[gap2_changed]

Unnamed: 0,id,is_change,sku,formula,extruder_temperature,slice_product_line_speed,target_weight,data_time,weight_ts,shift,...,predicted_weight_after_change,recommend_weight_data_id,create_by,update_by,create_time,update_time,recommend_1_roller_gap_change,recommend_2_roller_gap_change,recommend_3_roller_gap_change,recommend_forming_roller_gap_change
444,445,1,EXSM,EXSM-Add LC-63 Non-G,65.09,168.55,2.71,2024-08-27 22:28:00,2024-08-27 22:29:19,中班,...,35.23,399.0,SYSTEM,SYSTEM,2024-08-27 22:28:01,2024-08-27 22:28:01,,0.0887->0.0966,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
445,446,1,EXSM,EXSM-Add LC-63 Non-G,64.98,168.35,2.71,2024-08-27 22:28:00,2024-08-27 22:29:19,中班,...,35.23,400.0,SYSTEM,SYSTEM,2024-08-27 22:28:01,2024-08-27 22:28:01,,0.0887->0.0966,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
446,447,1,EXSM,EXSM-Add LC-63 Non-G,64.75,168.34,2.71,2024-08-27 22:28:15,2024-08-27 22:29:35,中班,...,35.23,401.0,SYSTEM,SYSTEM,2024-08-27 22:28:16,2024-08-27 22:28:16,,0.0887->0.0966->0.1132->0.128->0.1343->0.1227-...,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
447,448,1,EXSM,EXSM-Add LC-63 Non-G,64.75,168.27,2.71,2024-08-27 22:28:15,2024-08-27 22:29:35,中班,...,35.23,402.0,SYSTEM,SYSTEM,2024-08-27 22:28:16,2024-08-27 22:28:16,,0.0887->0.0966->0.1132->0.128->0.1343->0.1227-...,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
448,449,1,EXSM,EXSM-Add LC-63 Non-G,64.41,168.76,2.71,2024-08-27 22:28:30,2024-08-27 22:29:35,中班,...,35.23,403.0,SYSTEM,SYSTEM,2024-08-27 22:28:31,2024-08-27 22:28:31,,0.0887->0.0966->0.1132->0.128->0.1343->0.1227-...,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,572,1,EXSM,EXSM-Add LC-63 Non-G,65.09,168.45,2.71,2024-08-27 22:43:45,2024-08-27 22:29:35,中班,...,35.23,526.0,SYSTEM,SYSTEM,2024-08-27 22:43:46,2024-08-27 22:43:46,,0.0887->0.0966->0.1132->0.128->0.1343->0.1227-...,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
560,573,1,EXSM,EXSM-Add LC-63 Non-G,64.58,168.25,2.71,2024-08-27 22:44:00,2024-08-27 22:29:35,中班,...,35.23,527.0,SYSTEM,SYSTEM,2024-08-27 22:44:01,2024-08-27 22:44:01,,0.0887->0.0966->0.1132->0.128->0.1343->0.1227-...,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
561,574,1,EXSM,EXSM-Add LC-63 Non-G,64.58,168.25,2.71,2024-08-27 22:44:00,2024-08-27 22:29:35,中班,...,35.23,528.0,SYSTEM,SYSTEM,2024-08-27 22:44:01,2024-08-27 22:44:01,,0.0887->0.0966->0.1132->0.128->0.1343->0.1227-...,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863
562,575,1,EXSM,EXSM-Add LC-63 Non-G,64.36,168.55,2.71,2024-08-27 22:44:15,2024-08-27 22:29:35,中班,...,35.23,529.0,SYSTEM,SYSTEM,2024-08-27 22:44:15,2024-08-27 22:44:15,,0.0887->0.0966->0.1132->0.128->0.1343->0.1227-...,0.082->0.0943->0.1088->0.1199->0.1027->0.0878-...,0.0696->0.0786->0.0795->0.0833->0.0846->0.0863


In [89]:
# First and last recommendation timestamps; their YYYYMMDD forms are used in the output file name
first_ts = recommendation_data['data_time'].min()
last_ts = recommendation_data['data_time'].max()
start_dt = first_ts.strftime('%Y%m%d')
end_dt = last_ts.strftime('%Y%m%d')
print(first_ts, last_ts)

2024-08-27 21:30:31 2024-08-28 08:17:00


#### Adoption% Review

In [93]:
def _final_changed_value(change):
    """Return the last value of an ``'a->b->c'`` change string as a float (NaN if absent/unparseable)."""
    if not isinstance(change, str) or change == '':
        return np.nan
    try:
        return float(change.rsplit('->', 1)[-1])
    except ValueError:
        return np.nan


def create_adoption_columns(df, para_names, tol=1e-4, no_action_code=0):
    """Classify, per parameter, how the recommendation relates to the recorded OT change.

    For every name in ``para_names`` the columns ``name`` (the recommended
    value) and ``'<name>_change'`` (the ``'a->b->...'`` string of observed OT
    values, empty when nothing changed) are read and a new column
    ``'<name>_adoption'`` is written with one of these codes:

    * ``100`` - a recommendation exists, no OT change was recorded
    * ``200`` - an OT change was recorded, no recommendation exists
    * ``300`` - both exist and agree: the last value of the change string is
      within ``tol`` of the recommended value (or the two cells are equal
      as text)
    * ``400`` - both exist but do not agree
    * ``no_action_code`` - neither a recommendation nor a change

    A recommendation counts as present only when the cell is neither NaN nor
    an empty string. Previously NaN counted as "recommended", the agreement
    test compared a number with the whole change string (so ``300`` could not
    occur), and rows with neither side were labeled ``400``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the recommendation and ``'_change'`` columns. Modified
        in place.
    para_names : iterable of str
        Parameter (recommendation column) names to classify.
    tol : float, default 1e-4
        Absolute tolerance for the agreement test. The default is an
        assumption based on the 4-decimal rounding applied to OT values
        elsewhere in this notebook - adjust if a different match rule is wanted.
    no_action_code : int, default 0
        Code written when there is neither a recommendation nor a change.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new ``'<name>_adoption'`` columns.
    """
    for para_name in para_names:
        para_col = para_name
        change_col = f'{para_name}_change'
        adoption_col = f'{para_name}_adoption'

        recommended = df[para_col]
        changed = df[change_col]

        # Presence flags: NaN and '' both mean "nothing here"
        has_rec = recommended.notna() & (recommended.astype(str).str.strip() != '')
        has_change = changed.notna() & (changed.astype(str) != '')

        # Agreement: compare the recommendation with the value the OT ended on
        rec_value = pd.to_numeric(recommended, errors='coerce')
        final_value = changed.map(_final_changed_value)
        close_enough = (rec_value - final_value).abs() <= tol  # NaN on either side -> False
        same_text = recommended.astype(str) == changed.astype(str)
        agrees = has_rec & has_change & (close_enough | same_text)

        # np.select picks the first matching condition, so 300 wins over 400
        df[adoption_col] = np.select(
            [
                has_rec & ~has_change,   # recommended, operator did not adjust
                ~has_rec & has_change,   # operator adjusted, nothing recommended
                agrees,                  # recommended and adjusted, consistent
                has_rec & has_change,    # recommended and adjusted, inconsistent
            ],
            [100, 200, 300, 400],
            default=no_action_code,
        )
    return df


# Classify on a copy so recommendation_data itself does not gain the adoption columns
df_mapped = create_adoption_columns(recommendation_data.copy(), para_names)

df_mapped

Unnamed: 0,id,is_change,sku,formula,extruder_temperature,slice_product_line_speed,target_weight,data_time,weight_ts,shift,...,create_time,update_time,recommend_1_roller_gap_change,recommend_2_roller_gap_change,recommend_3_roller_gap_change,recommend_forming_roller_gap_change,recommend_1_roller_gap_adoption,recommend_2_roller_gap_adoption,recommend_3_roller_gap_adoption,recommend_forming_roller_gap_adoption
0,1,0,EXSM,EXSM-Add LC-63 Non-G,64.70,139.58,2.71,2024-08-27 21:30:31,2024-08-27 21:05:55,中班,...,2024-08-27 21:30:32,2024-08-27 21:30:32,,,0.0722->0.0714,,100,100,400,100
1,2,0,EXSM,EXSM-Add LC-63 Non-G,64.70,139.58,2.71,2024-08-27 21:30:31,2024-08-27 21:05:55,中班,...,2024-08-27 21:30:32,2024-08-27 21:30:32,,,0.0722->0.0714,,100,100,400,100
2,3,0,EXSM,EXSM-Add LC-63 Non-G,64.41,168.51,2.71,2024-08-27 21:30:45,2024-08-27 21:05:55,中班,...,2024-08-27 21:30:46,2024-08-27 21:30:46,,,0.0722->0.0714,,100,100,400,100
3,4,0,EXSM,EXSM-Add LC-63 Non-G,64.41,168.51,2.71,2024-08-27 21:30:45,2024-08-27 21:05:55,中班,...,2024-08-27 21:30:46,2024-08-27 21:30:46,,,0.0722->0.0714,,100,100,400,100
4,5,0,EXSM,EXSM-Add LC-63 Non-G,65.09,168.60,2.71,2024-08-27 21:31:00,2024-08-27 21:05:55,中班,...,2024-08-27 21:31:01,2024-08-27 21:31:01,,,0.0722->0.0714,,100,100,400,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4639,5152,1,RPWP,RPWP-1572171-550kg,64.81,0.00,2.71,2024-08-28 08:16:30,2024-08-28 07:57:25,晚班,...,2024-08-28 08:16:31,2024-08-28 08:16:31,,,0.0715->0.07,,100,100,400,100
4640,5153,1,RPWP,RPWP-1572171-550kg,64.47,0.00,2.71,2024-08-28 08:16:45,2024-08-28 07:57:25,晚班,...,2024-08-28 08:16:46,2024-08-28 08:16:46,,,0.0715->0.07,,100,100,400,100
4641,5154,1,RPWP,RPWP-1572171-550kg,64.47,0.00,2.71,2024-08-28 08:16:45,2024-08-28 07:57:25,晚班,...,2024-08-28 08:16:46,2024-08-28 08:16:46,,,0.0715->0.07,,100,100,400,100
4642,5155,1,RPWP,RPWP-1572171-550kg,65.21,0.00,2.71,2024-08-28 08:17:00,2024-08-28 07:57:25,晚班,...,2024-08-28 08:17:00,2024-08-28 08:17:00,,,0.0715->0.07,,100,100,400,100


In [92]:
# Make sure the target folder exists; to_excel does not create missing directories
# and would otherwise fail on a fresh checkout.
os.makedirs('output', exist_ok=True)
df_mapped.to_excel(f'output/ot_model_mapping_{start_dt}_{end_dt}.xlsx')

#### Others

In [4]:
# Missing-value handling
# Drop rows that lack either of the two pressure readings used as the row filter
df_nona = df_raw.dropna(subset=['1号冷辊出口压力', '1号冷辊入口压力'])
print(df_nona.isna().sum())
# Forward-fill the remaining gaps. The previous `fillna('ffill')` wrote the literal
# string 'ffill' into every missing cell instead of propagating the last valid value.
# Leading NaNs (no earlier value to carry forward) stay NaN.
df_plc = df_nona.ffill()

timestamp                          0
1号冷辊入口压力                           0
1号冷辊入口温度                           1
1号冷辊出口压力                           0
1号冷辊出口温度                           3
1号冷辊皮带速度                          11
1号辊轮速度                            59
1号辊间隙                             42
2号冷辊入口压力                           3
2号冷辊入口温度                           5
2号冷辊出口压力                           5
2号冷辊出口温度                           7
2号冷辊皮带速度                          23
2号辊轮速度                            57
2号辊间隙                             38
3号辊轮速度                            54
3号辊间隙                             34
Circular Scoring Rollers 圆刀速度     18
Cooling Roller 辊轮速度               49
Cross Scoring Rollers 横刀速度         1
Forming Roller 定型辊间隙              29
Forming Roller 辊轮速度               45
入口胶温度                              6
出口胶温度L                             7
出口胶温度R                            10
大辊速度                              50
大辊间隙                              30
挤