In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import numpy as np

In [2]:
data = pd.read_csv("bf3_data_2022_01_07.csv")
data.drop(columns=['SKIN_TEMP_AVG'],inplace=True)
data['DATE_TIME'] = pd.to_datetime(data['DATE_TIME'], format="%d-%m-%y %H:%M")
data.dropna(inplace=True)
data

Unnamed: 0,DATE_TIME,CB_FLOW,CB_PRESS,CB_TEMP,STEAM_FLOW,STEAM_TEMP,STEAM_PRESS,O2_PRESS,O2_FLOW,O2_PER,...,TOP_TEMP1,TOP_TEMP2,TOP_TEMP3,TOP_TEMP4,TOP_SPRAY,TOP_TEMP,TOP_PRESS_1,CO,CO2,H2
0,2021-07-01 00:10:00,311727.0,3.15,129.0,4.0,213.0,3.34,3.20,7296.0,23.08,...,112.0,135.0,107.0,130.0,0.0,121.0,2.0,22.22,21.00,3.88
1,2021-07-01 00:20:00,315163.0,3.16,129.0,4.0,209.0,3.35,3.20,7829.0,23.08,...,120.0,143.0,109.0,128.0,0.0,125.0,1.0,22.56,21.00,3.94
2,2021-07-01 00:30:00,314595.0,3.16,128.0,4.0,205.0,3.35,3.21,7904.0,23.08,...,123.0,138.0,110.0,124.0,0.0,124.0,1.0,22.49,21.08,3.94
3,2021-07-01 00:40:00,312465.0,3.16,127.0,4.0,200.0,3.35,3.21,7919.0,23.08,...,119.0,128.0,102.0,110.0,0.0,115.0,1.0,22.36,21.13,3.99
4,2021-07-01 00:50:00,302981.0,3.11,126.0,4.0,194.0,3.29,3.16,7938.0,23.08,...,125.0,139.0,112.0,124.0,0.0,125.0,1.0,22.25,21.30,4.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25400,2021-12-31 23:10:00,278198.0,2.75,76.0,2.0,189.0,2.92,2.79,2628.0,22.25,...,100.0,116.0,108.0,122.0,0.2,111.0,1.0,21.90,20.25,3.10
25401,2021-12-31 23:20:00,286486.0,2.80,77.0,1.0,190.0,2.97,2.84,2590.0,22.22,...,103.0,113.0,105.0,119.0,0.2,110.0,1.0,22.09,20.14,3.04
25402,2021-12-31 23:30:00,284500.0,2.81,77.0,0.0,191.0,2.98,2.85,2592.0,22.20,...,101.0,116.0,108.0,115.0,0.1,110.0,1.0,22.04,20.20,2.99
25403,2021-12-31 23:40:00,284455.0,2.83,77.0,1.0,190.0,3.00,2.87,2582.0,22.21,...,108.0,119.0,113.0,121.0,0.2,115.0,1.0,22.03,20.27,3.09


In [3]:
def train_and_predict(X, y, shift_hours):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X)
    predicted_df = pd.DataFrame(predictions, columns=X.columns)
    predicted_df['DATE_TIME'] = X.index + pd.to_timedelta(shift_hours, unit='h')
    mse = mean_squared_error(y_test, model.predict(X_test))
    mae = mean_absolute_error(y_test, model.predict(X_test))
    r2 = r2_score(y_test, model.predict(X_test))
    print(f"Shift {shift_hours} hours - Mean Squared Error: {mse}")
    print(f"Shift {shift_hours} hours - Mean Absolute Error: {mae}")
    print(f"Shift {shift_hours} hours - R-squared: {r2}")
    return model, predicted_df

In [4]:
X = data.drop(columns=['DATE_TIME'])
X.index = data['DATE_TIME']
y = X.copy()

In [5]:
model_shift_1, predicted_df_shift_1 = train_and_predict(X, y, shift_hours=1)
predicted_df_shift_1['CO_CO2_ratio'] = predicted_df_shift_1['CO'] / predicted_df_shift_1['CO2']
#predicted_df_shift_1.to_csv('predicted_data_after_1_hour.csv', index=False)
predicted_df_shift_1

Shift 1 hours - Mean Squared Error: 4156.148726026898
Shift 1 hours - Mean Absolute Error: 10.36124606960261
Shift 1 hours - R-squared: 0.6875937956909922


Unnamed: 0,CB_FLOW,CB_PRESS,CB_TEMP,STEAM_FLOW,STEAM_TEMP,STEAM_PRESS,O2_PRESS,O2_FLOW,O2_PER,PCI,...,TOP_TEMP3,TOP_TEMP4,TOP_SPRAY,TOP_TEMP,TOP_PRESS_1,CO,CO2,H2,DATE_TIME,CO_CO2_ratio
0,311718.10,3.1735,128.29,3.71,210.20,3.3618,3.2222,7286.15,23.2852,32.98,...,111.26,129.44,0.074,122.70,1.67,22.2142,21.0681,3.9294,2021-07-01 01:10:00,1.054400
1,315191.48,3.1798,129.49,4.09,207.57,3.3728,3.2225,7855.51,23.3035,30.31,...,121.71,136.08,0.065,132.44,1.02,22.5119,21.1078,3.9942,2021-07-01 01:20:00,1.066520
2,314583.92,3.1964,128.31,4.10,206.35,3.3889,3.2464,7903.69,23.3292,31.93,...,115.07,129.11,0.068,126.64,1.06,22.5217,21.0578,3.9881,2021-07-01 01:30:00,1.069518
3,312480.53,3.2019,127.41,3.55,196.27,3.3914,3.2517,7984.67,23.6417,35.20,...,118.79,133.98,0.153,126.96,1.27,22.5157,21.1522,4.1242,2021-07-01 01:40:00,1.064461
4,302954.19,3.1323,125.83,3.59,193.85,3.3126,3.1822,7951.90,23.2785,37.51,...,122.44,136.69,0.054,132.30,1.08,22.3273,21.2370,4.0945,2021-07-01 01:50:00,1.051340
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21510,278212.25,2.7415,78.17,1.78,188.95,2.9119,2.7830,2620.84,22.1063,12.54,...,110.34,124.88,0.174,116.62,1.00,22.3007,20.3529,3.0335,2022-01-01 00:10:00,1.095701
21511,286489.02,2.7911,77.80,1.72,190.90,2.9662,2.8321,2599.26,22.1923,12.02,...,109.72,122.32,0.179,115.54,1.00,22.2258,20.2690,3.0226,2022-01-01 00:20:00,1.096542
21512,284487.54,2.7894,79.15,1.01,190.59,2.9635,2.8309,2594.79,22.2247,12.87,...,115.95,123.25,0.118,119.06,1.00,22.2779,20.2457,2.9376,2022-01-01 00:30:00,1.100377
21513,284465.29,2.7801,79.88,1.59,190.17,2.9560,2.8223,2570.50,22.2291,12.35,...,119.91,125.55,0.127,121.80,1.00,22.4099,20.2301,2.9260,2022-01-01 00:40:00,1.107750


In [6]:
X_shift_2 = predicted_df_shift_1.drop(columns=['DATE_TIME'])
X_shift_2.index = predicted_df_shift_1['DATE_TIME']
model_shift_2, predicted_df_shift_2 = train_and_predict(X_shift_2, X_shift_2, shift_hours=2)
predicted_df_shift_2['CO_CO2_ratio'] = predicted_df_shift_2['CO'] / predicted_df_shift_2['CO2']
#predicted_df_shift_2.to_csv('predicted_data_after_2_hours.csv', index=False)
predicted_df_shift_2

Shift 2 hours - Mean Squared Error: 1667.2030847185417
Shift 2 hours - Mean Absolute Error: 4.935261217729759
Shift 2 hours - R-squared: 0.9565707992014733


Unnamed: 0,CB_FLOW,CB_PRESS,CB_TEMP,STEAM_FLOW,STEAM_TEMP,STEAM_PRESS,O2_PRESS,O2_FLOW,O2_PER,PCI,...,TOP_TEMP3,TOP_TEMP4,TOP_SPRAY,TOP_TEMP,TOP_PRESS_1,CO,CO2,H2,CO_CO2_ratio,DATE_TIME
0,311725.4353,3.186109,127.5101,3.5946,208.4545,3.374345,3.234745,7215.1445,23.387668,32.4425,...,113.6444,127.7110,0.11903,122.2876,1.4863,22.298251,21.064341,3.934835,1.058578,2021-07-01 03:10:00
1,315218.3513,3.192752,129.4071,4.1107,206.1529,3.386525,3.236902,7833.7600,23.447021,30.4836,...,128.3113,139.4136,0.10907,135.6309,1.0304,22.507120,21.154168,4.016606,1.063957,2021-07-01 03:20:00
2,314566.7040,3.208600,128.8935,4.0151,207.1250,3.401359,3.258447,7856.7667,23.475252,31.6104,...,116.7188,131.6172,0.11178,127.2220,1.0706,22.527626,21.058810,4.014744,1.069748,2021-07-01 03:30:00
3,312493.8236,3.201249,127.0838,3.6388,194.7788,3.391061,3.251070,7967.8876,23.635164,33.9601,...,116.7337,133.4347,0.15490,125.8067,1.2719,22.512062,21.164483,4.119077,1.063672,2021-07-01 03:40:00
4,302930.8123,3.148343,125.9929,3.5260,193.8158,3.330596,3.198199,7912.9353,23.399169,37.4663,...,123.9243,138.6502,0.08694,132.4385,1.1503,22.387759,21.195221,4.071201,1.056264,2021-07-01 03:50:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21510,278240.3050,2.738931,78.9610,1.8256,188.9564,2.910863,2.781268,2642.6292,22.079191,12.2675,...,112.4272,127.4445,0.15855,120.0305,1.0000,22.490510,20.376348,3.016137,1.103756,2022-01-01 02:10:00
21511,286487.2900,2.787025,79.2188,2.1271,190.7893,2.965235,2.828971,2650.7681,22.190804,11.9649,...,111.2723,122.2140,0.16731,116.9854,1.0000,22.301784,20.325372,3.022770,1.097239,2022-01-01 02:20:00
21512,284482.5402,2.777156,79.5764,1.7485,190.5155,2.954248,2.819408,2567.6990,22.232210,12.0150,...,118.9099,125.0960,0.12505,121.7636,1.0000,22.398736,20.240186,2.943116,1.106647,2022-01-01 02:30:00
21513,284484.0000,2.767288,79.6262,1.9184,190.3087,2.944733,2.809510,2558.0855,22.232163,12.0111,...,122.4947,129.5862,0.13722,125.4965,1.0000,22.465758,20.219835,2.925310,1.111075,2022-01-01 02:40:00


In [7]:
X_shift_3 = predicted_df_shift_2.drop(columns=['DATE_TIME'])
X_shift_3.index = predicted_df_shift_2['DATE_TIME']
model_shift_3, predicted_df_shift_3 = train_and_predict(X_shift_3, X_shift_3, shift_hours=3)
predicted_df_shift_3['CO_CO2_ratio'] = predicted_df_shift_3['CO'] / predicted_df_shift_3['CO2']
#predicted_df_shift_3.to_csv('predicted_data_after_3_hours.csv', index=False)
predicted_df_shift_3

Shift 3 hours - Mean Squared Error: 695.8551804315289
Shift 3 hours - Mean Absolute Error: 3.3533994174618194
Shift 3 hours - R-squared: 0.9844614558537218


Unnamed: 0,CB_FLOW,CB_PRESS,CB_TEMP,STEAM_FLOW,STEAM_TEMP,STEAM_PRESS,O2_PRESS,O2_FLOW,O2_PER,PCI,...,TOP_TEMP3,TOP_TEMP4,TOP_SPRAY,TOP_TEMP,TOP_PRESS_1,CO,CO2,H2,CO_CO2_ratio,DATE_TIME
0,311746.247515,3.194661,126.175318,3.580503,207.920244,3.383350,3.243269,7165.812938,23.449808,31.837742,...,115.721998,127.591650,0.145991,122.495016,1.376422,22.380068,21.018512,3.916008,1.064779,2021-07-01 06:10:00
1,315226.157275,3.203188,129.713053,4.093949,206.180200,3.397471,3.248593,7832.085333,23.545325,30.613544,...,130.770103,141.369481,0.137229,136.664195,1.052984,22.519696,21.156676,4.031023,1.064425,2021-07-01 06:20:00
2,314560.552087,3.212268,128.352805,3.910907,206.655326,3.404596,3.261980,7798.419594,23.559648,31.695649,...,118.306654,133.407285,0.140119,127.840593,1.081588,22.545685,21.041246,4.015715,1.071500,2021-07-01 06:30:00
3,312495.330806,3.203419,126.618198,3.659026,195.229880,3.393267,3.253109,7968.643020,23.626158,34.240487,...,117.838187,133.975926,0.151956,126.512196,1.300014,22.508331,21.157608,4.117732,1.063841,2021-07-01 06:40:00
4,302918.152333,3.159087,125.979935,3.349701,194.016841,3.341342,3.208908,7900.846092,23.478974,38.334462,...,127.644255,143.000869,0.109346,134.939477,1.169514,22.416772,21.170698,4.068078,1.058858,2021-07-01 06:50:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21510,278225.322723,2.734542,79.105858,1.892590,189.053589,2.908180,2.777709,2665.293203,22.090009,12.114603,...,113.546635,128.379477,0.151454,121.346362,1.000000,22.594660,20.386836,3.005725,1.108297,2022-01-01 05:10:00
21511,286482.625934,2.785343,79.930269,2.424608,190.686902,2.966383,2.827970,2716.573305,22.204626,11.822777,...,112.148460,122.198061,0.156622,117.656524,1.000006,22.400899,20.363148,3.017089,1.100071,2022-01-01 05:20:00
21512,284478.772662,2.769043,79.895103,2.171012,190.532441,2.948000,2.811375,2549.095104,22.238614,11.614802,...,119.740963,126.716899,0.130312,123.252453,1.000000,22.452869,20.253527,2.944428,1.108591,2022-01-01 05:30:00
21513,284486.576177,2.765642,79.846593,2.266926,190.470011,2.944711,2.807807,2546.436784,22.237986,11.749784,...,122.450626,129.679181,0.137207,126.048584,1.000000,22.464496,20.237481,2.956603,1.110044,2022-01-01 05:40:00


In [8]:
X_shift_4 = predicted_df_shift_3.drop(columns=['DATE_TIME'])
X_shift_4.index = predicted_df_shift_3['DATE_TIME']
model_shift_4, predicted_df_shift_4 = train_and_predict(X_shift_4, X_shift_4, shift_hours=4)
predicted_df_shift_4['CO_CO2_ratio'] = predicted_df_shift_4['CO'] / predicted_df_shift_4['CO2']
#predicted_df_shift_4.to_csv('predicted_data_after_4_hours.csv', index=False)
predicted_df_shift_4

Shift 4 hours - Mean Squared Error: 707.4575175175585
Shift 4 hours - Mean Absolute Error: 2.648302909723281
Shift 4 hours - R-squared: 0.989831626848118


Unnamed: 0,CB_FLOW,CB_PRESS,CB_TEMP,STEAM_FLOW,STEAM_TEMP,STEAM_PRESS,O2_PRESS,O2_FLOW,O2_PER,PCI,...,TOP_TEMP3,TOP_TEMP4,TOP_SPRAY,TOP_TEMP,TOP_PRESS_1,CO,CO2,H2,CO_CO2_ratio,DATE_TIME
0,311736.134684,3.196749,125.568419,3.531966,207.482920,3.385321,3.245320,7126.455861,23.484478,31.645743,...,115.674998,126.190607,0.162725,121.297731,1.301222,22.421341,21.012105,3.905650,1.067068,2021-07-01 10:10:00
1,315230.552987,3.210775,130.096622,4.126506,207.209139,3.405963,3.256922,7856.678133,23.618244,30.547165,...,131.107160,141.582543,0.154540,136.330166,1.076990,22.487783,21.194043,4.051255,1.061043,2021-07-01 10:20:00
2,314523.389864,3.217420,128.552659,3.782843,206.354306,3.408908,3.266863,7762.191997,23.617566,32.180581,...,117.965167,132.634555,0.160017,126.603231,1.081613,22.541529,21.046616,4.016781,1.071029,2021-07-01 10:30:00
3,312496.243402,3.203957,126.181173,3.649592,195.064553,3.393543,3.253606,7987.262759,23.619104,35.163143,...,118.624634,134.988269,0.149607,127.141358,1.319594,22.520684,21.155149,4.103153,1.064549,2021-07-01 10:40:00
4,302899.302457,3.163958,125.915279,3.303802,194.121296,3.346821,3.213735,7887.354416,23.520239,38.439172,...,128.649446,144.323699,0.121213,135.414074,1.187437,22.432313,21.158635,4.060343,1.060197,2021-07-01 10:50:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21510,278233.852354,2.729438,79.544167,1.959065,189.221601,2.904105,2.772804,2685.120005,22.107669,12.064698,...,113.958664,128.752650,0.147405,121.984152,1.000010,22.644295,20.371508,3.003054,1.111567,2022-01-01 09:10:00
21511,286467.116212,2.783573,80.919171,2.601820,190.437606,2.966558,2.826558,2781.071816,22.224713,11.704869,...,112.486845,122.633526,0.150448,118.313861,1.000007,22.468695,20.385930,3.010045,1.102167,2022-01-01 09:20:00
21512,284477.527076,2.764485,79.913814,2.412954,190.568598,2.944402,2.806799,2538.565056,22.240827,11.449884,...,120.352911,127.622784,0.133434,124.060107,1.000000,22.473124,20.262261,2.951801,1.109112,2022-01-01 09:30:00
21513,284484.624066,2.763212,79.966971,2.447276,190.507037,2.943067,2.805322,2536.768583,22.236660,11.540385,...,121.377421,129.204766,0.137436,125.419512,1.000000,22.477420,20.236804,2.963647,1.110720,2022-01-01 09:40:00
