In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

# Load the measurements (semicolon-separated file using decimal commas).
df = pd.read_csv('COP_data_final.csv', sep=';', decimal=',')

# T_OA and T_sup_HP are used exactly as read from the file: the former "+ 273"
# shift was already commented out, so the no-op self-assignments are gone.

# Inverse of the measured COP, stored next to the original column.
df['COP1_real'] = 1/df['COP_real']

# Turn textual 'NaN' markers into real missing values, then remember the
# positions of the rows that are about to be dropped. dropna() removes a row
# when ANY column is missing, so the positions are taken from the same
# criterion; later cells use NaN_index to line predictions up with the raw file.
df = df.replace('NaN', np.nan)
NaN_index = np.where(df.isna().any(axis=1))[0]
df = df.dropna().reset_index()

# df_regression is the same object as df (an alias, not a copy).
df_regression = df

df

Unnamed: 0,index,T_OA,T_sup_HP,COP_real,Q_HP_real,W_HP,m_HP,COP1_real
0,0,-25,30,2.08,9.25,4.45,0.67,0.480769
1,1,-20,30,2.26,10.63,4.70,0.67,0.442478
2,2,-15,30,2.45,12.00,4.90,0.67,0.408163
3,3,-7,30,3.12,14.00,4.49,0.67,0.320513
4,4,-4,30,3.30,14.00,4.24,0.67,0.303030
...,...,...,...,...,...,...,...,...
81,91,7,65,2.50,14.00,5.60,0.34,0.400000
82,92,10,65,2.65,14.00,5.28,0.34,0.377358
83,93,15,65,2.90,14.00,4.83,0.34,0.344828
84,94,18,65,3.05,14.00,4.59,0.34,0.327869


In [2]:
# Show every row whose outside temperature T_OA equals 30.
df.loc[df['T_OA'].eq(30)]

Unnamed: 0,index,T_OA,T_sup_HP,COP_real,Q_HP_real,W_HP,m_HP,COP1_real


In [18]:
# For each distinct outside temperature (in order of first appearance), take
# the smallest measured COP and store its reciprocal rounded to two decimals,
# i.e. the largest 1/COP observed at that temperature.
worst_cases = {
    outside_temp: round(1 / min(df.loc[df['T_OA'] == outside_temp, 'COP_real'].tolist()), 2)
    for outside_temp in pd.unique(df['T_OA'])
}

print(worst_cases)

# Same information as a two-column table: one row per outside temperature.
df_thomas = pd.DataFrame({
    'T_OA': list(worst_cases.keys()),
    'COP1': list(worst_cases.values()),
})
display(df_thomas)

{-25: 0.8, -20: 0.71, -15: 0.59, -7: 0.47, -4: 0.51, -2: 0.49, 2: 0.44, 7: 0.4, 10: 0.38, 15: 0.34, 18: 0.33, 20: 0.32}


Unnamed: 0,T_OA,COP1
0,-25,0.8
1,-20,0.71
2,-15,0.59
3,-7,0.47
4,-4,0.51
5,-2,0.49
6,2,0.44
7,7,0.4
8,10,0.38
9,15,0.34


In [27]:
# Seed NumPy's global RNG exactly as before.
# NOTE(review): OLS is solved in closed form, so this seed presumably has no
# influence on the coefficients; it is kept so the global RNG state is unchanged.
np.random.seed(2)

# Linear model of the worst-case 1/COP as a function of the outside temperature.
mod = smf.ols(formula='COP1 ~ T_OA', data=df_thomas)
res = mod.fit()

# The full regression report is available through res.summary().

# Keep the two fitted parameters under short names and print them, one per line.
intercept = res.params['Intercept']
coeff = res.params['T_OA']
print(intercept, coeff, sep='\n')

0.480841967574266
-0.009896389108808657


# Predicting COP with a linear model

In [None]:
# Seed NumPy's global RNG exactly as before.
# NOTE(review): the seed presumably does not affect an OLS fit; kept so the
# global RNG state is the same as with the original cell.
np.random.seed(2)

# Regress the measured COP on the outside temperature over the cleaned data.
# This rebinds `mod` and `res`, which later cells read.
mod = smf.ols('COP_real ~ T_OA', df_regression)
res = mod.fit()

# Full regression report.
print(res.summary())

In [21]:
def cop(outT):
    """Evaluate the fitted line of the current global `res` at `outT`.

    Returns Intercept + T_OA coefficient * outT, reading both parameters from
    whichever regression result `res` refers to when the function is called.
    """
    fitted = res.params
    return fitted.Intercept + fitted.T_OA * outT

# Error metric used below (the plotting cell further down relies on this import too).
from sklearn.metrics import mean_squared_error

# Predictions for every cleaned sample, rounded to two decimals. Evaluated on
# the whole column at once, so it no longer depends on df having a 0..n-1 index.
COP_list = cop(df['T_OA']).round(2).tolist()

# Measured values in the same row order.
COP_list_real = df['COP_real'].tolist()

# Mean squared error, plus the RMSE expressed as a fraction of the mean measured value.
mse = mean_squared_error(COP_list_real, COP_list)
relative_rmse = np.sqrt(mse) / np.mean(COP_list_real)
print("MSE = {}".format(round(mse,3)))
# The printed quantity is normalised by the mean, so the label now says so.
print("sqrt(MSE)/mean(COP_real) = {}".format(round(relative_rmse,3)))

# Parameters of the model that produced the predictions above.
print("\nIntercept: {}".format(res.params.Intercept))
print("T_OA coeff: {}".format(res.params.T_OA))

MSE = 9.961
sqrt(MSE) = 0.955

Intercept: 0.480841967574266
T_OA coeff: -0.009896389108808657


In [None]:
# Model value at an outside temperature of -2 °C. The regression is fitted on
# T_OA as stored in the file (the "+ 273" Kelvin shift in the loading cell is
# commented out), so the former Kelvin argument 273-2 was far outside the
# fitted temperature range.
print(round(cop(-2),4))

In [None]:
# Reload the raw table so the rows that were dropped during cleaning are back.
df = pd.read_csv('COP_data_final.csv', sep=';', decimal=',')

# From here on the 'COP_real' column holds 1/COP (the plotting cell labels it so).
# NOTE(review): COP_list must then also hold 1/COP predictions for the comparison
# to be like-for-like; confirm which model `res` referred to when it was built.
df['COP_real'] = 1/df['COP_real']

# Rows (by position) that kept their measurement and therefore have a prediction.
has_prediction = ~np.isin(np.arange(len(df)), NaN_index)
if int(has_prediction.sum()) != len(COP_list):
    raise ValueError(
        "COP_list has {} predictions but {} rows are not listed in NaN_index".format(
            len(COP_list), int(has_prediction.sum())))

# Write the predictions in one masked assignment instead of chained
# df['COP_pred'].iloc[i] = ..., which does not reliably write back into df.
df['COP_pred'] = np.nan
df.loc[has_prediction, 'COP_pred'] = COP_list

# Rows without a measurement end up as 0 in every column.
df = df.fillna(0)

In [None]:
 # Grid layout: 2 rows x 4 columns of panels, each showing 12 consecutive rows of df.
N_ROWS, N_COLS = 2, 4
POINTS_PER_PANEL = 12
# Saved next to the notebook instead of an absolute path on one user's machine.
FIGURE_PATH = 'COP_fit_per_T_sup_HP.jpg'

fig, ax = plt.subplots(N_ROWS, N_COLS, figsize=(19,10), sharex=True, sharey=True)

for i in range(N_ROWS):
    for j in range(N_COLS):

        # Flatten (row, column) into a running panel number and cut out its rows.
        k = i*N_COLS + j
        panel = df.iloc[k*POINTS_PER_PANEL:(k+1)*POINTS_PER_PANEL]

        # Real and predicted 1/COP against the outside temperature.
        ax[i,j].scatter(panel['T_OA'], panel['COP_real'], label="1/COP real")
        ax[i,j].scatter(panel['T_OA'], panel['COP_pred'], label="1/COP predicted")

        # Title: supply temperature read from the third row of the panel, and
        # the RMSE as a percentage of the panel's mean real value.
        mse_k = mean_squared_error(panel['COP_real'], panel['COP_pred'])
        rmse_pct = round(100*np.sqrt(mse_k)/np.mean(panel['COP_real']))
        ax[i,j].set_title("T_sup_HP = {}°C, RMSE = {}%".format(panel['T_sup_HP'].iloc[2], rmse_pct))
        if i == N_ROWS-1: ax[i,j].set_xlabel("Outside temperature T_OA [°C]")
        if j == 0: ax[i,j].set_ylabel("1/COP")
        ax[i,j].legend()
        ax[i,j].set_ylim(0,1)

        # Absolute error on a twin axis; only the right-most column keeps its
        # tick labels and axis label.
        ax2 = ax[i,j].twinx()
        ax2.plot(panel['T_OA'], (panel['COP_real'] - panel['COP_pred']).abs(), alpha=0.2)
        ax2.set_ylim(0,1)
        # 1/COP is a ratio, so its error has no unit (the old label said [W]).
        if j == N_COLS-1: ax2.set_ylabel("Absolute error [-]")
        else: ax2.set_yticklabels([])

plt.savefig(FIGURE_PATH, dpi=300)
plt.show()
