In [2]:
# Basic environment modules
import numpy as np
import pandas as pd
import psycopg2 as pg # PostgreSQL module
import time
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style

# Data analysis modules
from CoolProp.CoolProp import PropsSI
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer, PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from scipy.integrate import ode
from sqlalchemy import create_engine

# User-defined libraries
from Model import *
from PhysicalProperty import *
from StructuredQuery import *
from Numeric import *
from Authentication import *

print("Import modules successfully.")

# Instantiate one helper object per user-defined library.
# The right-hand side is evaluated left to right, so the construction order
# is the same as with four separate assignments.
sql, pro, mod, oau = (
    StructuredQuery(),
    PhysicalProperty(),
    Model(),
    Authentication(),
)

Import modules successfully.


In [6]:
# Connect to DB Server and get data
# Load the connection settings from the JSON key file (Security)
json_parse = oau.get_apikey(json_filename='key.json')
json_res = pd.json_normalize(json_parse['sql'])

# Set query from DB
# The text is bound to `query` (not `sql`) so that it neither overwrites nor,
# through the `del` below, removes the StructuredQuery instance named `sql`
# that the setup cell created.
query = "SELECT * FROM CHF.raw_database2 WHERE refri IN ('D2O', 'H2O','R12', 'R22') AND idx != '31' AND p <= 220"

# Connect DB server
#(conn, db_engine) = sql.connect(json_res["host"][1], json_res["dbname"][1], json_res["user"][1], json_res["port"][1], json_res["password"][1], json_res["service"][1]) #postgreSQL
# NOTE(review): `pys` is not imported in the visible cells; presumably it is a
# PyMySQL alias re-exported by one of the star imports -- confirm.
connect = pys.connect(host=json_res["host"][1], user = json_res["user"][1], password = json_res["password"][1], cursorclass=pys.cursors.DictCursor)
try:
    cur = connect.cursor()
    try:
        cur.execute(query)
        # DictCursor rows -> one DataFrame column per selected field
        raw_tb = pd.DataFrame(cur.fetchall())
    finally:
        cur.close()
finally:
    # Release the connection as well; closing only the cursor leaves it open.
    connect.close()

del query # delete query log

print("Loading database completed. data size: {}".format(len(raw_tb)))

Loading database completed. data size: 13996


In [None]:
# Calculation of physical properties

# If a fluid's properties cannot be calculated directly, another step is
# needed (interpolation based on a physical-properties table), e.g. sodium.
prop_tb = raw_tb.copy()

# R113, R114 can't use viscosity model -> re-mapping these models based on interpolation tables
# R12 Pressure range : 0.242551~4.1361e+06 Pa (P, Q Flash)
# H2O Pressure range : 611.655~2.2064e+07 Pa (P, Q Flash)

# start the timer
start_time = time.time()
# `time` is imported in the first cell; `datetime` is not imported in the
# visible cells, so the wall-clock stamp is taken from `time` instead.
print('START TIME :', time.strftime('%H:%M:%S'))

# compute physical properties
# Rows are addressed by the labels 0..len-1, i.e. raw_tb is expected to carry
# the default RangeIndex it gets from pd.DataFrame(<list of rows>).
for i in range(0, len(raw_tb)):
    fluid = raw_tb.loc[i, 'refri']
    p_pa = raw_tb.loc[i, 'p'] * 1e5  # pressure handed to PropsSI (raw p is presumably in bar -- confirm)

    # calculated physical properties at saturation point (Q = 0 liquid, Q = 1 vapour)
    prop_tb.loc[i,'tsat'] = round(PropsSI('T', 'P', p_pa, 'Q', 0, fluid),12)
    prop_tb.loc[i,'pcrit'] = round(PropsSI(fluid,'pcrit') * 10**-5, 12)
    prop_tb.loc[i,'rdcp'] = round(raw_tb.loc[i,'p']/prop_tb.loc[i,'pcrit'],12)
    prop_tb.loc[i,'rhof'] = round(PropsSI('D', 'P', p_pa, 'Q', 0, fluid),12)
    prop_tb.loc[i,'rhov'] = round(PropsSI('D', 'P', p_pa, 'Q', 1, fluid),12)
    prop_tb.loc[i,'muf'] = round(PropsSI('V', 'P', p_pa, 'Q', 0, fluid),12)
    prop_tb.loc[i,'muv'] = round(PropsSI('V', 'P', p_pa, 'Q', 1, fluid),12)
    prop_tb.loc[i,'hfo'] = round(PropsSI('H', 'P', p_pa, 'Q', 0, fluid)*1e-3,12) #[kJ/kg]
    prop_tb.loc[i,'hvo'] = round(PropsSI('H', 'P', p_pa, 'Q', 1, fluid)*1e-3,12) #[kJ/kg]
    prop_tb.loc[i, 'lam'] = round((prop_tb.loc[i, 'hvo'] - prop_tb.loc[i,'hfo']),12) # [kJ/kg]

    # Read the stored values back so later arithmetic uses exactly what is in the table.
    hfo = prop_tb.loc[i, 'hfo']
    lam = prop_tb.loc[i, 'lam']

    if pd.isna(raw_tb.loc[i, 'v']):
        prop_tb.loc[i, 'v'] = round(raw_tb.loc[i, 'g'] / prop_tb.loc[i, 'rhof'], 12) # [m/s]
    else:
        # Keep 12 decimals like every other column; a bare round() would
        # truncate the measured velocity to an integer.
        prop_tb.loc[i, 'v'] = round(raw_tb.loc[i, 'v'], 12) # [m/s]
    prop_tb.loc[i, 'cpf']   = round(PropsSI('C', 'P', p_pa, 'Q', 0, fluid),12) # [J/kgK]
    prop_tb.loc[i, 'cpv']   = round(PropsSI('C', 'P', p_pa, 'Q', 1, fluid),12) # [J/kgK]
    prop_tb.loc[i, 'sigma'] = round(PropsSI('I', 'P', p_pa, 'Q', 0, fluid),12) # [N/m]
    prop_tb.loc[i, 'kf'] = round(PropsSI('L', 'P', p_pa, 'Q', 0, fluid),12) # [W/m/K]
    prop_tb.loc[i, 'kv'] = round(PropsSI('L', 'P', p_pa, 'Q', 1, fluid),12) # [W/m/K]

    # Fill NA related to inlet subcooling.
    # Source priority for the inlet enthalpy hin: raw enthin -> raw tin -> raw xi.
    # pd.isna is used throughout (it also accepts None, which np.isnan does not).
    if not pd.isna(raw_tb.loc[i, 'enthin']):
        prop_tb.loc[i, 'hin'] = round(hfo - raw_tb.loc[i, 'enthin'], 12) # [kJ/kg]
    elif not pd.isna(raw_tb.loc[i, 'tin']):
        # calculate inlet enthalpy using tin; PropsSI returns J/kg, so convert
        # to kJ/kg to match hfo before the two are subtracted below.
        prop_tb.loc[i, 'hin'] = round(PropsSI('H', 'P', p_pa, 'T', raw_tb.loc[i, 'tin']+273.15, fluid)*1e-3,12) # [kJ/kg]
    elif not pd.isna(raw_tb.loc[i, 'xi']):
        # NOTE(review): this gives enthin = xi*lam, while xi is re-derived
        # below as -enthin/lam, so the stored xi has the opposite sign of the
        # raw xi -- confirm the sign convention of the raw column.
        prop_tb.loc[i, 'hin'] = round(hfo - raw_tb.loc[i, 'xi']*lam, 12)
    else:
        # enthin, tin, and xi are all empty: 9999 is written as a sentinel.
        prop_tb.loc[i, 'hin'] = 9999
        print("Error")
    prop_tb.loc[i, 'enthin'] = hfo - prop_tb.loc[i, 'hin'] # [kJ/kg]

    prop_tb.loc[i, 'xi'] = -prop_tb.loc[i, 'enthin'] / lam #[-]

    # Fill NA related to outlet thermodynamic quality
    if pd.isna(raw_tb.loc[i, 'xe']):
        if pd.isna(raw_tb.loc[i, 'enthout']):
            # xe and enthout are empty: derive xe via pro.cal_xe.
            # The filled-in enthin from prop_tb is passed (the raw value may be
            # NaN), and enthout is derived from the xe just computed (the raw
            # xe is NaN in this branch).
            prop_tb.loc[i, 'xe'] = round(pro.cal_xe(raw_tb.loc[i, 'q'], raw_tb.loc[i, 'doi'], raw_tb.loc[i, 'dio'], raw_tb.loc[i, 'geo'], raw_tb.loc[i, 'hs'], raw_tb.loc[i, 'g'], prop_tb.loc[i, 'enthin'], raw_tb.loc[i, 'lh'], lam), 12)
            prop_tb.loc[i, 'enthout'] = round(prop_tb.loc[i, 'xe'] * lam, 12)
        else:
            # NOTE(review): xe = -enthout/lam here, but enthout = +xe*lam in
            # the other branches -- confirm the sign convention of enthout.
            prop_tb.loc[i, 'xe'] = round(-raw_tb.loc[i, 'enthout']/lam, 12)  #[-]
            prop_tb.loc[i, 'enthout'] = round(raw_tb.loc[i, 'enthout'], 12) # Copying raw data
    else:
        prop_tb.loc[i, 'xe'] = round(raw_tb.loc[i, 'xe'], 12) # Copying raw data  #[-]
        prop_tb.loc[i, 'enthout'] = round(raw_tb.loc[i, 'xe'] * lam, 12)


# `encoding=` is not passed: it was deprecated in SQLAlchemy 1.4 and removed in 2.0.
engine = create_engine('mysql+pymysql://{}:{}@{}:{}/{}'.format(json_res["user"][1],json_res["password"][1],json_res["host"][1],json_res["port"][1],"CHF"), echo=False)

# Export results of physical property to MariaDB
try:
    prop_tb.to_sql('prop_chf_tb', engine, if_exists='replace')
finally:
    # Close pooled connections explicitly instead of relying on `del`.
    engine.dispose()

del engine

# stop the timer
end_time1 = time.time()
print('END TIME :', time.strftime('%H:%M:%S'))

# calculate the elapsed time
elapsed_time1 = end_time1 - start_time

# print the elapsed time
print("Elapsed time to compute the physical properties: {:.2f} seconds".format(elapsed_time1))

In [None]:
# start the timer
# This cell takes its own start stamp; the elapsed time is measured from here.
start_time2 = time.time()
# `datetime` is not imported in the visible cells, so `time` is used.
print('START TIME :', time.strftime('%H:%M:%S'))

for i in prop_tb.index:
    # Inputs of this row (none of them is modified inside the loop)
    dh = prop_tb.loc[i, 'dh']
    g = prop_tb.loc[i, 'g']
    v = prop_tb.loc[i, 'v']
    lam = prop_tb.loc[i, 'lam']
    rhof = prop_tb.loc[i, 'rhof']
    rhov = prop_tb.loc[i, 'rhov']
    muf = prop_tb.loc[i, 'muf']
    cpf = prop_tb.loc[i, 'cpf']
    kf = prop_tb.loc[i, 'kf']
    sigma = prop_tb.loc[i, 'sigma']

    # Check heat balance: hval is the outlet quality implied by xi, q, lh, dh, g and lam
    hval = round(prop_tb.loc[i,'xi'] + 4*prop_tb.loc[i, 'q']*10**3*prop_tb.loc[i,'lh']/(dh*g*lam),6)
    prop_tb.loc[i,'hval'] = hval
    # Relative deviation of xe from hval in percent. The denominator is taken
    # as an absolute value so that a negative hval cannot produce a negative
    # "error" that trivially passes the < 10 check below.
    param_hval = round(np.abs(prop_tb.loc[i ,'xe']-hval)/np.abs(hval)*100, 6)
    prop_tb.loc[i,'param_hval'] = param_hval
    # 100 = deviation below 10 %, 110 = otherwise (also when the deviation is NaN)
    prop_tb.loc[i, 'yn_hval'] = 100 if param_hval < 10 else 110

    # Calculate dimensionless number
    prop_tb.loc[i, 'pe'] = round(pro.cal_pe(dh, g, cpf, kf), 6)
    prop_tb.loc[i, 're'] = round(pro.cal_re(g, dh, muf), 6)
    prop_tb.loc[i, 'we'] = round(pro.cal_we(rhof, v, dh, sigma), 6)
    prop_tb.loc[i, 'bd'] = round(pro.cal_bd(rhof, rhov, dh, sigma), 6)
    prop_tb.loc[i, 'pr'] = round(pro.cal_pr(cpf, muf, kf), 6)
    prop_tb.loc[i, 'prv'] = round(pro.cal_pr(prop_tb.loc[i, 'cpv'], prop_tb.loc[i, 'muv'], prop_tb.loc[i, 'kv']), 6)
    prop_tb.loc[i, 'ca'] = round(pro.cal_ca(muf, v, sigma, rhof), 6)
    # lc = sqrt(sigma / (9.8 * (rhof - rhov)))
    lc = round(np.sqrt(sigma/(9.8*(rhof-rhov))), 6)
    prop_tb.loc[i, 'lc'] = lc

    # dl depends on the geometry code in 'geo'
    geo = prop_tb.loc[i, 'geo']
    if geo == 'A':
        prop_tb.loc[i, 'dl'] = dh-2*lc
    elif geo == 'R':
        doi = prop_tb.loc[i, 'doi']
        dio = prop_tb.loc[i, 'dio']
        ar = prop_tb.loc[i, 'ar']
        if dio < lc:
            prop_tb.loc[i, 'dl'] = dh/ar
        else:
            prop_tb.loc[i, 'dl'] = ((2*doi*(dio - lc))/(doi+dio - lc))/ar
    else:
        prop_tb.loc[i, 'dl'] = dh+2*lc

# stop the timer
end_time2 = time.time()
print('END TIME :', time.strftime('%H:%M:%S'))

# calculate the elapsed time (end minus start of THIS cell, so it is positive)
elapsed_time2 = end_time2 - start_time2

# print the elapsed time
print("Elapsed time to compute the geometrical or dimensionless parameters: {:.2f} seconds".format(elapsed_time2))


In [None]:
# set initial calculation method
m_idx = 1    # 1 -> 'SZ' (mod.cal_SZ); any other value -> 'Levy' (mod.cal_Levy)
cal_idx = 3  # not referenced in the visible cells

# create log table (one row per iteration step, addressed by j)
log_tb = pd.DataFrame()

# set loop parameters
j = 0                # running row number of log_tb
tolerance = 0.0001   # relative difference |old_qcal - new_qcal| / new_qcal accepted as converged
max_iter = 100       # a row is declared non-converged once cnt exceeds this

# Every row starts its iteration from the mean of the experimental q column;
# the column is not modified below, so the mean is computed once.
q_init = prop_tb['q'].mean()


def _cal_xosv(i, q):
    """Return the (dtsz, xosv) pair for row ``i`` of prop_tb at heat flux ``q``.

    The model selected by ``m_idx`` is used, so the "old" and "new" values of
    one iteration step always come from the same correlation.
    """
    if m_idx == 1:
        return mod.cal_SZ(q, prop_tb.loc[i, 'rhof'], prop_tb.loc[i, 'dh'], prop_tb.loc[i, 'g'], prop_tb.loc[i, 'cpf'], prop_tb.loc[i, 'kf'], prop_tb.loc[i, 'pe'], prop_tb.loc[i, 'lam'])
    return mod.cal_Levy(prop_tb.loc[i, 'sigma'], prop_tb.loc[i, 'dh'], prop_tb.loc[i, 'rhof'], prop_tb.loc[i, 'muf'], prop_tb.loc[i, 'kf'], prop_tb.loc[i, 're'], prop_tb.loc[i, 'pr'], prop_tb.loc[i, 'cpf'], prop_tb.loc[i, 'g'], q, prop_tb.loc[i, 'lam'], prop_tb.loc[i, 'v'])


# check start time (`datetime` is not imported in the visible cells, so `time` is used)
start_time3 = time.time()
print('START TIME : ', time.strftime('%H:%M:%S'))

for i in prop_tb.index:
    prop_tb.loc[i, 'mth_xosv'] = 'SZ' if m_idx == 1 else 'Levy'

    cnt = 0  # iteration counter for this row
    # Preparing alpha and gamma
    # (both are overwritten with the calCHFDeng values when the loop below exits)
    prop_tb.loc[i, 'log_id'] = int(i) # Set foreign key to log-table
    prop_tb.loc[i, 'alpha'] = round(1.669-6.544*(prop_tb.loc[i, 'rdcp']/prop_tb.loc[i, 'dh']-0.448)**2,16)
    prop_tb.loc[i, 'gamma'] = round(0.06523 + (0.1045/(np.sqrt((2*np.pi)*(np.log(prop_tb.loc[i, 'rdcp']))**2))) * np.exp(-5.413*((np.log(prop_tb.loc[i, 'rdcp'])+0.4537)**2/(np.log(prop_tb.loc[i, 'rdcp'])**2))),16)
    log_tb.loc[j, 'old_qcal'] = q_init

    while True:
        # Prepare : log-table
        log_tb.loc[j, 'run_id'] = int(i)
        log_tb.loc[j, 'org_xi'] = prop_tb.loc[i, 'xi']
        log_tb.loc[j, 'org_xe'] = prop_tb.loc[i, 'xe']
        log_tb.loc[j, 'org_q'] = prop_tb.loc[i, 'q']

        # Step 1: Calculate old_xosv from the current heat-flux estimate
        log_tb.loc[j, 'old_dtsz'], log_tb.loc[j, 'old_xosv'] = _cal_xosv(i, log_tb.loc[j, 'old_qcal'])

        # Step 2: Calculate old_xe (first pass uses the table value, later
        # passes recompute it from the current heat-flux estimate)
        if cnt == 0:
            log_tb.loc[j, 'old_xe'] = prop_tb.loc[i, 'xe']
        else:
            log_tb.loc[j, 'old_xe'] = pro.cal_xe(log_tb.loc[j, 'old_qcal'], prop_tb.loc[i, 'doi'], prop_tb.loc[i, 'dio'], prop_tb.loc[i, 'geo'], prop_tb.loc[i, 'hs'], prop_tb.loc[i, 'g'], prop_tb.loc[i, 'enthin'], prop_tb.loc[i, 'lh'], prop_tb.loc[i, 'lam'], ch = 0)

        # Step 2: Calculate old_xt
        log_tb.loc[j, 'old_xt'] = round(mod.cal_xt(log_tb.loc[j, 'org_xi'], log_tb.loc[j, 'old_xosv'], log_tb.loc[j, 'old_xe']),6)

        # Step 3: Calculate new_qCHF
        log_tb.loc[j, 'alpha'], log_tb.loc[j, 'gamma'], log_tb.loc[j, 'zxt'], log_tb.loc[j, 'new_qcal'] = mod.calCHFDeng(prop_tb.loc[i, 'rdcp'], prop_tb.loc[i, 'dh'], prop_tb.loc[i, 'g'], log_tb.loc[j, 'old_xt'])

        # Step 3b: Calculate new_xosv from the new heat flux with the SAME
        # model as step 1 (identical to the previous cal_SZ call while m_idx == 1)
        log_tb.loc[j, 'new_dtsz'], log_tb.loc[j, 'new_xosv'] = _cal_xosv(i, log_tb.loc[j, 'new_qcal'])

        # Step 4: Loop control
        cnt += 1

        # Step 5: Validation (relative change of the heat flux in this step)
        val = round(abs((log_tb.loc[j, 'old_qcal'] - log_tb.loc[j, 'new_qcal'])/log_tb.loc[j, 'new_qcal']),6)

        converged = val <= tolerance
        if converged or cnt > max_iter:
            if converged:
                print("Data converged")
            else:
                print("This data doesn't converged")

            # Write the final state of this row back to prop_tb
            prop_tb.loc[i, 'cal_xt'] = round(float(mod.cal_xt(log_tb.loc[j, 'org_xi'], log_tb.loc[j, 'new_xosv'], log_tb.loc[j, 'old_xe'])),6)
            if converged:
                # converged rows store cal_xe / cal_xosv rounded to 4 decimals
                prop_tb.loc[i, 'cal_xe'] = round(log_tb.loc[j, 'old_xe'], 4)
                prop_tb.loc[i, 'cal_xosv'] = round(log_tb.loc[j, 'new_xosv'], 4)
            else:
                # non-converged rows keep the unrounded values
                prop_tb.loc[i, 'cal_xe'] = log_tb.loc[j, 'old_xe']
                prop_tb.loc[i, 'cal_xosv'] = log_tb.loc[j, 'new_xosv']
            prop_tb.loc[i, 'converged'] = 100 if converged else 110
            prop_tb.loc[i, 'alpha'] = log_tb.loc[j, 'alpha']
            prop_tb.loc[i, 'gamma'] = log_tb.loc[j, 'gamma']
            prop_tb.loc[i, 'zxt'] = round(float(log_tb.loc[j, 'zxt']), 6)
            prop_tb.loc[i, 'qcal'] = log_tb.loc[j, 'new_qcal']
            # deviation of the calculated from the experimental heat flux in percent
            prop_tb.loc[i, 'qval'] = ((log_tb.loc[j, 'new_qcal'] - log_tb.loc[j, 'org_q'])/log_tb.loc[j, 'org_q'])*100
            j += 1
            break

        # Step 5: Recursive state -- under-relaxed update (10 % new, 90 % old)
        # seeds the next log row
        log_tb.loc[j+1, 'old_qcal'] = 0.1*log_tb.loc[j, 'new_qcal']+0.9*log_tb.loc[j, 'old_qcal']
        j += 1

# stop the timer
end_time3 = time.time()
print('END TIME :', time.strftime('%H:%M:%S'))

# calculate the elapsed time (end minus start of THIS cell, so it is positive)
elapsed_time3 = end_time3 - start_time3

# print the elapsed time
print("Elapsed time to compute the CHF iteration: {:.2f} seconds".format(elapsed_time3))

# delete parameters
del i, j, tolerance

In [None]:
# create engine
# `encoding=` is not passed: it was deprecated in SQLAlchemy 1.4 and removed in 2.0.
engine = create_engine('mysql+pymysql://{}:{}@{}:{}/{}'.format(json_res["user"][1],json_res["password"][1],json_res["host"][1],json_res["port"][1],"CHF"), echo=False)

# export the log- and result-table to MariaDB
# (if_exists='replace' drops and recreates each table on every run)
try:
    log_tb.to_sql('log_chf_tb', engine, if_exists='replace')
    prop_tb.to_sql('res_chf_tb', engine, if_exists='replace')
finally:
    # Close pooled connections explicitly instead of relying on `del`.
    engine.dispose()

# delete session
del engine

In [None]:
# Display the iteration log restricted to the run id, the onset qualities and
# the heat-flux columns.
log_tb[[
    'run_id',
    'old_xosv',
    'new_xosv',
    'old_qcal',
    'org_q',
    'new_qcal',
]]

In [None]:
def delOutlierZ(data, threshold = 3):
    """Return the positions of the values that are NOT z-score outliers.

    Parameters
    ----------
    data : list or other 1-D array-like of numbers
    threshold : number, default 3
        Limit on the absolute z-score (in standard deviations); a value is
        kept when ``|z| < threshold``.

    Returns
    -------
    tuple of numpy.ndarray
        The result of ``np.where`` on the keep-mask, i.e. a tuple whose
        element holds the positional indices of the kept values.
    """
    values = np.asarray(data, dtype=float)

    # The population standard deviation (numpy default, ddof=0) is used.
    # Empty input is short-circuited to avoid numpy's mean/std warnings.
    std = values.std() if values.size else 0.0

    if std == 0:
        # All values are identical (or there are none): the z-score is
        # undefined and nothing can be an outlier, so every position is kept.
        keep = np.ones(values.shape, dtype=bool)
    else:
        keep = np.abs((values - values.mean()) / std) < threshold

    return np.where(keep)

# Alpha calculation function
# Algorithm that computes alpha and gamma based on the Deng model.
# Nonlinear regression of q ~ Xt (at the same p)

In [None]:
# KMeans Clustering

# Preparing converged dataset
# NOTE(review): the filter thresholds `hval` itself, while the heat-balance
# deviation in percent is stored in `param_hval` -- confirm which was intended.
val_tb = prop_tb[(prop_tb['converged']==100) & (abs(prop_tb['hval']) < 10)].copy()
# Column-wise arithmetic instead of a row-wise apply: same values, and it also
# works when the filter leaves no rows.
val_tb['Y_qcal'] = val_tb['q']*10**3/val_tb['lam'] # Y value
val_tb['X_zxt'] = val_tb['cal_xt']*np.sqrt(val_tb['rhov']/val_tb['rhof']) # X value

# Features used for the clustering
X = val_tb[['dh', 'g', 'rdcp']]

# Choose the number of clusters: half the number of rows.
# KMeans requires an integer n_clusters, hence floor division (at least 1).
k = max(1, len(val_tb) // 2)

# Initialize the KMeans Model (fixed seed so the cluster labels are reproducible)
kmeans = KMeans(n_clusters=k, random_state=0)

# Fit the model to the data
kmeans.fit(X)

# Extract the labels (cluster assignments) and centroids
labels = kmeans.labels_
centroids = kmeans.cluster_centers_

# Add the labels and the distance to the nearest centroid to the DataFrame
val_tb['cluster'] = labels
val_tb['centroid_distance'] = kmeans.transform(X).min(axis=1)

plt.rcParams['figure.figsize'] = (14,10)
plt.scatter(val_tb['X_zxt'], val_tb['Y_qcal'], c=val_tb['cluster'], cmap ='gist_rainbow')
# Axis labels name the plotted columns
plt.xlabel('X_zxt = cal_xt*sqrt(rhov/rhof)')
plt.ylabel('Y_qcal = q*10^3/lam')
plt.show()


In [None]:
# Clustering grouping and calculation for loop

# define a function to perform linear regression on a group
def cal_lr(df):
    """Fit a straight line of ``Y_qcal`` against ``X_zxt`` for one group.

    ``df`` must provide the columns 'X_zxt' and 'Y_qcal'.
    Returns the tuple (slope, intercept), each rounded to 6 decimals.
    """
    features = df[['X_zxt']]   # 2-D input (single feature column)
    target = df['Y_qcal']

    # fit() returns the estimator itself, so it can be chained
    fitted = LinearRegression().fit(features, target)

    slope = round(fitted.coef_[0], 6)
    intercept = round((fitted.intercept_), 6)
    return slope, intercept

def cal_mnmx(df, cols):
    """Min-max scale the columns ``cols`` of ``df``.

    A MinMaxScaler is fitted on ``df[cols]`` and applied to the same data.
    Returns a new DataFrame with the columns ``cols``.

    NOTE(review): the result is built without passing ``df``'s index, so it
    presumably carries a default positional index -- reset or align before
    assigning it back to a filtered frame.
    """
    selected = df[cols]
    scaled = MinMaxScaler().fit(selected).transform(selected)
    return pd.DataFrame(scaled, columns=cols)

# Run cal_lr once per group of rows sharing the same 'cluster' label;
# the result holds one (slope, intercept) tuple per cluster.
by_cluster = val_tb.groupby('cluster')
val_group = by_cluster.apply(cal_lr)
val_group

In [None]:
# Unpack the per-cluster (slope, intercept) tuples into two columns.
# The cluster label is kept as the index so that the merge below joins on an
# explicit key instead of relying on the row position matching the label.
val_group_tb = pd.DataFrame(val_group.tolist(), index=val_group.index, columns = ['g_gamma', 'g_alpha'])
val_group_tb['g_gamma'] = (-val_group_tb['g_gamma']).round(4) # negated slope
val_group_tb['g_alpha'] = val_group_tb['g_alpha'].round(4) # intercept

# attach the coefficients of its cluster to every row of val_tb
result = pd.merge(val_tb, val_group_tb, left_on='cluster', right_index=True, how='outer')

# TODO: refactor this part into a function that can be applied to a DataFrame.
lr = LinearRegression()

# linear fit of the per-cluster g_gamma against the reduced pressure rdcp
X = result[['rdcp']].round(4)
Y = result['g_gamma'].round(4)
lr.fit(X, Y)

print('Intercept:', round(lr.intercept_,6))
print('Coefficients: ', lr.coef_)


In [None]:
# Graphs
# One 2x2 grid; labels are set on each Axes explicitly, because plt.xlabel /
# plt.ylabel only affect the most recently created Axes.
fig, axes = plt.subplots(2, 2)
ax1, ax2, ax3, ax4 = axes.ravel()

ax1.scatter(result.loc[:, 'g'], result.loc[:, 'g_alpha'], c=result['rdcp'], cmap ='gist_rainbow')
ax1.set_xlabel('g')
ax1.set_ylabel('g_alpha')
ax1.set_title('colored by rdcp')

ax2.scatter(result.loc[:,'g'], result.loc[:,'g_gamma'], c=result['p'], cmap ='gist_rainbow')
ax2.set_xlabel('g')
ax2.set_ylabel('g_gamma')
ax2.set_title('colored by p')

ax3.scatter(np.exp(result.loc[:, 'X_zxt']*result.loc[:,'g_gamma']), (result.loc[:,'Y_qcal']/result.loc[:,'g_alpha']), c=result['rdcp'], cmap ='gist_rainbow')
ax3.set_xlabel('exp(X_zxt*g_gamma)')
ax3.set_ylabel('Y_qcal/g_alpha')
ax3.set_title('colored by rdcp')

ax4.scatter(result.loc[:,'g_alpha'], result.loc[:,'g_gamma'], c=result['rdcp'], cmap ='gist_rainbow')
ax4.set_xlabel('g_alpha')
ax4.set_ylabel('g_gamma')
ax4.set_title('colored by rdcp')

# No legend: the scatters encode a continuous value by colour and carry no
# labelled artists, so a legend call would only emit a warning.
fig.tight_layout()
plt.show()

In [None]:
# Display the heat-balance column of the property table.
prop_tb['hval']

In [None]:
# Polynomial regression of alpha and gamma on the reduced pressure
# Design matrix with the columns [rdcp, rdcp**2]
X = np.c_[prop_tb.loc[:,'rdcp'], (prop_tb.loc[:,'rdcp'])**2]
y1 = prop_tb.loc[:,'alpha']
y2 = prop_tb.loc[:, 'gamma']

# Linear models on the quadratic features (= 2nd-order polynomial regression)
model1 = LinearRegression()
model1.fit(X, y1)
model2 = LinearRegression()
model2.fit(X, y2)

xs = np.arange(min(prop_tb.rdcp),max(prop_tb.rdcp),0.02) # evenly spaced x values over the rdcp range
ys1 = xs*model1.coef_[0] + (xs**2)*model1.coef_[1] + model1.intercept_
ys2 = xs*model2.coef_[0] + (xs**2)*model2.coef_[1] + model2.intercept_

# Graphs
# Labels and legends are set per Axes; plt.xlabel / plt.legend would only act
# on the last-created Axes, and a legend needs labelled artists to show.
fig, (ax1, ax2) = plt.subplots(1, 2)

ax1.scatter(prop_tb.loc[:,'rdcp'], prop_tb.loc[:,'alpha'], color = 'black', label='alpha')
ax1.plot(xs,ys1,'r-',lw=3, label='quadratic fit')
ax1.set_xlabel('Reduced Pressure [-]')
ax1.set_ylabel('Alpha')
ax1.legend(loc='upper left')

ax2.scatter(prop_tb.loc[:,'rdcp'], prop_tb.loc[:,'gamma'], color = 'blue', label='gamma')
ax2.plot(xs,ys2,'b-',lw=3, label='quadratic fit')
ax2.set_xlabel('Reduced Pressure [-]')
ax2.set_ylabel('Gamma')
ax2.legend(loc='upper left')

plt.show()

# coefficient of rdcp, coefficient of rdcp**2, intercept
print(round(model1.coef_[0],4), round(model1.coef_[1],4), round(model1.intercept_,4))
print(round(model2.coef_[0],4), round(model2.coef_[1],4), round(model2.intercept_,4))