# Task for Today  

***

## Critical Heat Flux Prediction  
  
Given *data about various experimental conditions*, let's try to predict the **critical heat flux** for a given experiment.  
  
We will use a random forest regression model to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load the CHF experiment table from CSV into a DataFrame.
# NOTE(review): the relative '../input/...' path looks like a Kaggle dataset
# mount - adjust it when running the notebook elsewhere.
data = pd.read_csv('../input/predicting-heat-flux/Data_CHF_Zhao_2020_ATE.csv')

In [None]:
# Show the loaded DataFrame as this cell's output.
data

In [None]:
# Print a summary of the DataFrame (columns, dtypes, non-null counts).
data.info()

# Preprocessing

In [None]:
def preprocess_inputs(df):
    """Prepare the raw CHF table for modelling.

    Removes the ``id`` and ``author`` columns, shuffles the rows with a
    fixed seed (``random_state=1``) and separates the
    ``chf_exp [MW/m2]`` target from the remaining feature columns.
    The DataFrame passed in is not modified.

    Args:
        df: Raw DataFrame containing at least the ``id``, ``author`` and
            ``chf_exp [MW/m2]`` columns.

    Returns:
        A ``(X, y)`` tuple: the feature DataFrame and the target Series,
        both in the same shuffled row order.
    """
    target_column = 'chf_exp [MW/m2]'

    # drop() hands back a new frame, so the caller's data is never touched;
    # sample(frac=1.0) then returns every row in a reproducible random order.
    shuffled = (
        df.drop(columns=['id', 'author'])
          .sample(frac=1.0, random_state=1)
    )

    features = shuffled.drop(columns=target_column)
    target = shuffled[target_column]

    return features, target

In [None]:
# Split the raw table into shuffled features (X) and the CHF target (y).
X, y = preprocess_inputs(data)

In [None]:
# Show the feature DataFrame as this cell's output.
X

In [None]:
# Show the target Series as this cell's output.
y

# Building Pipeline

In [None]:
def build_model():
    """Build an unfitted preprocessing + random-forest regression pipeline.

    The ``geometry`` column is one-hot encoded (categories unseen during
    fitting are encoded as all zeros via ``handle_unknown='ignore'``); every
    other column is passed through unchanged. The transformed features feed
    a ``RandomForestRegressor`` seeded with ``random_state=1``.

    Returns:
        A new, unfitted ``Pipeline`` with the steps ``'preprocessor'`` and
        ``'regressor'``.
    """
    # scikit-learn 1.2 renamed OneHotEncoder's ``sparse`` argument to
    # ``sparse_output`` and 1.4 removed the old name, so ``sparse=False``
    # raises TypeError on current releases. Try the new keyword first and
    # fall back to the old one for installations older than 1.2.
    try:
        onehot = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    except TypeError:
        onehot = OneHotEncoder(sparse=False, handle_unknown='ignore')

    nominal_transformer = Pipeline(steps=[
        ('onehot', onehot)
    ])

    # Only 'geometry' is encoded; remainder='passthrough' keeps all other
    # columns as they are.
    preprocessor = ColumnTransformer(transformers=[
        ('nominal', nominal_transformer, ['geometry'])
    ], remainder='passthrough')

    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(random_state=1))
    ])

    return model

# Training

In [None]:
# 5-fold cross-validation: fit a fresh pipeline on each training split and
# record the RMSE on the held-out split. The folds are contiguous slices
# (KFold is not asked to shuffle); the rows were already shuffled in
# preprocess_inputs.
kf = KFold(n_splits=5)

rmses = []

for train_idx, test_idx in kf.split(X):
    # Positional row selection for this fold.
    X_train, X_test = X.iloc[train_idx, :], X.iloc[test_idx, :]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # A new model per fold so nothing learned carries over between folds;
    # Pipeline.fit returns the fitted pipeline itself.
    model = build_model().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Root-mean-squared error on the held-out fold.
    rmse = np.sqrt(np.mean((y_test - y_pred)**2))
    rmses.append(rmse)

# Average the per-fold RMSEs into a single score.
final_rmse = np.mean(rmses)

In [None]:
# Report the mean cross-validated RMSE, formatted to two decimal places.
print("RMSE: {:.2f}".format(final_rmse))

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/rK_Y9DjQ8js