# Prediction Using Linear Regression
- Set up a linear regression model to predict flue NOx levels
- Examine the accuracy of the model
- Set up models for tcf and f.co and examine their accuracy

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go

In [None]:
# load the cleaned dataset from the HDF5 file
clean_data_path = '../data/clean-data.h5'
df = pd.read_hdf(clean_data_path)

# preview the first rows
df.head()

In [None]:
# column names used as model inputs
inputs = [
    'air.flow',
    'air.temp',
    'air.frac',
    'fuel.flow',
]

# column names of all available output features
columns = [
    'tc1', 'tc2', 'tc3', 'tc4', 'tcf',
    'f.h2o', 'f.co2', 'f.o2', 'f.ch4', 'f.co', 'f.nox',
]

# output feature selected as the prediction target (kept as a list)
output = ['f.nox']

In [None]:
# split the data into training (80%) and testing (20%) sets;
# random_state is fixed so the same split is produced on every run.
# `output` is the list of selected target columns, so y_train / y_test
# are DataFrames that can be indexed by column name afterwards.
X_train, X_test, y_train, y_test = train_test_split(
    df[inputs], df[output], test_size=0.2, random_state=42
)

# initiate the linear regression model
model = LinearRegression()

# fit the linear model using the training data
model.fit(X_train, y_train)

In [None]:
# R2 score of the fitted model on the held-out test set
score = model.score(X_test, y_test)
r2_message = 'R2: ' + str(score)
display(r2_message)

# model output for the test inputs
predictions = model.predict(X_test)

# wrap the predictions in a dataframe labelled with the output column name(s)
predictionsDF = pd.DataFrame(data=predictions, columns=output)
predictionsDF.head()

In [None]:
# Parity plot: actual test values on the x-axis against predicted values
actual_nox = y_test['f.nox']
predicted_nox = predictionsDF['f.nox']

# one marker per test sample (actual vs. predicted)
results_trace = go.Scatter(
    x=actual_nox,
    y=predicted_nox,
    mode='markers',
    name='results',
)

# reference trace where y equals x (actual plotted against itself)
parity_trace = go.Scatter(
    x=actual_nox,
    y=actual_nox,
    name='parity',
)

# assemble the figure, adding the traces in the same order as before
parity = go.Figure()
parity.add_trace(results_trace)
parity.add_trace(parity_trace)

# square canvas and title
parity.update_layout(height=800, width=800, title="NOx Actual vs. Predicted")

# display figure
parity.show()