In [16]:
import pandas as pd
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, TimeSeriesSplit, learning_curve
from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, HuberRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, RationalQuadratic, Matern, ExpSineSquared
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [17]:
# Load the compiled dataset, using the first CSV column as a parsed date index.
# The index is sorted before slicing for two reasons:
#   * label-based slicing with date strings is only reliable on a monotonic
#     DatetimeIndex, and
#   * the train/test split further down is positional (`iloc`), so rows must be
#     in chronological order or later observations would end up in the
#     training window.
df = (
    pd.read_csv('compiled_dataset.csv', index_col=0, parse_dates=True)
    .sort_index()
    .loc['2010-11-18':'2020-09-18']
)

In [33]:
# SPLIT TRAIN-TEST
target_col = 'cushing_crude_oil_price'

# Target series and feature matrix covering every row of `df`.
y = df[target_col]
X = df.drop(columns=[target_col])

# The leading 90% of rows (by position) make up the training window.
train_size = int(len(df) * 0.9)
X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the training rows and to the full feature matrix.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_scaled = scaler.transform(X)

In [34]:
# SET UP MODELS
# Fixed seed for every estimator that uses randomness, so repeated runs of the
# notebook (and repeated clicks in the UI) give the same numbers.
RANDOM_STATE = 42

# Display name -> unfitted estimator. The keys are shown in the model dropdown.
# Tree presets: a "fine" tree is the most flexible (deepest) one and a "coarse"
# tree the least flexible (shallowest), so depth decreases from Fine to Coarse.
models = {
    'Linear Regression': LinearRegression(),
    'SVM Linear': SVR(kernel='linear'),
    'SVM Quadratic': SVR(kernel='poly', degree=2),
    'SVM Cubic': SVR(kernel='poly', degree=3),
    'Fine Tree': DecisionTreeRegressor(max_depth=15, random_state=RANDOM_STATE),
    'Medium Tree': DecisionTreeRegressor(max_depth=10, random_state=RANDOM_STATE),
    'Coarse Tree': DecisionTreeRegressor(max_depth=5, random_state=RANDOM_STATE),
    'Bagged Trees': BaggingRegressor(random_state=RANDOM_STATE),
    'Boosted Trees': GradientBoostingRegressor(random_state=RANDOM_STATE)
}

In [40]:
def rmse_score(y, y_pred):
    """Return the root-mean-squared error between `y` and `y_pred`."""
    mse = mean_squared_error(y, y_pred)
    return np.sqrt(mse)

def update_model(model_name):
    """Fit the selected model and report its out-of-sample error.

    The model is fitted on the scaled training rows, then used to predict the
    whole date range. The RMSE is computed only on the rows after the training
    window (positions ``train_size`` onward), so it is a genuine held-out
    score rather than one dominated by rows the model was fitted on.

    Args:
        model_name: Key into the module-level ``models`` dict.

    Returns:
        A 2-tuple ``(rmse_text, line_plot)``: the held-out RMSE formatted to
        four decimals, and a ``gr.LinePlot`` of actual vs. predicted values
        over the full date range.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    # Get the selected model
    model = models[model_name]

    # Fit on the training window only
    model.fit(X_train_scaled, y_train)

    # Predict every row so the chart shows both the fit and the forecast
    y_pred = model.predict(X_scaled)

    # Score only the held-out rows; including the training rows here would
    # report a mostly in-sample (optimistic) error under an out-of-sample label.
    out_sample_rmse = rmse_score(y.iloc[train_size:], y_pred[train_size:])

    # Create plot data: one row per date with actual and predicted columns
    plot_df = pd.DataFrame({
        'date': y.index,
        'Actual': y,
        'Predicted': y_pred
    })

    # Long format (date, variable, value) so the two series can be coloured
    # by the `variable` column
    plot_data = pd.melt(plot_df, id_vars=['date'],
                value_vars=['Actual', 'Predicted'],
                var_name='variable', value_name='value')

    return (
        f"{out_sample_rmse:.4f}",
        gr.LinePlot(
            value=plot_data,
            x='date',
            y='value',
            color='variable',
            title=f'Test results: {model_name}',
            overlay_point=True,
            tooltip=['date', 'variable', 'value'],
            height=400
        ),
    )

# Names offered in the dropdown, in the insertion order of `models`.
model_names = list(models)

# Layout: one row holding a column of controls and, beside it, the chart.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            model_dropdown = gr.Dropdown(
                label="Model Selection",
                choices=model_names,
                value=model_names[0],
                interactive=True,
            )
            run_button = gr.Button("Run Model", variant="primary")
            output_sample_text = gr.Textbox(label="RMSE")

        plot = gr.LinePlot(interactive=True)

    # Clicking the button passes the selected name to `update_model` and
    # routes its two return values to the RMSE box and the chart.
    run_button.click(
        fn=update_model,
        inputs=[model_dropdown],
        outputs=[output_sample_text, plot],
    )

demo.launch()

* Running on local URL:  http://127.0.0.1:7872
* To create a public link, set `share=True` in `launch()`.


