In [None]:
# Imports
import pyinputplus as pyip
import matplotlib.pyplot as plt
import string
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [None]:
# Load the 2021 medication price dataset used for modelling
TM_2021_filepath = 'https://raw.githubusercontent.com/sr2cute702/C964sr/main/TM_2021.csv'
# x is the full frame exactly as read from the CSV (the target column is still part of it)
x = pd.read_csv(TM_2021_filepath)
# y is the regression target column taken from that frame
y = x.loc[:, 'Max_Consumer_VAT_Price']

In [None]:
# Whitegrid style for better visual contrast (applies to every seaborn/matplotlib figure drawn after this cell)
sns.set(style="whitegrid")
## Dataset Features Overview
# Summary statistics of the training frame; shown as the cell output because it is the last expression
x.describe()

In [None]:
# Distribution of the raw target prices in the training dataset.
# Drawn on explicit axes with a title and axis labels so the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(y, bins=25)
ax.set(title='Distribution of Max_Consumer_VAT_Price',
       xlabel='Max_Consumer_VAT_Price',
       ylabel='Count');

In [None]:
# Histogram of the log2-transformed target prices.
# offset_log is added to every price before the log is taken
# (presumably to keep the argument of the log positive - TODO confirm the choice of 10000).
offset_log = 10000
fig, ax = plt.subplots()
ax.hist(np.log2(y + offset_log), bins=25)
ax.set(title='Distribution of log2(Max_Consumer_VAT_Price + offset_log)',
       xlabel='log2(Max_Consumer_VAT_Price + offset_log)',
       ylabel='Count');

In [None]:
# Heatmap of pairwise correlations, used to pick regressors for the target variable.
# Only numeric/bool columns are correlated: DataFrame.corr() raises on non-numeric
# columns in pandas >= 2.0, while older versions silently skipped them. Selecting the
# columns explicitly gives the same matrix on every pandas version.
train_corr = x.select_dtypes(include=[np.number, 'bool']).corr()
fig, ax = plt.subplots(figsize=(18, 18))
sns.heatmap(train_corr, cmap="coolwarm", annot=True, fmt='.1f', linewidths=0.05, ax=ax)
ax.set_title('Correlation between numeric columns');

In [None]:
# From the heatmap above, a correlation of at least .60 with Max_Consumer_VAT_Price is
# treated as significant enough to warrant inclusion in the regression
target_col = 'Max_Consumer_VAT_Price'
sig_corr = train_corr.index[train_corr[target_col] >= .6]
# The target correlates 1.0 with itself, so it always passes the threshold;
# leave it out so it is not listed (or plotted) as its own feature
features = [col for col in sig_corr if col != target_col]

# Plot the relation between each selected feature and the target
for reg in features:
    sns.lmplot(x=reg, y=target_col, data=x)

In [None]:
# Drop outliers: discard rows whose retailer or consumer price exceeds the cap.
# Rows with a missing price are kept (NaN > cap evaluates to False), as before.
price_cap = 4000
too_high = (x['Max_Retailer_Price'] > price_cap) | (x['Max_Consumer_Price'] > price_cap)
x = x[~too_high]
# Refresh the target so it lines up with the filtered rows
y = x['Max_Consumer_VAT_Price']
# Significant regressors; feat_labels is the single source of truth for the column order,
# which must match the order of the values passed to regressor.predict later on
feat_labels = ['Max_Retailer_Price', 'Max_Consumer_Price', 'Year']
sig_x = x[feat_labels]
# Split the data into training and testing halves
x_train, x_test, y_train, y_test = train_test_split(sig_x, y, test_size=0.5, random_state=0)
# Clean NaN (replaced by 0) in BOTH splits, so the held-out half goes through the same
# preprocessing as the training half and can be used for evaluation.
# nan_to_num also converts the splits to plain numpy arrays.
x_train, x_test = np.nan_to_num(x_train), np.nan_to_num(x_test)
y_train, y_test = np.nan_to_num(y_train), np.nan_to_num(y_test)
# Create the random forest regressor (fixed seed for reproducible trees)
regressor = RandomForestRegressor(n_estimators=150, random_state=42)
# Train the regressor
regressor.fit(x_train, y_train)

In [None]:
# calculate_prediction reads the current values of the three input widgets and feeds
# them to the fitted regressor to produce a price prediction
def calculate_prediction(Max_Retailer_Price, Max_Consumer_Price, Year):
    """Predict a price from the current widget inputs.

    Each argument is an object exposing a ``.value`` attribute (the sliders
    defined in this notebook). The three values are passed, in this order,
    as a single sample to the module-level ``regressor``.

    Returns whatever ``regressor.predict`` returns for that one-sample input.
    """
    # One row, ordered the same way as the arguments
    feature_row = [
        source.value
        for source in (Max_Retailer_Price, Max_Consumer_Price, Year)
    ]
    return regressor.predict([feature_row])

In [None]:
# Input sliders; their current values are handed to calculate_prediction on button press
# NOTE(review): the price slider maxima are far above the 4000 cap applied when the
# training rows were filtered - confirm these ranges are intended
Max_Retailer_Price = widgets.IntSlider(description='Max Retail Price', min=0, max=860000, step=1)
Max_Consumer_Price = widgets.IntSlider(description='Max Consumer Price', min=0, max=9990000, step=1)
Year = widgets.IntSlider(description='Year', min=2007, max=2021, step=1)
# Stack the three sliders vertically
widget_box = widgets.VBox([Max_Retailer_Price, Max_Consumer_Price, Year])
# The tab has a single page holding the slider box
children = [widget_box]
tab = widgets.Tab(children=children)
# Title of that page
tab.set_title(0, 'Medication Values')
# Last expression of the cell, so the tab is rendered as its output
tab

In [None]:
def on_button_clicked(_):
    """Button callback: show the prediction for the current slider values.

    The argument is the button instance supplied by ipywidgets and is unused.
    Everything runs inside the ``out`` widget's context so that both the printed
    results and any exception raised along the way are shown in the output area.
    """
    with out:
        out.clear_output()
        # calculate_prediction takes exactly the three slider widgets
        pred = calculate_prediction(Max_Retailer_Price, Max_Consumer_Price, Year)
        # predict returns a one-element array; unwrap it for a clean printout
        price = float(np.ravel(pred)[0])
        # Error on the held-out split: true test targets vs. the model's predictions for
        # the test features. NaNs are zero-filled the same way as in training
        # (a no-op when the splits were already cleaned).
        test_pred = regressor.predict(np.nan_to_num(x_test))
        mae = metrics.mean_absolute_error(np.nan_to_num(y_test), test_pred)
        print("--Prediction--")
        print("Our model predicts approximately ", round(price, 2), "dollars for the medication with these attributes.")
        print("--Error Analysis--")
        print('Mean Absolute Error:', mae)

# Output area that on_button_clicked prints into
out = widgets.Output()
# Button that triggers calculate_prediction with the current slider inputs
calc_button = widgets.Button(description='Calculate')
# Register the callback on the button
calc_button.on_click(on_button_clicked)

# Show the button with the output area underneath it
widgets.VBox(children=[calc_button, out])