# 6.0 Analytical Dashboard

This is an analytical dashboard used to demonstrate the results.

In [1]:
# python -m pip install pandas
# python -m pip install scipy
# python -m pip install matplotlib
# python -m pip install scikit-learn
# curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# python -m pip install --upgrade pip
# python -m pip install ipywidgets
# pip install voila
# !jupyter nbextension enable --py widgetsnbextension --sys-prefix
# !jupyter serverextension enable voila --sys-prefix

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import warnings
from sklearn.preprocessing import MinMaxScaler


from IPython.display import display, clear_output
import ipywidgets as widgets

%matplotlib inline
warnings.filterwarnings('ignore')

In [2]:
# Cleaned day-level data set
data = pd.read_csv("../dataset/df_day_data.csv")

# Train/test splits that still contain the outliers.
# NOTE(review): the *_ori files presumably hold the target on its original
# (unscaled) scale - confirm against the notebook that wrote them.
X_train, X_test = (
    pd.read_csv("../dataset/with_outliers/X_train.csv"),
    pd.read_csv("../dataset/with_outliers/X_test.csv"),
)
Y_train, Y_test = (
    pd.read_csv("../dataset/with_outliers/y_train.csv"),
    pd.read_csv("../dataset/with_outliers/y_test.csv"),
)
Y_train_ori, Y_test_ori = (
    pd.read_csv("../dataset/with_outliers/Y_train_ori.csv"),
    pd.read_csv("../dataset/with_outliers/Y_test_ori.csv"),
)

# The same six files for the variant with the outliers removed
no_X_train, no_X_test = (
    pd.read_csv("../dataset/no_outliers/X_train.csv"),
    pd.read_csv("../dataset/no_outliers/X_test.csv"),
)
no_Y_train, no_Y_test = (
    pd.read_csv("../dataset/no_outliers/y_train.csv"),
    pd.read_csv("../dataset/no_outliers/y_test.csv"),
)
no_Y_train_ori, no_Y_test_ori = (
    pd.read_csv("../dataset/no_outliers/Y_train_ori.csv"),
    pd.read_csv("../dataset/no_outliers/Y_test_ori.csv"),
)

In [3]:
# Personalised data
# Feature columns summarised per year, with their padded display labels
cols  = ["temperature","humidity","windspeed","weather","season"]
label = ["Temperature",
         "Humidity   ",
         "Windspeed  ",
         "Weather    ",
         "Season     "]

# One summary slot per entry of `cols`; cal_mean_median fills these in place
data_2011 = [1] * 5
data_2012 = [1] * 5

# Twelve per-month slots for each year; line_graph overwrites them
mnth_2011 = list(range(1, 13))
mnth_2012 = list(range(1, 13))
mnth_2013 = list(range(1, 13))

# Yearly totals / average, filled in further down
year_2011_sum = year_2012_sum = year_2013_sum = year_2013_avg = 0

mnth = ["Jan: ", "Feb: ", "Mar: ", "Apr: ", "May: ", "Jun: ", "Jul: ", "Aug: ", "Sep: ", "Oct: ", "Nov: ", "Dec: "]

# Keep only the columns the dashboard uses, rounded to two decimals
new_frame = pd.DataFrame(
    data,
    columns=['month','count','temperature','humidity','windspeed','weather','season','year'],
).round(2)

# Row masks per year flag: year == 0 is reported as 2011, year == 1 as 2012
condition_2011  = (new_frame['year'].values == 0)
condition_2012  = (new_frame['year'].values == 1)

# Each yearly frame is the full frame minus the rows of the other year
frame_2011  = new_frame.drop(new_frame[condition_2012].index)
frame_2012  = new_frame.drop(new_frame[condition_2011].index)

# Obtaining mean for "temperature","humidity","windspeed"
def cal_mean_median(new_frame, year, a):
    """Print a year banner and store one summary value per feature column in ``a``.

    For every name in the module-level ``cols`` list the matching column of
    ``new_frame`` is summarised: the 4th and 5th entries of ``cols`` use the
    median, all other entries use the mean.

    Parameters
    ----------
    new_frame : DataFrame containing every column listed in ``cols``.
    year      : value shown in the banner.
    a         : mutable sequence; ``a[i]`` is overwritten with the summary of
                ``cols[i]``.

    Returns nothing; the summaries themselves are not printed.
    """
    print ("========================")
    print (" Year", year, " Information ")
    print ("========================")

    median_columns = (cols[3], cols[4])
    for slot, column_name in enumerate(cols):
        column = new_frame[column_name]
        a[slot] = column.median() if column_name in median_columns else column.mean()

    print ("")


# Personalised prediction
def result(result_frame):
    """Print and return the total of all entries that are not the ``-1`` sentinel.

    ``result_frame`` is converted with ``pd.to_numeric``; entries equal to -1
    are ignored. When at least one entry remains, the total and the rounded
    mean of the remaining entries are printed, otherwise "No data recorded".

    Returns the total (0 when nothing remained).
    """
    kept = [value for value in pd.to_numeric(result_frame) if value != -1]
    # Built-in sum adds left to right starting from 0
    sum_temp_count = sum(kept)

    if kept:
        avg_temp_count = sum_temp_count / len(kept)
        print ("Sum  of bike rental count (per year):", sum_temp_count)
        print ("Average bike rental count (per day ):", np.round(avg_temp_count,0))
    else:
        print ("No data recorded")
    return sum_temp_count
        
# Personalised prediction
# 1-based positions on the 0.01-step prediction grid that are read off as the
# twelve monthly values.
# NOTE(review): presumably picked for a 100-point grid over the [0, 1]-scaled
# month column - confirm before changing the grid step.
_MONTH_GRID_POSITIONS = (1, 9, 18, 27, 36, 45, 55, 64, 72, 82, 91, 100)


def _fit_and_plot_year(frame, line_color, legend_label):
    """Fit count on the min-max scaled first column of ``frame`` and draw it.

    Column 0 of ``frame`` is the feature and column 1 the target. Draws the raw
    points and the fitted curve on the current matplotlib figure and returns
    the curve values rounded to whole numbers.
    """
    x = MinMaxScaler().fit_transform(frame.iloc[:, 0:1].values)
    # scikit-learn expects a 1-D target; a column vector triggers a conversion warning
    y = frame.iloc[:, 1].values

    # Seeded like the forest in visualise_prediction so repeated runs give the
    # same curve and therefore the same printed forecast.
    regressor = RandomForestRegressor(random_state=0)
    regressor.fit(x, y)

    # Evaluate on a 0.01-spaced grid between the smallest and largest feature
    # value. ndarray.min()/max() return scalars; the builtin min()/max() on a
    # 2-D array hand 1-element arrays to np.arange, which is deprecated.
    grid = np.arange(x.min(), x.max(), 0.01).reshape(-1, 1)
    grid_predictions = regressor.predict(grid)

    plt.scatter(x[:, 0], y, color = '#c8f7c8')
    plt.plot(grid, grid_predictions, color = line_color, label=legend_label)
    return np.round(grid_predictions, 0)


def _store_monthly_values(grid_predictions, slots):
    """Copy the predictions at ``_MONTH_GRID_POSITIONS`` into ``slots`` in order."""
    slot = 0
    for position, value in enumerate(grid_predictions, start=1):
        if position in _MONTH_GRID_POSITIONS:
            slots[slot] = value
            slot += 1


def line_graph(frame_2011, frame_2012, title, year_2011_sum, year_2012_sum):
    """Plot the month-vs-count fits for both years and print a 2013 estimate.

    Parameters
    ----------
    frame_2011, frame_2012 : DataFrames whose first column is the feature
        (month) and whose second column is the rental count.
    title : title of the combined chart.
    year_2011_sum, year_2012_sum : yearly totals; their mean is printed as the
        2013 total.

    Side effects
    ------------
    Shows one matplotlib figure, prints the forecast, and overwrites the
    module-level lists ``mnth_2011``, ``mnth_2012`` and ``mnth_2013``.
    Returns nothing.
    """
    rounded_2011 = _fit_and_plot_year(frame_2011, 'red', '2011')
    rounded_2012 = _fit_and_plot_year(frame_2012, 'black', '2012')

    plt.title(title)
    plt.xlabel("Month")
    plt.ylabel("Bike Rental Count")
    plt.legend()
    plt.show()

    # Read one value per month off each yearly curve
    _store_monthly_values(rounded_2011, mnth_2011)
    _store_monthly_values(rounded_2012, mnth_2012)

    print("--------------------------")
    print(" Prediction for year 2013 ")
    print("--------------------------")

    # 2013 is estimated as the month-by-month mean of the two yearly curves
    predicted_sum = 0
    for slot, (value_2011, value_2012) in enumerate(zip(mnth_2011, mnth_2012)):
        mnth_2013[slot] = (value_2011 + value_2012) / 2
        predicted_sum += mnth_2013[slot]

    forecast_sum = (year_2011_sum + year_2012_sum) / 2
    forecast_avg = predicted_sum / 12
    print ("Sum  of bike rental count (per year):", forecast_sum)
    print ("Average bike rental count (per day ):", np.round(forecast_avg,0))
    
# Personalised prediction
def prediction_2013():
    """Draw the 2011/2012 chart and print the 2013 estimate.

    Thin wrapper around ``line_graph`` that passes the module-level yearly
    frames and yearly totals.
    """
    line_graph(
        frame_2011=frame_2011,
        frame_2012=frame_2012,
        title="Graph of 2011 - 2012",
        year_2011_sum=year_2011_sum,
        year_2012_sum=year_2012_sum,
    )

# To display
# Count column per year: rows that belong to the other year become the -1
# sentinel that result() skips.
result_frame_2011 = np.where(condition_2011,new_frame['count'], -1)
result_frame_2012 = np.where(condition_2012,new_frame['count'], -1)

# 2011: banner + feature summaries, then total / average of the counts
cal_mean_median(frame_2011, "2011", data_2011)
year_2011_sum = result(result_frame_2011)

# 2012: same report
cal_mean_median(frame_2012, "2012", data_2012)
year_2012_sum = result(result_frame_2012)


 Year 2011  Information 

Sum  of bike rental count (per year): 1234184
Average bike rental count (per day ): 3419.0
 Year 2012  Information 

Sum  of bike rental count (per year): 2007800
Average bike rental count (per day ): 5624.0


In [4]:
# Model: Random Forest Regressor Model
# Column names accepted as the x-axis of a prediction
cols_K = ["temperature","humidity","windspeed","weather","weekday","holiday","month","year","season"]
# Column order used to turn a name into the slice [K_f[i]:K_b[i]] of the feature frames
K_ref  = ["season","year","month","holiday","weekday","weather","windspeed","humidity","temperature"]
K_f    = list(range(9))
K_b    = [start + 1 for start in K_f]

# Button that triggers a prediction for the column typed by the user
btn_generate = widgets.Button(
    description="Generate",
    tooltip='Generate',
    button_style="",
    layout=widgets.Layout(width="wrap-content", height="wrap-content"),
)

def on_generate_clicked(event):
    """Click handler of the Generate button.

    Clears the shared output area, redraws the heading, the dataset summary
    and the input row, then runs the prediction for the text currently typed
    in ``user_input``. ``event`` is not used.
    """
    with output:
        clear_output()

        heading_html = ("<h3>Selected analysis approach: "
                        + "<i style='color:red;'>" + a2_description + "</i>" + " </h3>")
        display(widgets.HTML(value=heading_html))

        intro()

        button_row = widgets.HBox([btn_generate, content_reset])
        display(widgets.VBox([user_input, button_row]))

        personalised_prediction(user_input.value)

# Register the handler on the Generate button
btn_generate.on_click(on_generate_clicked)

def intro():
    """Print a short description of the loaded data set.

    Reports the length and shape of the module-level ``data`` frame and the
    names in ``cols_K``, listed in reverse order.
    """
    print ("=====================")
    print (" Dataset Information ")
    print ("=====================")
    print ("Dataset Used   : day.csv")
    print ("Dataset Length :", len(data))
    print ("Dataset Shape  :", data.shape)

    # Every name is followed by one space; the last name in cols_K comes first
    column_listing = "".join(name + " " for name in reversed(cols_K))
    print ("Dataset Column :", column_listing)
    print ("")

# Banner printed above every per-category listing.
_PREDICTION_BANNER = ("--------------------------",
                      " Prediction for next year ",
                      "--------------------------")

# Categorical columns: column name -> (1-based positions on the prediction
# grid, labels printed at those positions in that order). The grid runs from
# the smallest to the largest feature value, so labels are listed from the
# lowest category code upwards.
# NOTE(review): the positions presumably assume a 100-point grid over a
# [0, 1]-scaled feature - confirm.
_CATEGORY_REPORTS = {
    "season": ((1, 33, 67, 100),
               ["Winter(1): ",
                "Spring(2): ",
                "Summer(3): ",
                "Fall  (4): "]),
    "month": ((1, 9, 18, 27, 36, 45, 55, 64, 72, 82, 91, 100),
              ["Jan: ", "Feb: ", "Mar: ", "Apr: ", "May: ", "Jun: ",
               "Jul: ", "Aug: ", "Sep: ", "Oct: ", "Nov: ", "Dec: "]),
    # Fixed: the first grid point is the smallest value (0 = workday); the
    # labels used to be listed the other way round.
    "holiday": ((1, 100),
                ["Workday(0): ",
                 "Holiday(1): "]),
    "weekday": ((1, 17, 33, 50, 67, 83, 100),
                ["Sunday    (0): ",
                 "Monday    (1): ",
                 "Tuesday   (2): ",
                 "Wednesday (3): ",
                 "Thursday  (4): ",
                 "Friday    (5): ",
                 "saturday  (6): "]),
    "weather": ((1, 33, 100),
                ["Clear, Few clouds, Partly cloudy, Partly cloudy                                        (1): ",
                 "Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist                           (2): ",
                 "Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds(3): "]),
}

# Fourth weather category; it is always reported as "no record".
_WEATHER_NO_RECORD_LABEL = "Heavy Rain + Ice Pallets + Thunderstorm + Mist, Snow + Fog                             (4): "

# Continuous columns: centre of the blue highlight circle, the sentence
# appended to the observation text, and the strings of the analysis report.
_CONTINUOUS_REPORTS = {
    "windspeed": {
        "circle": (0.55, 4225),
        "note": " Most data recorded between 0.35 to 0.85 windspeed score as shown by the blue circle.",
        "rule": "----------------------------",
        "banner": " Analysis of windspeed data ",
        "heading": "1. Windspeed Score Analysis",
    },
    "humidity": {
        "circle": (0.60, 4217),
        "note": " Most data recorded between 0.30 to 0.90 humidity score as shown by the blue circle.",
        "rule": "---------------------------",
        "banner": " Analysis of humidity data ",
        "heading": "1. Humidity Score Analysis",
    },
    "temperature": {
        "circle": (0.40, 4120),
        "note": " Most data recorded between 0.10 to 0.70 temperature score as shown by the blue circle.",
        "rule": "------------------------------",
        "banner": " Analysis of temperature data ",
        "heading": "1. Temperature Score Analysis",
    },
}


def _print_category_report(x_axis, rounded_predictions):
    """Print one rounded prediction per category label of ``x_axis``."""
    positions, labels = _CATEGORY_REPORTS[x_axis]
    for line in _PREDICTION_BANNER:
        print(line)

    slot = 0
    for position, value in enumerate(rounded_predictions, start=1):
        if position in positions:
            print(labels[slot], value)
            slot += 1

    # The extra weather line is only printed once grid position 100 was reached
    if x_axis == "weather" and len(rounded_predictions) >= 100:
        print(_WEATHER_NO_RECORD_LABEL, "0.0 (No record)")


def _print_continuous_report(report, x, rounded_predictions):
    """Print mode/mean/median of the feature and the median predicted count."""
    print(report["rule"])
    print(report["banner"])
    print(report["rule"])

    print(report["heading"])
    print("Mode   : ", stats.mode(x))
    print("Mean   : ", np.round(np.mean(x),2))
    print("Median : ", np.round(np.median(x),2))
    print("")

    # Median (50 % quantile) of the predicted counts along the grid
    print("2. Bike Rental Count Analysis")
    print("Q[50%] : ", pd.Series(rounded_predictions).quantile(0.5))
    print("")


def visualise_prediction(x,y,title,x_axis,y_axis,text):
    """Fit a random forest of ``y`` on ``x``, plot it and print a short report.

    Parameters
    ----------
    x : feature values; assumes an (n, 1) array - TODO confirm with callers.
    y : target values, one per row of ``x``.
    title, y_axis : chart title and y-axis label.
    x_axis : column name; used as x-axis label and to select the report.
        "year" skips the fit entirely and delegates to ``prediction_2013``.
    text : observation sentence printed under the chart.

    Side effects: shows a matplotlib figure and prints to stdout. Returns nothing.
    """
    if x_axis == "year":
        # No single-feature fit for the year flag; show the per-year chart instead
        print("")
        prediction_2013()
        return

    regressor = RandomForestRegressor(n_estimators = 100, random_state = 0)
    regressor.fit(x, y)

    # 0.01-spaced grid between the smallest and largest feature value.
    # np.min/np.max return scalars; the builtin min()/max() on a 2-D array
    # hand 1-element arrays to np.arange, which is deprecated.
    X_grid = np.arange(np.min(x), np.max(x), 0.01).reshape(-1, 1)
    # Predict once and reuse the curve for both the plot and the report
    grid_predictions = regressor.predict(X_grid)

    plt.scatter(x, y, color = '#c8f7c8')
    plt.plot(X_grid, grid_predictions, color = 'red')
    plt.title(title)
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)

    continuous = _CONTINUOUS_REPORTS.get(x_axis)
    if continuous is not None:
        # Highlight the dense region of the scatter with a blue circle
        centre_x, centre_y = continuous["circle"]
        plt.scatter(centre_x, centre_y, s=30000, facecolors='none', edgecolors='blue', linewidth=2)
        text = text + continuous["note"]

    plt.show()

    # Observation text
    print(text)

    rounded_predictions = np.round(grid_predictions, 0)
    if x_axis in _CATEGORY_REPORTS:
        _print_category_report(x_axis, rounded_predictions)
    elif continuous is not None:
        _print_continuous_report(continuous, x, rounded_predictions)
        
def personalised_prediction(key):
    """Run the single-feature prediction for the column named ``key``.

    ``key`` comes from a free-text widget, so surrounding whitespace and
    capital letters are tolerated. Names that are not in ``cols_K`` print
    "Invalid Column Name".

    For a valid name the matching column is sliced out of the training
    features and visualised twice: first from the data with outliers, then
    from the data without outliers. For "year" only the first call is made
    and an empty line is printed instead of the second.
    """
    title_with_outliers  = "Prediction with data contain outlier "
    title_no_outliers    = "Prediction with data without outlier"
    y_axis               = "Rental Count"
    text                 = "This red line was the line of prediction."

    # Normalise typed input so " Month" or "MONTH" select the "month" column
    if isinstance(key, str):
        key = key.strip().lower()

    if key not in cols_K:
        print("Invalid Column Name")
        return

    # Position of the column inside the feature frames
    i = K_ref.index(key)
    x_f = K_f[i]
    x_b = K_b[i]

    # Training data with outliers
    x = X_train.iloc[:, x_f:x_b].values
    y = Y_train_ori.iloc[:,:1].values
    visualise_prediction(x,y,title_with_outliers,key,y_axis,text)

    if key == "year":
        # The yearly chart was already drawn by the call above
        print("")
        return

    # Training data without outliers
    x_no = no_X_train.iloc[:, x_f:x_b].values
    y_no = no_Y_train_ori.iloc[:,:1].values
    visualise_prediction(x_no,y_no,title_no_outliers,key,y_axis,text)

In [5]:
# Setup header
# Read the banner bytes inside a context manager so the file handle is closed
# again as soon as the bytes are in memory.
with open("../resource/widget_setup/header_1.png", "rb") as file:
    image = file.read()

image_headline = widgets.Image(
    value=image,
    format='png',  # the banner is a .png file; it used to be declared as 'jpg'
    width='800')

header = widgets.HBox([image_headline], description="content",
                      layout=widgets.Layout(width='auto', height="auto")
                     )

# Setup footer
footer_headertext = widgets.HTML(value="<h2>Observation: </h2>")

In [6]:
# Setup content
# Captions of the two approach buttons (also reused in the result headings)
a1_description = "Model Evaluation"
a2_description = "Random Forest Regressor Model   " + "(89-90% accuracy)"

# Load the two evaluation figures; the context managers close the file
# handles once the bytes have been read.
with open("../resource/fig/mse.png", "rb") as file_mse:
    img_mse = file_mse.read()
with open("../resource/fig/accuracy.png", "rb") as file_accuracy:
    img_accuracy = file_accuracy.read()

# Free-text field for the column to use as x-axis
user_input = widgets.Text(placeholder="Enter the data column act as the x_axis for prediction: ")

# Content A: Various Analysis Approach
a1 = widgets.Button(description=a1_description,
                    tooltip='Approach1',
                    layout=widgets.Layout(width="auto", height="auto"),
                    button_style="success")
a2 = widgets.Button(description=a2_description,
                    tooltip='Approach2',
                    layout=widgets.Layout(width="auto", height="auto"),
                    button_style="primary")

content_A = widgets.VBox([a1, a2])

NameError: name 'typeof' is not defined

In [None]:
# Content B: Selected Approach & Output
# Shared output area that every click handler clears and writes into
output = widgets.Output()

# Image widgets for the two evaluation figures (MSE and accuracy)
imge_to_show1 = widgets.Image(value=img_mse, format='png', width='auto')
imge_to_show2 = widgets.Image(value=img_accuracy, format='png', width='auto')

def on_a1_clicked(event):
    """Click handler of the first approach button: show the evaluation figures.

    Clears the shared output area, then displays a heading followed by the two
    evaluation images side by side. Any exception raised while building the
    view is replaced by a short fallback message. ``event`` is not used.
    """
    with output:
        clear_output()
        try:
            heading_html = ("<h3>Evaluation : "+
                            "<i style='color:red;'>" + a1_description +
                            "</i>" + " </h3>")
            display(widgets.HTML(value=heading_html))

            # Both figures in one row
            figure_row = widgets.HBox(
                [imge_to_show1, imge_to_show2],
                layout=widgets.Layout(width='auto', height="auto"),
            )
            display(figure_row)
        except Exception:
            print("No analysis available...")
            
def on_a2_clicked(event):
    """Click handler of the second approach button: show the model panel.

    Clears the shared output area, then displays the heading, the dataset
    summary and the input row (text field, Generate and Reset buttons). Any
    exception raised while building the view is replaced by a short fallback
    message. ``event`` is not used.
    """
    with output:
        clear_output()
        try:
            heading_html = ("<h3>Selected analysis approach: "
                            + "<i style='color:red;'>" + a2_description + "</i>" + " </h3>")
            display(widgets.HTML(value=heading_html))

            intro()

            button_row = widgets.HBox([btn_generate, content_reset])
            display(widgets.VBox([user_input, button_row]))
        except Exception:
            print("No analysis available...")

In [None]:
# Container for the shared output area
content_B = widgets.VBox([output])

# Connect the two approach buttons to their handlers
a1.on_click(on_a1_clicked)
a2.on_click(on_a2_clicked)

# Stacked content
content_headertext = widgets.HTML(value="<h1>Choose an analysis model~</h1>")

# Button that empties the output area again
content_reset = widgets.Button(
    description="Reset",
    tooltip='Reset',
    button_style="danger",
    layout=widgets.Layout(width="wrap-content", height="wrap-content"),
)

def on_reset_clicked(event):
    """Click handler of the Reset button: empty the shared output area.

    After clearing, a single empty line is printed into the area; a failure
    while printing is replaced by a short fallback message. ``event`` is not
    used.
    """
    with output:
        clear_output()
        try:
            print()
        except Exception:
            print("No analysis available...")

# Connect the Reset button to its handler
content_reset.on_click(on_reset_clicked)

# Page body: heading, approach buttons, output area
content_header = widgets.HBox([content_headertext])
content = widgets.VBox(
    [content_header, content_A, content_B],
    description="content",
    layout=widgets.Layout(width='auto', height="auto"),
)

# Full page: banner on top of the body
page = widgets.VBox([header, content])
display(page)

In [None]:
# to remove not needed requirements
# !pip list --format=freeze > requirements.txt