In [239]:
import panel as pn                # The main dashboard framework
import hvplot.pandas             # Adds plotting methods directly to pandas DataFrames
import pandas as pd              # For data manipulation
import numpy as np               # For numerical operations
import seaborn as sns            # For additional plotting capabilities
import holoviews as hv
from scipy import stats

# Initialize Panel extension - this is crucial!
# It enables Jupyter to display Panel objects and interactive widgets
pn.extension()

In [240]:
# Load the dataset from a CSV file; the path is relative to the notebook's working directory.

df_original = pd.read_csv('Data/2015.csv')
# Bare last expression: the notebook renders the frame as a rich table.
df_original

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.03880,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...,...
153,Rwanda,Sub-Saharan Africa,154,3.465,0.03464,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,Benin,Sub-Saharan Africa,155,3.340,0.03656,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,Syria,Middle East and Northern Africa,156,3.006,0.05015,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,Burundi,Sub-Saharan Africa,157,2.905,0.08658,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [249]:
# Transform data

# Use the happiness rank as the row index
df = df_original.set_index('Happiness Rank')

# Drop unused columns (a list is the conventional list-like for `columns`)
df = df.drop(columns=['Standard Error', 'Dystopia Residual'])

# Rename to short names for the visualizations.
# Reassign instead of inplace=True so that `df` is only ever rebound in this cell.
df = df.rename(columns={'Economy (GDP per Capita)': 'GDP_Capita',
                        'Happiness Score': 'HappinessScore',
                        'Trust (Government Corruption)': 'GvrnmntCorruption',
                        'Health (Life Expectancy)': 'LifeExpectancy'})

# Normalize the government corruption index to the range 0.0 - 1.0
# by dividing by the column maximum; non-positive values map to 0.
corr_divider = df['GvrnmntCorruption'].max() # 0.55
# Vectorized equivalent of the former row-wise apply: scale the whole column,
# then overwrite the entries whose raw value is <= 0 with 0.
df['Corruption'] = (df['GvrnmntCorruption'] / corr_divider).mask(df['GvrnmntCorruption'] <= 0, 0.0)
df = df.drop(columns=['GvrnmntCorruption'])

# Round all numeric columns to 3 decimal places
df = df.round(3)
df

Unnamed: 0_level_0,Country,Region,HappinessScore,GDP_Capita,Family,LifeExpectancy,Freedom,Generosity,Corruption
Happiness Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Switzerland,Western Europe,7.587,1.397,1.350,0.941,0.666,0.297,0.761
2,Iceland,Western Europe,7.561,1.302,1.402,0.948,0.629,0.436,0.256
3,Denmark,Western Europe,7.527,1.325,1.361,0.875,0.649,0.341,0.876
4,Norway,Western Europe,7.522,1.459,1.331,0.885,0.670,0.347,0.661
5,Canada,North America,7.427,1.326,1.323,0.906,0.633,0.458,0.597
...,...,...,...,...,...,...,...,...,...
154,Rwanda,Sub-Saharan Africa,3.465,0.222,0.774,0.429,0.592,0.226,1.000
155,Benin,Sub-Saharan Africa,3.340,0.287,0.354,0.319,0.484,0.183,0.145
156,Syria,Middle East and Northern Africa,3.006,0.663,0.475,0.722,0.157,0.472,0.343
157,Burundi,Sub-Saharan Africa,2.905,0.015,0.416,0.224,0.118,0.197,0.182


In [246]:
# Build summary
# Column names split by kind: numeric vs. everything else.
numerical_cols = df.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()

# Per-column overview: dtype name, number of missing values, number of distinct values.
data_summary = {
    name: dict(
        type=str(df[name].dtype),
        missing=df[name].isna().sum(),
        unique_values=len(df[name].unique()),
    )
    for name in df.columns
}

# Transpose so that each dataset column becomes one row of the summary table.
display(pd.DataFrame(data_summary).T)

['HappinessScore', 'GDP_Capita', 'Family', 'LifeExpectancy', 'Freedom', 'Generosity', 'Corruption']
['Country', 'Region']


Unnamed: 0,type,missing,unique_values
Country,object,0,158
Region,object,0,10
HappinessScore,float64,0,157
GDP_Capita,float64,0,149
Family,float64,0,150
LifeExpectancy,float64,0,139
Freedom,float64,0,141
Generosity,float64,0,130
Corruption,float64,0,136


In [248]:
# Focus on the government corruption index: print its range, then show it per country.
print(" Govenmen Corupption index range from {} to {}".format(
    df['Corruption'].min(),
    df['Corruption'].max(),
))
# Transposed so that countries run across the columns.
df[['Country', 'Corruption']].T

 Govenmen Corupption index range from 0.0 to 1.0


Happiness Rank,1,2,3,4,5,6,7,8,9,10,...,149,150,151,152,153,154,155,156,157,158
Country,Switzerland,Iceland,Denmark,Norway,Canada,Finland,Netherlands,Sweden,New Zealand,Australia,...,Chad,Guinea,Ivory Coast,Burkina Faso,Afghanistan,Rwanda,Benin,Syria,Burundi,Togo
Corruption,0.761,0.256,0.876,0.661,0.597,0.75,0.576,0.794,0.778,0.646,...,0.095,0.22,0.325,0.233,0.176,1.0,0.145,0.343,0.182,0.194


In [256]:
# Create the selector widgets shared by all plots

# Variable to analyze: one of the numeric columns.
select_var = pn.widgets.Select(
    options=numerical_cols,                                # What options to show
    name='Variable: Choose the variable to analyze',       # Label
    # Default value. It must be one of `options`: the column is called
    # 'HappinessScore' after the rename in the transform cell, so the previous
    # default 'Happiness_score' did not match any option.
    value='HappinessScore',
    description='Choose the variable to analyze'           # Tooltip help text
)

# Grouping variable: only columns with fewer than 20 distinct values are offered.
select_group = pn.widgets.Select(
    options=[col for col in df.columns if df[col].nunique() < 20],
    name='Group By',
    value='Region'
)

# Range filter on the normalized corruption index; starts with the full range selected.
corruption_range = pn.widgets.RangeSlider(
    name='Govenment Corruption Rate',
    start=df['Corruption'].min(),
    end=df['Corruption'].max(),
    value=(df['Corruption'].min(), df['Corruption'].max()),
    step=0.1,
    format='0[.]00'
)

# Stack the widgets vertically; this column is reused by the dashboards below.
controls = pn.Column(
    select_var, select_group, corruption_range,
    sizing_mode='stretch_width'
)

display(controls)

In [285]:
# Heatmap function

from scipy.cluster import hierarchy
from scipy.stats import spearmanr

def create_correlation_heatmap(data=None, columns=None):
    """Create a Pearson correlation heatmap whose variables are ordered by clustering.

    Pearson and Spearman correlation matrices are computed for the selected
    columns. The Spearman matrix is passed to Ward hierarchical clustering and
    the resulting leaf order is used to reorder the rows and columns of the
    Pearson matrix, which is what gets plotted.

    Args:
        data (pandas.DataFrame, optional): Frame to analyze. Defaults to the
            notebook-level ``df``.
        columns (list of str, optional): Numeric columns to correlate. Defaults
            to the notebook-level ``numerical_cols`` when ``data`` is omitted,
            otherwise to every numeric column of ``data``.

    Returns:
        The object returned by ``hvplot.heatmap`` for the reordered matrix.
    """
    if data is None:
        data = df
        if columns is None:
            columns = numerical_cols
    elif columns is None:
        columns = list(data.select_dtypes(include=[np.number]).columns)

    # Calculate correlations
    numeric = data[columns]
    pearson_corr = numeric.corr('pearson')
    spearman_corr = numeric.corr('spearman')

    # Hierarchical clustering: each row of the Spearman matrix is treated as
    # one observation; the leaf order groups variables with similar profiles.
    linkage = hierarchy.linkage(spearman_corr, method='ward')
    order = hierarchy.leaves_list(linkage)

    # Reorder rows and columns of the plotted (Pearson) matrix identically
    ordered_corr = pearson_corr.iloc[order, order]

    # Create the heatmap
    heatmap = ordered_corr.hvplot.heatmap(
        title='Variable Correlations (with Hierarchical Clustering)',
        height=600,
        width=800,
        cmap='RdBu_r',  # Red-Blue diverging colormap

        # Only fields that exist in the plotted matrix are referenced. The former
        # '@spearman' and '@n' entries pointed at columns that are never built.
        # NOTE(review): confirm the installed hvplot version accepts `tooltips`.
        tooltips=[
            ('Variables', '@{index} vs @{columns}'),
            ('Pearson Correlation', '@value{0.00}'),
        ],

        symmetric=True,      # Keep the color range symmetric around zero
        xaxis='bottom',      # Draw the x-axis at the bottom
        colorbar=True,       # Show colorbar
        clim=(-1, 1)         # Correlations lie in [-1, 1]
    )

    return heatmap

# Create and display the heatmap
# correlation_heatmap = create_correlation_heatmap(df)
# correlation_heatmap

In [258]:
# Histogram function

@pn.depends(select_var, select_group, corruption_range)
def histogram_plot(select_var, select_group, corruption_range):
    """Build a grouped histogram of one variable for the selected corruption range.

    Args:
        select_var (str): Column whose distribution is plotted.
        select_group (str): Column used to split the histogram into groups.
        corruption_range (tuple): (lower, upper) bounds on the 'Corruption'
            column; both bounds are inclusive.

    Returns:
        The object returned by ``hvplot.hist`` for the filtered rows.
    """
    lower_ok = df['Corruption'] >= corruption_range[0]
    upper_ok = df['Corruption'] <= corruption_range[1]
    in_range = df[lower_ok & upper_ok]

    return in_range.hvplot.hist(
        y=select_var,
        by=select_group,
        bins=20,
        height=300,
        alpha=0.6,
        title=f'Distribution of {select_var}',
        xlabel=select_var,
        ylabel='Count',
        responsive=True,
        legend_position='right',
    )

In [259]:
# Histogram dashboard: the shared widget column stacked above the interactive histogram.
# NOTE: the name `dashboard` is rebound further down to a ResponsiveTabbedDashboard instance.
dashboard = pn.Column(controls, histogram_plot, sizing_mode='stretch_both')

# Display the dashboard
# dashboard



In [302]:
# Boxplot function

@pn.depends(select_var, select_group, corruption_range)
def box_plot(select_var, select_group, corruption_range):
    """Create a grouped box plot of one variable for the selected corruption range.

    Args:
        select_var (str): Variable to plot on the y-axis
        select_group (str): Grouping variable for the x-axis
        corruption_range (tuple): (lower, upper) bounds on the 'Corruption'
            column; both bounds are inclusive

    Returns:
        The object returned by ``hvplot.box`` with x tick labels rotated 90 degrees
    """
    # Keep only the rows inside the selected corruption range.
    # Nothing is written to the result, so no defensive copy is needed.
    filtered_df = df[
        (df['Corruption'] >= corruption_range[0]) &
        (df['Corruption'] <= corruption_range[1])
    ]

    # Low-cardinality columns (fewer than 10 distinct values) are offered to the
    # hover tool. Computed once; the debug display of this list and the unused
    # per-group statistics dict (which shadowed the scipy `stats` import) are gone.
    hover_cols = [col for col in filtered_df.columns if filtered_df[col].nunique() < 10]

    # Create the box plot with extensive customization
    plot = filtered_df.hvplot.box(
        y=select_var,
        by=select_group,
        height=400,
        # NOTE(review): 'category' is not a column of `df`; confirm this styles
        # the boxes as intended.
        box_fill_color='category',
        whisker_color='black',      # Make whiskers black for contrast
        title=f'Distribution of {select_var} by {select_group}',
        hover_cols=hover_cols,
        # Customize the appearance
        box_alpha=0.7,              # Slight transparency
        outlier_alpha=0.7,          # Match outlier transparency
        width=400,                  # Fixed width
        legend='bottom',            # Place the legend at the bottom

        # Hover configuration
        # NOTE(review): '@count' and '@median' are assumed to exist in the
        # plot's data source; verify against the rendered tooltip.
        tools=['hover'],            # Enable hover tool
        tooltips=[
            ('Group', '@{' + select_group + '}'),
            ('Value', '@{' + select_var + '}{0.00}'),
            ('Count', '@count'),
            ('Median', '@median{0.00}')
        ]
    )

    # Group names are long, so rotate the x tick labels
    plot.opts(xrotation=90)

    return plot

In [304]:
# Box-plot dashboard: the shared widget column stacked above the interactive box plot.
dashboard2 = pn.Column(controls, box_plot, sizing_mode='stretch_both')
# dashboard2

[]



In [305]:
# Scatter plot function

from bokeh.palettes import Category10
from scipy.stats import linregress

@pn.depends(select_var, corruption_range, select_group)
def create_scatter(x_var, corruption_range, group_var):
    """Scatter the selected variable against 'Corruption', one color per group.

    For every group a scatter plot is drawn and, when the group has at least
    two distinct x values, a least-squares trend line in the same color is
    overlaid.

    Args:
        x_var (str): Column plotted on the x-axis.
        corruption_range (tuple): (lower, upper) bounds on the 'Corruption'
            column; both bounds are inclusive.
        group_var (str): Column whose distinct values define the groups.

    Returns:
        The overlay of all group plots, or None when no row passes the filter.
    """
    # Filter data based on the corruption index range
    filtered_df = df[(df['Corruption'] >= corruption_range[0]) &
                     (df['Corruption'] <= corruption_range[1])]

    # The y-variable is always the corruption index
    y_var = 'Corruption'

    # Identify unique groups
    groups = filtered_df[group_var].unique()
    # Bokeh's Category10 dict is keyed by palette size 3..10, so clamp the
    # lookup: fewer than 3 surviving groups used to raise KeyError.
    colors = Category10[max(3, min(len(groups), 10))]

    combined = None

    for i, g in enumerate(groups):
        group_data = filtered_df[filtered_df[group_var] == g]
        color = colors[i % len(colors)]  # wraps around when there are more than 10 groups

        # Scatter plot for this group
        group_plot = group_data.hvplot.scatter(
            x=x_var,
            y=y_var,
            size=100,
            alpha=0.6,
            color=color,
            label=str(g)
        )

        # A regression line needs at least two distinct x values; skip the
        # trend line otherwise instead of failing or drawing a NaN line.
        if group_data[x_var].nunique() >= 2:
            slope, intercept = linregress(group_data[x_var], group_data[y_var])[:2]
            x_line = np.array([group_data[x_var].min(), group_data[x_var].max()])
            y_line = slope * x_line + intercept
            line_df = pd.DataFrame({x_var: x_line, y_var: y_line})

            trend_line = line_df.hvplot.line(
                x=x_var,
                y=y_var,
                color=color,
                line_width=2,
                label=f'{g} trend'
            )

            # Overlay the scatter and its trend line
            group_plot = group_plot * trend_line

        # Combine with previous groups
        combined = group_plot if combined is None else combined * group_plot

    # Add options to the combined plot
    if combined is not None:
        combined = combined.opts(
            width=800,
            height=600,
            title=f'Relationship between {x_var} and {y_var}\n',
            tools=['hover', 'box_zoom', 'reset'],
            show_grid=True,
            toolbar='above'
        )

    return combined

In [306]:
# Scatter dashboard: the shared widget column stacked above the interactive scatter plot.
dashboard3 = pn.Column(controls, create_scatter, sizing_mode='stretch_both')

In [308]:
# dashboard3

In [310]:
import panel as pn

pn.extension()

class ResponsiveTabbedDashboard:
    """Tabbed dashboard: widgets in a template sidebar, plots spread over three tabs.

    The layout is built once in the constructor and handed out by ``view()``.
    """

    def __init__(self, widgets, plots):
        """Store the inputs and build the layout.

        Args:
            widgets: Iterable of widgets placed in the sidebar.
            plots: Mapping with the keys 'boxplot', 'histogram', 'scatter',
                'correlation' and 'stats'.
        """
        self.widgets = widgets
        self.plots = plots
        self._layout = self.create_layout()

    def create_layout(self):
        """Assemble the sidebar and the tabs into a VanillaTemplate and return it."""
        fill = 'stretch_both'
        plots = self.plots

        # Sidebar: every widget stacked vertically
        sidebar = pn.Column(*self.widgets, sizing_mode="stretch_width")

        # Tab 1: box plot and histogram side by side
        overview = pn.Column(
            pn.Row(plots['boxplot'], plots['histogram'], sizing_mode=fill),
            sizing_mode=fill,
        )
        # Tab 2: scatter plot next to the correlation heatmap
        relationships = pn.Row(plots['scatter'], plots['correlation'], sizing_mode=fill)
        # Tab 3: summary statistics
        statistics = pn.Column(plots['stats'], sizing_mode=fill)

        tabs = pn.Tabs(
            ('Overview', overview),
            ('Relationships', relationships),
            ('Statistics', statistics),
            sizing_mode=fill,
        )

        return pn.template.VanillaTemplate(
            title="Interactive EDA Dashboard",
            sidebar=[sidebar],
            main=[pn.Column(tabs, sizing_mode=fill)],
        )

    def view(self):
        """Return the layout built in the constructor."""
        return self._layout


In [311]:
# Assemble the tabbed dashboard from the shared widgets and the plot callables.
# NOTE(review): the summary table was displayed transposed (`.T`) when it was built;
# here it is passed untransposed - confirm which orientation is intended.
dashboard = ResponsiveTabbedDashboard(
    widgets=[select_var, select_group, corruption_range],
    plots=dict(
        histogram=histogram_plot,
        boxplot=box_plot,
        scatter=create_scatter,
        correlation=create_correlation_heatmap,
        stats=pd.DataFrame(data_summary),
    ),
)

dashboard.view()

[]

