Bias in the data

In [1]:
# imports and such

import pandas as pd
import numpy as np

# Load the raw feedback export into a dataframe
CSV_PATH = "feedback.csv"   # change to your file path
df = pd.read_csv(CSV_PATH)

# Sanity check: preview the first rows, then list the column names
print(df.head(), df.columns, sep="\n")


FileNotFoundError: [Errno 2] No such file or directory: 'feedback.csv'

In [None]:
# Biased / problematic wording, grouped by the kind of problem it signals.
# Team members: extend any list (or add a new category) with terms that
# make sense for your environment,
# e.g. demeaning_language=["stupid", "idiot", "useless"]
BIAS_KEYWORDS = dict(
    # Judgments about who the person is rather than what they did
    personality_judgment=[
        "lazy",
        "unmotivated",
        "difficult",
        "dramatic",
        "emotional",
        "overly sensitive",
        "not a team player",
    ],
    # Criticism with no concrete, actionable content
    vague_criticism=[
        "bad attitude",
        "poor fit",
        "not leadership material",
        "not a good culture fit",
    ],
    # Blanket dismissals of someone's ability
    ability_dismissal=[
        "not smart",
        "slow learner",
        "incapable",
        "not technical",
    ],
)


In [None]:
# flag biased data in the dataset
import re

def find_bias_matches(text, keyword_dict):
    """
    Scan one piece of feedback text for flagged keywords/phrases.

    Matching is case-insensitive and anchored on word boundaries, so a term
    only counts when it appears as a whole word or phrase (for example
    "emotional" does not match inside "unemotional"). Runs of whitespace
    inside a multi-word phrase are matched flexibly, so "not a  team player"
    or a phrase broken across a line still matches. Blank keywords are
    ignored rather than matching every text.

    Args:
      text: the feedback text; missing values (None/NaN) yield no matches.
      keyword_dict: mapping of category name -> list of keywords/phrases.

    Returns:
      has_bias (bool)
      categories (list of category names, in keyword_dict order)
      terms (list of matched keywords/phrases, as written in keyword_dict)
    """
    if pd.isna(text):
        return False, [], []

    text_lower = str(text).lower()
    matched_categories = []
    matched_terms = []

    for category, words in keyword_dict.items():
        cat_terms_found = []
        for w in words:
            tokens = str(w).lower().split()
            if not tokens:
                # An empty/blank keyword would otherwise "match" everything
                continue
            # Lookarounds instead of \b so terms that start or end with
            # punctuation are still anchored correctly.
            pattern = (
                r"(?<!\w)"
                + r"\s+".join(re.escape(tok) for tok in tokens)
                + r"(?!\w)"
            )
            if re.search(pattern, text_lower):
                cat_terms_found.append(w)
        if cat_terms_found:
            matched_categories.append(category)
            matched_terms.extend(cat_terms_found)

    has_bias = len(matched_terms) > 0
    return has_bias, matched_categories, matched_terms

# Apply to each row.
# Results are collected first and assigned column by column, so an empty
# dataframe still gets the three (empty) result columns instead of failing
# while unpacking zip(*...) into three targets.
bias_results = [find_bias_matches(text, BIAS_KEYWORDS) for text in df["feedback_text"]]
df["has_bias_flag"] = pd.Series([res[0] for res in bias_results], index=df.index, dtype=bool)
df["bias_categories"] = pd.Series([res[1] for res in bias_results], index=df.index, dtype=object)
df["bias_terms"] = pd.Series([res[2] for res in bias_results], index=df.index, dtype=object)

df.head()


Dashboard creation

In [2]:
# --- CELL 1: Setup, load all three waves, define metrics ---

!pip install plotly==5.24.0 ipywidgets==8.1.2 --quiet

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from ipywidgets import interact, Dropdown

from google.colab import output
output.enable_custom_widget_manager()

# === 1. Wave inputs: (label, CSV path) pairs ===
# Edit the filenames here if yours differ
WAVE_FILES = [
    ("Wave 1", "Wave1Unbiased.csv"),
    ("Wave 2", "Wave2Unbiased.csv"),
    ("Wave 3", "Wave3Unbiased.csv"),
]

# Read every wave file and stamp its rows with the wave label
frames = []
for wave_label, path in WAVE_FILES:
    frames.append(pd.read_csv(path).assign(Wave=wave_label))

# Stack all waves into one dataframe with a fresh 0..n-1 index
df = pd.concat(frames, ignore_index=True)

print("Columns:", list(df.columns))
print(df.loc[:, ["Name", "Supervisor", "Wave"]].head())

# Convert the timestamp column when the files provide one;
# values that cannot be parsed become NaT
if "Entry Date and Time" in df.columns:
    df["Entry Date and Time"] = pd.to_datetime(
        df["Entry Date and Time"],
        errors="coerce",
    )

# === 2. Metrics (1–5) to use in radar + averages ===
# Each pair is (column name exactly as it appears in the CSV, short chart label).
# Keys must match the CSV headers character for character.
METRIC_COLS = dict([
    ("Attendance Scores", "Attendance"),
    ("Route Effeciency", "Route Efficiency"),
    ("Driving Behavior", "Driving Behavior"),
    ("Safety Check", "Safety"),
    ("Peer Feedback Averages", "Peer Feedback"),
    ("Customer Feedback", "Customer Feedback"),
    ("Compliance", "Compliance"),
    ("Vehicle Maintenance", "Vehicle Maint."),
    ("Load Securing", "Load Securing"),
    ("Technical Proficiency", "Technical"),
])

# Every metric column must be present; coerce each one to numeric as we go
# (values that are not numbers become NaN). Stops at the first missing column.
for metric_col in METRIC_COLS:
    if metric_col in df.columns:
        df[metric_col] = pd.to_numeric(df[metric_col], errors="coerce")
        continue
    raise ValueError(f"Column '{metric_col}' not found in combined dataframe. Check spelling or METRIC_COLS.")

# === 3. Lists for dropdowns ===
def _wave_sort_key(label):
    """Sort key ordering wave labels by their numeric parts ("Wave 2" before "Wave 10").

    Each whitespace-separated token becomes a tuple; purely numeric tokens
    compare as integers and sort ahead of text tokens, so mixed labels never
    trigger an int-vs-str comparison.
    """
    return [
        (0, int(token), "") if token.isdecimal() else (1, 0, token)
        for token in str(label).split()
    ]

employee_names = sorted(df["Name"].dropna().unique().tolist())
supervisor_names = sorted(df["Supervisor"].dropna().unique().tolist())
# wave_labels also fixes the x-axis order of the time-series charts, so it
# must be in wave order rather than plain string order.
wave_labels = sorted(df["Wave"].unique().tolist(), key=_wave_sort_key)

# === 4. Helper: get record for a specific employee + wave ===
def get_record_for_employee_wave(df, name, wave_label):
    """
    Return the most recent row for one employee within one wave.

    Args:
      df: dataframe with at least "Name" and "Wave" columns.
      name: value to match against the "Name" column.
      wave_label: value to match against the "Wave" column.

    Returns:
      The matching row as a Series, or None when nothing matches. When an
      "Entry Date and Time" column exists, the row with the latest timestamp
      is returned; otherwise the last matching row in dataframe order.
    """
    sub = df[(df["Name"] == name) & (df["Wave"] == wave_label)]
    if sub.empty:
        return None
    # If multiple rows in that wave, use the latest by time if available.
    # Missing timestamps (NaT) are sorted FIRST so that a row whose date could
    # not be parsed is never mistaken for the latest entry; the stable sort
    # keeps dataframe order among equal timestamps.
    if "Entry Date and Time" in sub.columns:
        sub = sub.sort_values("Entry Date and Time", na_position="first", kind="mergesort")
    return sub.iloc[-1]


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.0/19.0 MB[0m [31m92.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.4/139.4 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m72.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m70.3 MB/s[0m eta [36m0:00:00[0m
[?25h

FileNotFoundError: [Errno 2] No such file or directory: 'Wave1Unbiased.csv'

In [None]:
# --- CELL 2: Employee radar with Employee + Wave dropdowns ---

def create_radar_figure(row, metric_map, wave_label):
    """
    Build a filled radar (polar) chart of one employee's metric scores.

    Args:
      row: record supporting row[col] lookups and .get(), e.g. a pandas Series.
      metric_map: mapping of source column name -> axis label shown on the chart.
      wave_label: wave name used in the chart title.

    Returns:
      A plotly Figure with a single Scatterpolar trace and a 1-5 radial axis.
    """
    employee_label = row.get("Name", "Employee")

    axis_labels = [*metric_map.values()]
    scores = [row[source_col] for source_col in metric_map]

    # Repeat the first point at the end so the polygon closes on itself
    theta_points = axis_labels + [axis_labels[0]]
    radius_points = scores + [scores[0]]

    trace = go.Scatterpolar(
        r=radius_points,
        theta=theta_points,
        fill='toself',
        name=employee_label,
    )

    fig = go.Figure()
    fig.add_trace(trace)
    fig.update_layout(
        title=f"Skill Profile for {employee_label} – {wave_label}",
        polar={"radialaxis": {"visible": True, "range": [1, 5]}},
        showlegend=False,
        margin={"l": 40, "r": 40, "t": 60, "b": 40},
    )
    return fig


def show_employee_radar(selected_name, selected_wave):
    """
    Render the radar chart and a short info panel for one employee in one wave.

    Prints a "No data" message and returns early when the employee has no
    record in the selected wave.
    """
    record = get_record_for_employee_wave(df, selected_name, selected_wave)
    if record is None:
        print(f"No data for {selected_name} in {selected_wave}")
        return

    create_radar_figure(record, METRIC_COLS, selected_wave).show()

    # Each printed label is identical to the record field it is read from;
    # fields missing from the record print as an empty string.
    info_fields = ("Name", "Wave", "Employee ID", "Current Route", "Supervisor", "Team ID")
    print("----- Employee Info -----")
    for field in info_fields:
        print(f"{field}: {record.get(field, '')}")
    print("-------------------------")


# Wire the employee and wave pickers to the radar renderer;
# the trailing semicolon suppresses the cell's return-value output.
interact(
    show_employee_radar,
    selected_name=Dropdown(
        description='Employee:',
        options=employee_names,
        layout=dict(width='280px'),
    ),
    selected_wave=Dropdown(
        description='Wave:',
        options=wave_labels,
        layout=dict(width='200px'),
    ),
);


interactive(children=(Dropdown(description='Employee:', layout=Layout(width='280px'), options=('Camila Garcia'…

In [None]:
# --- CELL 3: Employee average score over waves ---

def create_time_series_figure(df, name, metric_map):
    """
    Build a line chart of one employee's average metric score over time.

    The per-row average is the mean across all columns named in metric_map.
    The x-axis is chosen by what the data offers, in this priority:
    one point per wave (ordered by the notebook-level wave_labels list),
    else one point per row ordered by "Entry Date and Time",
    else one point per row by position.

    Args:
      df: dataframe holding all reviews.
      name: employee to plot (matched against the "Name" column).
      metric_map: mapping whose keys are the metric column names.

    Returns:
      A plotly Figure; an empty Figure when the employee has no rows.
    """
    employee_rows = df[df["Name"] == name].copy()
    if employee_rows.empty:
        return go.Figure()

    employee_rows["Avg_Skill_Score"] = employee_rows[list(metric_map)].mean(axis=1)

    if "Wave" in employee_rows.columns:
        # Collapse to one averaged point per wave
        per_wave = (
            employee_rows
            .groupby("Wave", sort=False)["Avg_Skill_Score"]
            .mean()
            .reset_index()
        )
        # An ordered categorical makes the sort follow wave_labels order
        per_wave["Wave"] = pd.Categorical(
            per_wave["Wave"], categories=wave_labels, ordered=True
        )
        per_wave = per_wave.sort_values("Wave")
        x_label = "Wave"
        x, y = per_wave["Wave"], per_wave["Avg_Skill_Score"]
    elif "Entry Date and Time" in employee_rows.columns:
        employee_rows = employee_rows.sort_values("Entry Date and Time")
        x_label = "Entry Date and Time"
        x, y = employee_rows["Entry Date and Time"], employee_rows["Avg_Skill_Score"]
    else:
        employee_rows = employee_rows.reset_index(drop=True)
        x_label = "Review Index"
        x, y = employee_rows.index, employee_rows["Avg_Skill_Score"]

    line = go.Scatter(
        x=x,
        y=y,
        mode="lines+markers",
        name=f"{name} avg score",
        marker={"size": 8},
    )

    fig = go.Figure()
    fig.add_trace(line)
    fig.update_layout(
        title=f"Average Skill Score Over Time for {name}",
        xaxis_title=x_label,
        yaxis_title="Average of All Metrics (1–5)",
        yaxis={"range": [1, 5]},
        hovermode="x unified",
    )
    return fig


def show_employee_timeseries(selected_name):
    """Display the average-score-over-time chart for the selected employee."""
    create_time_series_figure(df, selected_name, METRIC_COLS).show()


# Employee picker for the time-series chart;
# the trailing semicolon suppresses the cell's return-value output.
interact(
    show_employee_timeseries,
    selected_name=Dropdown(
        description='Employee:',
        options=employee_names,
        layout=dict(width='280px'),
    ),
);


interactive(children=(Dropdown(description='Employee:', layout=Layout(width='280px'), options=('Camila Garcia'…

In [None]:
# --- CELL 4: Supervisor radar by Wave (shaded overlays) ---

if "Supervisor" not in df.columns:
    raise ValueError("Column 'Supervisor' not found in df.")

def show_supervisor_radar(selected_supervisor, selected_wave):
    """
    Overlay the radar profiles of every employee under a supervisor for one wave.

    One semi-transparent trace is drawn per employee, using that employee's
    latest row in the wave when an "Entry Date and Time" column exists
    (otherwise their last row in dataframe order). Prints a "No data"
    message and returns early when the supervisor has no rows in the wave.

    NOTE(review): a later cell in this notebook defines a function with the
    same name but a single-argument signature; whichever cell ran last is
    the one bound to the name.
    """
    sub = df[(df["Supervisor"] == selected_supervisor) & (df["Wave"] == selected_wave)]
    if sub.empty:
        print(f"No data for supervisor {selected_supervisor} in {selected_wave}")
        return

    # If multiple rows per employee in that wave, use latest by datetime if
    # available. Missing timestamps (NaT) are sorted FIRST so an unparseable
    # date is never picked as the latest row; the stable sort keeps dataframe
    # order among equal timestamps.
    if "Entry Date and Time" in sub.columns:
        sub = sub.sort_values("Entry Date and Time", na_position="first", kind="mergesort")
    latest = sub.groupby("Name").tail(1)

    categories = list(METRIC_COLS.values())
    # Repeat the first axis so each polygon closes on itself
    categories_closed = categories + [categories[0]]

    fig = go.Figure()

    for _, row in latest.iterrows():
        values = [row[col] for col in METRIC_COLS.keys()]
        values_closed = values + [values[0]]

        fig.add_trace(go.Scatterpolar(
            r=values_closed,
            theta=categories_closed,
            fill='toself',
            opacity=0.25,    # transparent shading
            name=row.get("Name", "Employee"),
            mode='lines'
        ))

    fig.update_layout(
        title=f"Shaded Skill Profiles – {selected_supervisor} – {selected_wave}",
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[1, 5]
            )
        ),
        showlegend=True,
        margin=dict(l=40, r=40, t=60, b=40)
    )

    fig.show()


# Supervisor and wave pickers for the overlaid radar chart;
# the trailing semicolon suppresses the cell's return-value output.
interact(
    show_supervisor_radar,
    selected_supervisor=Dropdown(
        description='Supervisor:',
        options=supervisor_names,
        layout=dict(width='280px'),
    ),
    selected_wave=Dropdown(
        description='Wave:',
        options=wave_labels,
        layout=dict(width='200px'),
    ),
);


interactive(children=(Dropdown(description='Supervisor:', layout=Layout(width='280px'), options=('Angela Marti…

In [None]:
# --- CELL 5: Supervisor-level time series (avg metrics per employee over waves) ---

def show_supervisor_timeseries(selected_supervisor):
    """
    Plot the average metric score over time for every employee under a supervisor.

    One line is drawn per employee. The per-row average is the mean across
    all METRIC_COLS columns. The x-axis is one point per wave (ordered by
    wave_labels) when a "Wave" column exists, else rows ordered by
    "Entry Date and Time", else rows by position. Prints a "No data" message
    and returns early when the supervisor has no rows.
    """
    sub = df[df["Supervisor"] == selected_supervisor].copy()
    if sub.empty:
        print(f"No data for supervisor {selected_supervisor}")
        return

    metric_cols = list(METRIC_COLS.keys())
    sub["Avg_Skill_Score"] = sub[metric_cols].mean(axis=1)

    # Choose the x-axis once, up front. Every per-employee group has the same
    # columns as `sub`, and the label must exist even when the loop below
    # yields no groups (e.g. every Name is missing), otherwise the layout
    # call would fail on an unbound variable.
    if "Wave" in sub.columns:
        x_label = "Wave"
    elif "Entry Date and Time" in sub.columns:
        x_label = "Entry Date and Time"
    else:
        x_label = "Review Index"

    fig = go.Figure()

    for emp_name, emp_df in sub.groupby("Name"):
        if x_label == "Wave":
            # Collapse to one averaged point per wave, ordered by wave_labels
            agg = emp_df.groupby("Wave", sort=False)["Avg_Skill_Score"].mean().reset_index()
            agg["Wave"] = pd.Categorical(agg["Wave"], categories=wave_labels, ordered=True)
            agg = agg.sort_values("Wave")
            x = agg["Wave"]
            y = agg["Avg_Skill_Score"]
        elif x_label == "Entry Date and Time":
            emp_df = emp_df.sort_values("Entry Date and Time")
            x = emp_df["Entry Date and Time"]
            y = emp_df["Avg_Skill_Score"]
        else:
            emp_df = emp_df.reset_index(drop=True)
            x = emp_df.index
            y = emp_df["Avg_Skill_Score"]

        fig.add_trace(go.Scatter(
            x=x,
            y=y,
            mode="lines+markers",
            name=emp_name
        ))

    fig.update_layout(
        title=f"Average Skill Score Over Time – Employees under {selected_supervisor}",
        xaxis_title=x_label,
        yaxis_title="Average of All Metrics (1–5)",
        yaxis=dict(range=[1, 5]),
        legend_title="Employee"
    )

    fig.show()


# Supervisor picker for the per-employee time-series chart;
# the trailing semicolon suppresses the cell's return-value output.
interact(
    show_supervisor_timeseries,
    selected_supervisor=Dropdown(
        description='Supervisor:',
        options=supervisor_names,
        layout=dict(width='280px'),
    ),
);


interactive(children=(Dropdown(description='Supervisor:', layout=Layout(width='280px'), options=('Angela Marti…

In [None]:
# --- CELL 4 (UPDATED): Supervisor-level radar with shaded overlays ---

# Get supervisor list
if "Supervisor" not in df.columns:
    raise ValueError("Column 'Supervisor' not found in df. Check your CSV column names.")

supervisor_names = sorted(df["Supervisor"].dropna().unique().tolist())

def show_supervisor_radar(selected_supervisor):
    """
    Overlay the radar profiles of every employee under a supervisor.

    One semi-transparent trace is drawn per employee, using that employee's
    latest row across all waves when an "Entry Date and Time" column exists
    (otherwise their last row in dataframe order). Prints a "No data"
    message and returns early when the supervisor has no rows.

    NOTE(review): an earlier cell in this notebook defines a function with
    the same name that also takes a wave argument; running this cell rebinds
    the name to this single-argument version.
    """
    # Filter rows for this supervisor
    sub = df[df["Supervisor"] == selected_supervisor]
    if sub.empty:
        print(f"No data for supervisor: {selected_supervisor}")
        return

    # Get latest record per employee
    if "Entry Date and Time" in sub.columns:
        # Sort with missing timestamps (NaT) FIRST and take each employee's
        # last row. Unlike a per-group idxmax, this still works when all of
        # an employee's timestamps are missing, and never prefers a NaT row
        # over a dated one. Rows without a Name are skipped, and the result
        # is ordered by Name so traces/legend entries appear alphabetically.
        dated = (
            sub.dropna(subset=["Name"])
            .sort_values("Entry Date and Time", na_position="first", kind="mergesort")
        )
        latest = dated.groupby("Name").tail(1).sort_values("Name", kind="mergesort")
    else:
        latest = sub.groupby("Name").tail(1)

    # Radar categories (first axis repeated so each polygon closes on itself)
    categories = list(METRIC_COLS.values())
    categories_closed = categories + [categories[0]]

    fig = go.Figure()

    # Add shaded radar for each employee
    for _, row in latest.iterrows():
        values = [row[col] for col in METRIC_COLS.keys()]
        values_closed = values + [values[0]]

        fig.add_trace(go.Scatterpolar(
            r=values_closed,
            theta=categories_closed,
            fill='toself',               # <--- SHADED AREA
            opacity=0.25,                # <--- Transparent so they overlap cleanly
            name=row.get("Name", "Employee"),
            mode='lines'
        ))

    fig.update_layout(
        title=f"Shaded Skill Profiles for Employees under Supervisor: {selected_supervisor}",
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[1, 5]
            )
        ),
        showlegend=True,
        margin=dict(l=40, r=40, t=60, b=40)
    )

    fig.show()

# Supervisor picker for the overlaid radar chart;
# the trailing semicolon suppresses the cell's return-value output.
interact(
    show_supervisor_radar,
    selected_supervisor=Dropdown(
        description='Supervisor:',
        options=supervisor_names,
        layout=dict(width='400px'),
    ),
);


interactive(children=(Dropdown(description='Supervisor:', layout=Layout(width='400px'), options=('Angela Marti…

Creating a small dashboard website for BI views