In [1]:
import polars as pl
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import plotly.express as px

# Make "plotly_white" the default Plotly template so figures created below
# pick it up without having to pass `template=` explicitly.
pio.templates.default = "plotly_white"

from util import multi_select_counts, get_subplot_coords, df_prop, df_multiselect_prop
import mappings

In [2]:
# Load the pre-program survey export into a polars DataFrame.
# The path is relative, so the notebook must run from the directory that
# contains `OriginalData/`.
df = pl.read_csv("OriginalData/Preprogram_Survey.csv")

In [3]:
# Inspect the column names; the later cells reference survey questions by
# these exact header strings.
df.columns

['#',
 'How old are you?',
 'What is your current education status?',
 'Other',
 'What college, high school, or program are you currently attending (if any)?',
 'What is your gender identity?',
 'Other_duplicated_0',
 'Hispanic or Latino',
 'White',
 'Asian or Pacific Islander',
 'American Indian or Alaska Native',
 'Black or African American',
 'Other_duplicated_1',
 'Have you held a paid job before?',
 'Retail / customer service',
 'Food service / hospitality',
 'Office or clerical work',
 'Youth mentoring or childcare',
 'Internship / fellowship',
 'Family business / informal work',
 'Other_duplicated_2',
 'Build professional experience',
 'Improve job readiness and soft skills',
 'Learn about career options',
 'Grow my resume or LinkedIn profile',
 'Network with professionals',
 'Gain mentorship and guidance',
 'Improve my financial knowledge',
 'Other_duplicated_3',
 'Communicating professionally (verbal and written)',
 'Navigating workplace expectations and culture',
 'Time manag

In [4]:
# Tabulate responses to the education-status question.
# In the rendered output prop == len / 100, so the third argument appears to
# be the denominator (number of respondents) — confirm against util.df_prop.
df_prop(df, 'What is your current education status?', 100)

What is your current education status?,len,prop
str,u32,f64
"""High school graduate* (not in …",24,0.24
"""Currently enrolled in 2/4-year…",53,0.53
"""Enrolled in a trade or apprent…",5,0.05
,3,0.03
"""Recently graduated from colleg…",8,0.08
"""In high school""",7,0.07


In [5]:
# Tabulate responses to the gender-identity question.
# In the rendered output prop == len / 100, so the third argument appears to
# be the denominator (number of respondents) — confirm against util.df_prop.
df_prop(df, 'What is your gender identity?', 100)

What is your gender identity?,len,prop
str,u32,f64
"""Female""",57,0.57
"""Non-binary""",2,0.02
"""Male""",39,0.39
"""Prefer not to answer""",1,0.01
,1,0.01


In [6]:
# Derive the grouping columns used by the charts below:
#   * "College Student": "True" when the education status contains "Currently"
#     (case-sensitive substring), otherwise "False".
#   * "Gender": "Female" for an exact match, otherwise "Not Female".
#   * "Count": constant 1 per row, so summing it counts respondents.
# Native polars expressions replace the previous per-row Python lambdas in
# `map_elements`. Missing answers stay null, as before (map_elements skips
# nulls by default): each chain has no `.otherwise(...)`, and the second
# `when` only fires for non-null values.
df = df.with_columns(
    pl.when(
        pl.col("What is your current education status?").str.contains("Currently", literal=True)
    )
    .then(pl.lit("True"))
    .when(pl.col("What is your current education status?").is_not_null())
    .then(pl.lit("False"))
    .alias("College Student"),
    pl.when(pl.col("What is your gender identity?") == "Female")
    .then(pl.lit("Female"))
    .when(pl.col("What is your gender identity?").is_not_null())
    .then(pl.lit("Not Female"))
    .alias("Gender"),
    pl.lit(1).alias("Count"),
)

In [7]:
# Grouped bars: respondents per "Gender" bucket, split by the
# "College Student" flag. "Count" is 1 per row, so the bar height (sum of
# Count) is the number of participants.
fig = px.histogram(
    df,
    x="Gender",
    y="Count",
    color="College Student",
    barmode="group",
    text_auto=True,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=600,
    height=400,
)
fig.update_traces(textposition="inside", textfont_size=12)
fig.update_layout(yaxis_title="Number of Participants")
fig.show()

  sf: grouped.get_group(s if len(s) > 1 else s[0])


In [8]:
# Tabulate responses to the prior-paid-job question.
# In the rendered output prop == len / 100, so the third argument appears to
# be the denominator (number of respondents) — confirm against util.df_prop.
df_prop(df, 'Have you held a paid job before?', 100)

Have you held a paid job before?,len,prop
str,u32,f64
"""No""",18,0.18
"""Yes""",82,0.82


In [9]:
# Tabulate responses to the gender-identity question.
# NOTE(review): this repeats an earlier cell with identical arguments; only
# the row order of the output differs. Consider removing one of them.
df_prop(df, 'What is your gender identity?', 100)

What is your gender identity?,len,prop
str,u32,f64
"""Prefer not to answer""",1,0.01
,1,0.01
"""Female""",57,0.57
"""Male""",39,0.39
"""Non-binary""",2,0.02


In [10]:
# Tabulate the race/ethnicity multi-select columns (one column per option).
# In the rendered output prop == count / 100, so the last argument appears to
# be the denominator (number of respondents) — confirm against
# util.df_multiselect_prop. Respondents can pick several options, so the
# proportions need not sum to 1.
df_multiselect_prop(df, ["Hispanic or Latino", 'White', 'Asian or Pacific Islander', 'American Indian or Alaska Native', 'Black or African American'], 100)

Unnamed: 0,value,count,prop
0,Black or African American,90,0.9
1,Hispanic or Latino,12,0.12
2,White,2,0.02
3,Asian or Pacific Islander,1,0.01


In [11]:
# Tabulate responses to the prior-paid-job question.
# NOTE(review): this repeats an earlier cell with identical arguments; only
# the row order of the output differs. Consider removing one of them.
df_prop(df, 'Have you held a paid job before?', 100)

Have you held a paid job before?,len,prop
str,u32,f64
"""Yes""",82,0.82
"""No""",18,0.18


In [12]:
# Tabulate the previous-job-type multi-select columns.
# `[1:]` drops the first entry of mappings.prev_job_col — presumably the "#"
# respondent-id column that the multi_select_counts calls pass first; verify
# in mappings.py.
df_multiselect_prop(df, mappings.prev_job_col[1:], 100)

Unnamed: 0,value,count,prop
0,Internship / fellowship,38,0.38
1,Retail / customer service,37,0.37
2,Food service / hospitality,29,0.29
3,Office or clerical work,26,0.26
4,Youth mentoring or childcare,21,0.21
5,Family business / informal work,7,0.07


In [13]:
# Tally previous job types across the multi-select columns.
# The result is used through `.index` / `.values` (presumably a pandas
# Series keyed by label — confirm in util.multi_select_counts).
prev_job = multi_select_counts(df, mappings.prev_job_col, mappings.prev_job_map)

# Standalone bar chart: one dark-blue bar per job type.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        name="Previous Job Experience",
        x=prev_job.index,
        y=prev_job.values,
        marker_color="darkblue",
    )
)
fig.update_layout(
    template="plotly_white",
    title="Previous Job Experience",
    xaxis_title="Job Type",
    yaxis_title="Count",
)
fig.show()

In [14]:
# Re-list the column names for reference while writing the cells below.
# NOTE(review): same inspection as the earlier `df.columns` cell; consider
# removing it from the final notebook.
df.columns

['#',
 'How old are you?',
 'What is your current education status?',
 'Other',
 'What college, high school, or program are you currently attending (if any)?',
 'What is your gender identity?',
 'Other_duplicated_0',
 'Hispanic or Latino',
 'White',
 'Asian or Pacific Islander',
 'American Indian or Alaska Native',
 'Black or African American',
 'Other_duplicated_1',
 'Have you held a paid job before?',
 'Retail / customer service',
 'Food service / hospitality',
 'Office or clerical work',
 'Youth mentoring or childcare',
 'Internship / fellowship',
 'Family business / informal work',
 'Other_duplicated_2',
 'Build professional experience',
 'Improve job readiness and soft skills',
 'Learn about career options',
 'Grow my resume or LinkedIn profile',
 'Network with professionals',
 'Gain mentorship and guidance',
 'Improve my financial knowledge',
 'Other_duplicated_3',
 'Communicating professionally (verbal and written)',
 'Navigating workplace expectations and culture',
 'Time manag

In [15]:
# Tabulate the participation-goal multi-select columns (one column per goal).
# In the rendered output prop == count / 100, so the last argument appears to
# be the denominator (number of respondents) — confirm against
# util.df_multiselect_prop.
df_multiselect_prop(df, ['Build professional experience',
 'Improve job readiness and soft skills',
 'Learn about career options',
 'Grow my resume or LinkedIn profile',
 'Network with professionals',
 'Gain mentorship and guidance',
 'Improve my financial knowledge'], 100)

Unnamed: 0,value,count,prop
0,Build professional experience,68,0.68
1,Improve job readiness and soft skills,43,0.43
2,Grow my resume or LinkedIn profile,39,0.39
3,Network with professionals,33,0.33
4,Gain mentorship and guidance,29,0.29
5,Improve my financial knowledge,20,0.2
6,Learn about career options,13,0.13


In [16]:
# Tally how many respondents selected each participation goal. "#" is passed
# first, as in the other multi_select_counts calls (presumably the
# respondent-id column — confirm in util.multi_select_counts).
goal = multi_select_counts(
    df,
    [
        "#",
        "Build professional experience",
        "Improve job readiness and soft skills",
        "Learn about career options",
        "Grow my resume or LinkedIn profile",
        "Network with professionals",
        "Gain mentorship and guidance",
        "Improve my financial knowledge",
    ],
)

# Standalone bar chart: one bar per goal.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        name="Goal of Participation",
        x=goal.index,
        y=goal.values,
    )
)
fig.update_layout(
    template="plotly_white",
    title="Goal of Participation",
    xaxis_title="Identified Goal",
    yaxis_title="Count",
)
fig.show()

In [17]:
# Tally the career-consideration multi-select answers, relabelled through
# mappings.career_consideration_map.
career = multi_select_counts(df, mappings.career_consideration_col, mappings.career_consideration_map)

# Donut chart (hole=0.4) with each slice annotated by percentage and label.
fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=career.index,
        values=career.values,
        hole=0.4,
        textinfo="percent+label",
    )
)
fig.update_layout(
    template="plotly_white",
    title="Key Drivers of Career Decisions",
    width=800,
    height=500,
)
fig.show()

In [18]:
# Tally the help-needed multi-select answers. No label map is passed here,
# so the slices use whatever labels multi_select_counts produces by default.
dev_needs = multi_select_counts(df, mappings.help_needed_col)

# Donut chart (hole=0.4) with each slice annotated by percentage and label.
fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=dev_needs.index,
        values=dev_needs.values,
        hole=0.4,
        textinfo="percent+label",
    )
)
fig.update_layout(
    template="plotly_white",
    title="Development Needs",
    width=800,
    height=500,
)
fig.show()

In [19]:
# Desired-career counts per category.
# NOTE(review): these numbers are hard-coded; their derivation is not shown
# in this notebook (presumably a manual coding of free-text answers).
jobs_desired = {
    "Healthcare & Wellness": 35,
    "Business & Finance": 28,
    "Technology & IT": 15,
    "Law & Public Safety": 14,
    "Education & Human Services": 13,
    "Creative Arts & Media": 10,
    "Trades & Skilled Labor": 8,
    "Science & Research (General)": 5,
    "Other/Vague": 4,
}

# Two-column frame: category label and its count.
df_job = pl.DataFrame(
    {
        "Job": jobs_desired.keys(),
        "count": jobs_desired.values(),
    }
)

# Donut chart (hole=0.4) with each slice annotated by percentage and label.
fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=df_job["Job"],
        values=df_job["count"],
        hole=0.4,
        textinfo="percent+label",
    )
)
fig.update_layout(
    template="plotly_white",
    title="Job Interest Breakdown by Career Category",
    width=800,
    height=500,
)
fig.show()

In [30]:
# Invert the score-card name mapping so it can be used to relabel the
# "Question" column after unpivoting (original values become the keys).
r_map = {
    target: source
    for source, target in mappings.score_card_colname_name_mapping.items()
}

# Build the long-format score card in one chain:
#   1. keep the score-card columns plus the grouping columns,
#   2. standardize the raw labels, map them to ratings and cast to Int32,
#   3. unpivot to one row per (respondent, question),
#   4. relabel the question names through r_map.
sc = (
    df.select(mappings.score_card + ["College Student", "Gender", "Count"])
    .with_columns(
        pl.col(mappings.score_card)
        .replace(mappings.entry_standardization_mapping)
        .replace(mappings.skill_rating_map)
        .cast(pl.Int32)
    )
    .unpivot(
        on=mappings.score_card,
        index=["College Student", "Gender", "Count"],
        variable_name="Question",
        value_name="Rating",
    )
    .with_columns(pl.col("Question").replace(r_map))
)

In [31]:
# Mean self-rating per score-card question (histfunc="avg"), with one bar
# per "College Student" group.
fig = px.histogram(
    sc,
    x="Question",
    y="Rating",
    histfunc="avg",
    color="College Student",
    barmode="group",
    height=400,
)
fig.update_traces(textposition="inside", textfont_size=12)
fig.update_layout(yaxis_title="Mean Rating")
fig.show()





In [34]:
# Tabulate responses to the saving-frequency question.
# In the rendered output prop == len / 100, so the third argument appears to
# be the denominator (number of respondents) — confirm against util.df_prop.
df_prop(df, "How often do you save money *(beyond daily spending)*?", 100)

How often do you save money *(beyond daily spending)*?,len,prop
str,u32,f64
"""Sometimes, when I can""",39,0.39
"""Rarely or never""",5,0.05
"""I save with a specific financi…",28,0.28
"""I try to save consistently""",28,0.28


In [45]:
# Respondents per saving-frequency answer, split by the "College Student"
# flag. "Count" is 1 per row, so the bar height is the number of participants.
fig = px.histogram(
    df,
    x="How often do you save money *(beyond daily spending)*?",
    y="Count",
    color="College Student",
    barmode="group",
    text_auto=True,
    color_discrete_sequence=px.colors.qualitative.Pastel1,
    width=800,
    height=400,
)
# Value labels sit above the bars; the fixed y range leaves room for them.
fig.update_traces(textposition="outside", textfont_size=14)
fig.update_layout(
    yaxis_title="Number of Participants",
    yaxis_range=[0, 25],
)
fig.show()





In [56]:
# Ordinal encoding of the familiarity labels (1 = lowest, 5 = highest).
familiarity_mapping = {
    'Not familiar': 1,
    'Slightly': 2,
    'Somewhat': 3,
    'Very': 4,
    'Extremely': 5
}

# Financial-literacy question columns, declared once and reused for both the
# select and the unpivot (the list was previously written out twice).
financial_literacy_cols = [
    'Creating a personal budget',
    'Understanding credit and credit scores',
    'Opening and managing a bank account',
    'Differences between checking/savings/investment accounts',
    'Using financial apps or online banking tools',
    'Investing your money for future wealth',
]
financial_literacy_index = ["College Student", "Gender", "Count"]

# Long format: one row per (respondent, topic), with the answer in "value".
# `replace` keeps the String dtype of the column, so the mapped ratings are
# cast to Int32 explicitly (same replace -> cast pattern as the score-card
# cell). That makes the averages plotted later run on numbers, not text.
# strict=False turns any label missing from familiarity_mapping into null
# instead of raising, so such answers are simply excluded from the mean.
fl = (
    df.select(financial_literacy_cols + financial_literacy_index)
    .unpivot(
        on=financial_literacy_cols,
        index=financial_literacy_index,
        variable_name="Financial Literacy",
    )
    .with_columns(
        pl.col("value").replace(familiarity_mapping).cast(pl.Int32, strict=False)
    )
)

In [61]:
# Mean familiarity rating per financial-literacy topic (histfunc="avg"),
# with one bar per "Gender" group; y axis pinned to 0-5.
fig = px.histogram(
    fl,
    x="Financial Literacy",
    y="value",
    histfunc="avg",
    color="Gender",
    barmode="group",
    height=400,
)
fig.update_traces(textposition="inside", textfont_size=12)
fig.update_layout(
    yaxis_title="Mean Rating",
    yaxis_range=[0, 5],
)
fig.show()





In [22]:
# Demographics overview: a single row of four panels
# (age, education, gender, race).

# Tallies for the three bar panels. "#" is passed first in each column list,
# as in the other multi_select_counts calls.
education_counts = multi_select_counts(df, ["#", "What is your current education status?"], mappings.demograpics_map)
gender_counts = multi_select_counts(df, ["#", "What is your gender identity?"])
race_counts = multi_select_counts(
    df,
    [
        "#",
        "Hispanic or Latino",
        "White",
        "Asian or Pacific Islander",
        "American Indian or Alaska Native",
        "Black or African American",
    ],
)

# Subplot grid: 1 row x 4 columns
fig = make_subplots(
    rows=1,
    cols=4,
    subplot_titles=("Age", "Education", "Gender", "Race"),
)

# Age histogram (row 1, col 1)
fig.add_trace(go.Histogram(x=df["How old are you?"], name="Age"), row=1, col=1)

# Education bar chart (row 1, col 2)
fig.add_trace(
    go.Bar(x=education_counts.index, y=education_counts.values, name="Education"),
    row=1,
    col=2,
)

# Gender bar chart (row 1, col 3)
fig.add_trace(
    go.Bar(x=gender_counts.index, y=gender_counts.values, name="Gender"),
    row=1,
    col=3,
)

# Race bar chart (row 1, col 4)
fig.add_trace(
    go.Bar(x=race_counts.index, y=race_counts.values, name="Race"),
    row=1,
    col=4,
)

# Size, title and legend in one layout update; the subplot titles already
# name each panel, so the legend is hidden.
fig.update_layout(
    height=400,
    width=1000,
    title_text="Demographics Overview",
    showlegend=False,
)
fig.show()


In [None]:
# Display the survey frame, including the derived "College Student",
# "Gender" and "Count" columns added earlier.
# NOTE(review): this cell has no execution count (In [None]); consider
# `df.head()` or removing it before sharing the notebook.
df

In [23]:
# Career background and development needs: 2 x 2 grid of bar charts.

# Tallies for the four panels, each relabelled through its mapping.
prev_job = multi_select_counts(df, mappings.prev_job_col, mappings.prev_job_map)
goal_of_participation = multi_select_counts(df, mappings.goal_of_participation, mappings.goal_participation_map)
career = multi_select_counts(df, mappings.career_consideration_col, mappings.career_consideration_map)
dev_needs = multi_select_counts(df, mappings.help_needed_col, mappings.help_needed_map)

# Subplot grid: 2 rows x 2 columns (titles fill row by row)
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        "Previous Job Experience",
        "Goal of Participation",
        "Career Considerations",
        "Development Needs",
    ),
)

# Previous job experience (row 1, col 1)
fig.add_trace(
    go.Bar(x=prev_job.index, y=prev_job.values, name="Previous Job Experience"),
    row=1,
    col=1,
)

# Goal of participation (row 1, col 2)
fig.add_trace(
    go.Bar(x=goal_of_participation.index, y=goal_of_participation.values, name="Goal of Participation"),
    row=1,
    col=2,
)

# Career considerations (row 2, col 1)
fig.add_trace(
    go.Bar(x=career.index, y=career.values, name="Career Considerations"),
    row=2,
    col=1,
)

# Development needs (row 2, col 2)
fig.add_trace(
    go.Bar(x=dev_needs.index, y=dev_needs.values, name="Development Needs"),
    row=2,
    col=2,
)

# Size, title and legend in one layout update; the subplot titles already
# name each panel, so the legend is hidden.
fig.update_layout(
    height=600,
    width=800,
    title_text="Career Background and Development Needs",
    showlegend=False,
)
fig.show()


In [24]:
# Score-card grid: one bar chart per self-assessment question, laid out in a
# 2 x 4 grid and titled with the keys of score_card_colname_name_mapping.
fig = make_subplots(rows=2, cols=4,
                    subplot_titles=list(mappings.score_card_colname_name_mapping.keys()))

# Standardize the raw answer labels directly in polars. This uses the same
# `replace(entry_standardization_mapping)` as the long-format score-card
# cell, so the frame no longer round-trips through pandas just to call
# DataFrame.replace. "#" is kept because multi_select_counts is given it
# alongside each question column.
sc = df.select(["#"] + mappings.score_card).with_columns(
    pl.col(mappings.score_card).replace(mappings.entry_standardization_mapping)
)

for index, s in enumerate(mappings.score_card):

    # (row, col) of the index-th panel in the 2 x 4 grid.
    position = get_subplot_coords(index, 2, 4)

    # Answer distribution for this question.
    d = multi_select_counts(sc, ["#", s])
    fig.add_trace(
        go.Bar(y=d.values, x=d.index),
        row=position[0], col=position[1])

In [25]:
# Finish the score-card grid built in the previous cell: size, title, and no
# legend (each panel already has its own subplot title).
fig.update_layout(
    title_text="Baseline Self-Assessment of Personal and Professional Skills",
    width=1000,
    height=600,
    showlegend=False,
)
# Show the answer categories on every x axis in rating order rather than in
# the order the traces happen to list them.
fig.update_xaxes(
    categoryarray=['Not at all', 'Slightly', 'Moderately', 'Very', 'Extremely'],
    categoryorder='array',
)
fig.show()

In [26]:
# Convert the standardized skill labels to numeric ratings in polars, then
# hand the result to pandas. The explicit `replace -> cast(Int32)` (same
# pattern as the long-format score-card cell) replaces pandas
# `DataFrame.replace`, which relied on silent object->numeric downcasting:
# that behavior is deprecated and emitted a FutureWarning in this cell.
df_numeric = (
    sc.select(
        pl.col(mappings.skill_cols)
        .replace(mappings.skill_rating_map)
        .cast(pl.Int32)
    )
    .to_pandas()
)

# Average rating per skill across all respondents (a pandas Series indexed
# by skill column name).
avg_scores = df_numeric.mean()

# Radar chart of the per-skill averages.
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    r=avg_scores.tolist(),
    theta=avg_scores.index.tolist(),
    fill='toself',
    name='Average Scores'
))

fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            # Fixed 1-5 radial scale with only the end points labelled.
            range=[1, 5],
            tickvals=[1, 5],
            ticktext=["Low", "High"]
        )
    ),
    title="Overview Self-Ratings Across Key Skills",
    showlegend=False
)

fig.show()


Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`

