In [None]:
"""
SYS 660 - College Decision Support System
Final Project Code
Authors: Stephanie McDonough, Anthony Rizzuto, Justin Baumann
Date: 04/15/2025
"""

In [148]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
from IPython.display import HTML as IPHTML
from sklearn.preprocessing import MinMaxScaler

# Load the college dataset from the working directory into the module-level
# frame `df`; every filter, option list and score below reads from it.
df = pd.read_csv('College Data Full-US.csv', low_memory=False)

# Clean and parse majors
def parse_programs(programs):
    """Split a comma-separated program list into a set of major names.

    Args:
        programs: Raw cell value from the "Bachelor's Programs Offered"
            column. Expected to be a string; anything else (NaN, None, a
            number) is treated as "no programs listed".

    Returns:
        A set of whitespace-stripped program names. Empty when the cell is
        missing, is not text, or is the literal placeholder "N/A".
    """
    # Non-string cells (NaN, None, stray numerics) carry no program names and
    # would otherwise fail on .strip().
    if not isinstance(programs, str):
        return set()
    if programs.strip() == "N/A":
        return set()
    # Drop empty tokens so "A,,B," does not produce a blank "major" that would
    # later appear as an empty entry in the major picker.
    return {name for name in map(str.strip, programs.split(',')) if name}

# Add a "Parsed Majors" column holding, for each institution, the set returned
# by parse_programs for its "Bachelor's Programs Offered" cell.
df["Parsed Majors"] = df["Bachelor's Programs Offered"].apply(parse_programs)

# Collect all majors across institutions
def get_all_majors(df):
    """Return every distinct major in ``df["Parsed Majors"]``, sorted.

    Args:
        df: Frame whose "Parsed Majors" column holds one set of major names
            per row.

    Returns:
        A sorted list with each major appearing once.
    """
    # Union all per-row sets in a single call, then order the result.
    combined = set().union(*df["Parsed Majors"])
    return sorted(combined)

# Inject notebook-wide CSS for the dashboard: fonts and colours for widget
# labels and headings, the reset button (.custom-reset-button), the filter
# cards (.filter-box), selected <option> rows, and the active/hovered tabs.
display(IPHTML("""
<style>
    body, .widget-label {
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
        color: #cfdee7;
    }
               
    h1 {
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
        color: #0a369d !important;
    }

    h2, h3, h4 {
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
        color: #4472ca !important;
    }

    .widget-box, .widget-tab, .widget-vbox, .widget-hbox {
    }

    .widget-select, .widget-dropdown, .widget-text, .widget-slider {
        border: none;
    }

   .custom-reset-button {
        background-color: #0a369d !important;
        color: white !important;
        padding: 8px 30px;
        border-radius: 4px;
        border: none;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
        height: 40px; 
        transition: background-color 0.3s ease;
    }
    .custom-reset-button:hover {
        background-color: #4472ca !important;
        cursor: pointer;
    }

    .filter-box {
        box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.15);;
        border-radius: 4px;
        background-color: #ffffff;  
    }

    .widget-button {
        font-weight: bold;
    }

    select option:checked{
               background-color: #92b4f4 !important;
               color: black !important;
               font-weight: bold;
    }

.widget-tab > .p-TabBar-tab.p-mod-current {
    background-color: #0a369d !important;
    color: #ffffff !important;
    border-bottom: 2px solid white !important;
}

.widget-tab > .p-TabBar-tab:hover {
    background-color: #dbe7ff;
    cursor: pointer;
}

</style>
"""))


# Widgets
# Location controls: a multi-select of state codes (with an "All States"
# pseudo-option at the top) and a free-text city box.
_state_codes = df["State abbreviation (HD2023)"].dropna().unique().tolist()
state_options = ['All States', *sorted(_state_codes)]
state_label = widgets.HTML('<b>Select State(s):</b>')
state_widget = widgets.SelectMultiple(
    layout=widgets.Layout(width='500px', height='200px'),
    options=state_options,
    value=[],
)
state_widget_box = widgets.VBox(children=[state_label, state_widget])


city_widget = widgets.Text(
    description='City (optional):',
    layout=widgets.Layout(width='400px'),
    value='',
)

# Major picker. The option list is built once and shared by the widget and by
# on_major_change, so the two can never disagree and the full scan of
# "Parsed Majors" is not repeated.
major_options = ['All Majors'] + get_all_majors(df)
major_label = widgets.HTML('<b>Select Major(s):</b>')
major_widget = widgets.SelectMultiple(
    options=major_options,
    value=[],
    layout=widgets.Layout(width='500px', height='200px')
)
major_widget_box = widgets.VBox([major_label, major_widget])

# Numeric filter sliders. Each slider gets the same tinted handle and is shown
# in a row next to a bold HTML caption.
_SLIDER_HANDLE_COLOR = '#92b4f4'


def _slider_row(caption_html, slider):
    """Tint *slider*'s handle and pair it with an HTML caption.

    Returns:
        ``(caption_widget, row)`` where ``row`` is an HBox of caption + slider.
    """
    slider.style.handle_color = _SLIDER_HANDLE_COLOR
    caption = widgets.HTML(caption_html)
    return caption, widgets.HBox([caption, slider])


max_tuition_slider = widgets.IntSlider(
    value=60000, min=0, max=100000, step=1000,
    layout=widgets.Layout(width='300px'),
)
max_tuition_label, max_tuition_widget = _slider_row(
    '<b>Max Tuition ($):</b>', max_tuition_slider)

min_acceptance_slider = widgets.FloatSlider(
    value=0, min=0, max=100, step=0.1,
    layout=widgets.Layout(width='300px'),
)
min_acceptance_label, min_acceptance_widget = _slider_row(
    '<b>Min Acceptance Rate (%):</b>', min_acceptance_slider)

min_earnings_slider = widgets.IntSlider(
    value=10000, min=0, max=200000, step=1000,
    layout=widgets.Layout(width='300px'),
)
min_earnings_label, min_earnings_widget = _slider_row(
    '<b>Min Median Earnings ($):</b>', min_earnings_slider)

size_slider = widgets.IntRangeSlider(
    value=(0, 12000), min=0, max=200000, step=1000,
    layout=widgets.Layout(width='300px'),
)
size_label, size_widget = _slider_row('<b>Enrollment Range:</b>', size_slider)


# Athletic program participation widgets
def _sport_dropdown(description):
    """Build the three-way (No Preference / Yes / No) dropdown for one sport."""
    return widgets.Dropdown(
        options=['No Preference', 'Yes', 'No'],
        value='No Preference',
        description=description,
        layout=widgets.Layout(width='300px'),
    )


football_widget = _sport_dropdown('Football:')
basketball_widget = _sport_dropdown('Basketball:')
baseball_widget = _sport_dropdown('Baseball:')
track_widget = _sport_dropdown('Track/XC:')


def on_state_change(change):
    """Expand the "All States" pseudo-option into every real state code.

    Args:
        change: Change payload for ``state_widget.value``; only ``change['new']``
            (the newly selected options) is read.
    """
    if 'All States' not in change['new']:
        return
    with state_widget.hold_trait_notifications():
        every_state = tuple(code for code in state_options if code != 'All States')
        state_widget.value = every_state

def on_major_change(change):
    """Expand the "All Majors" pseudo-option into every real major.

    Args:
        change: Change payload for ``major_widget.value``; only ``change['new']``
            (the newly selected options) is read.
    """
    if 'All Majors' not in change['new']:
        return
    with major_widget.hold_trait_notifications():
        every_major = tuple(name for name in major_options if name != 'All Majors')
        major_widget.value = every_major

# Run the "All ..." expansion handlers whenever either multi-select's value
# changes.
state_widget.observe(on_state_change, names='value')
major_widget.observe(on_major_change, names='value')

# Weights
# One 0-10 importance slider per scoring criterion, each shown next to a bold
# caption. The slider values feed calculate_recommendation_scores.
tuition_label = widgets.HTML('<b>Tuition Importance:</b>')
tuition_weight_slider = widgets.FloatSlider(value=5, min=0, max=10, step=0.5, layout=widgets.Layout(width='200px'))
tuition_weight_widget = widgets.HBox([tuition_label, tuition_weight_slider])

acceptance_label = widgets.HTML('<b>Acceptance Rate Importance:</b>')
acceptance_weight_slider = widgets.FloatSlider(value=5, min=0, max=10, step=0.5, layout=widgets.Layout(width='200px'))
acceptance_weight_widget = widgets.HBox([acceptance_label, acceptance_weight_slider])

earnings_label = widgets.HTML('<b>Earnings Importance:</b>')
earnings_weight_slider = widgets.FloatSlider(value=5, min=0, max=10, step=0.5, layout=widgets.Layout(width='200px'))
earnings_weight_widget = widgets.HBox([earnings_label, earnings_weight_slider])

# Named *_importance_label (like the size label below) so it does not rebind
# `major_label`, which already refers to the "Select Major(s)" caption.
major_importance_label = widgets.HTML('<b>Major Match Importance:</b>')
major_weight_slider = widgets.FloatSlider(value=5, min=0, max=10, step=0.5, layout=widgets.Layout(width='200px'))
major_weight_widget = widgets.HBox([major_importance_label, major_weight_slider])

size_importance_label = widgets.HTML('<b>School Size Importance:</b>')
size_weight_slider = widgets.FloatSlider(value=5, min=0, max=10, step=0.5, layout=widgets.Layout(width='200px'))
size_weight_widget = widgets.HBox([size_importance_label, size_weight_slider])


# Score calculation
def calculate_recommendation_scores(filtered_df, weights, selected_majors):
    """Compute a weighted recommendation score for each row of *filtered_df*.

    Each numeric criterion is min-max scaled to [0, 1] across the rows of
    *filtered_df* (missing values are replaced by the column mean first).
    Tuition is inverted so that cheaper schools score higher; acceptance rate,
    median earnings and enrollment are used as-is, so larger values score
    higher. A criterion with at most one non-null value cannot be scaled and
    contributes a constant 1.0 for every row.

    Args:
        filtered_df: Institutions to score. Must contain the tuition,
            acceptance-rate, earnings and enrollment columns plus
            "Parsed Majors" (a set of major names per row).
        weights: Mapping with the keys 'tuition', 'acceptance', 'earnings',
            'major' and 'size' giving each criterion's raw importance.
        selected_majors: Majors the user asked for; a row gets a major score
            of 1 when it offers any of them (or when none were selected),
            otherwise 0.

    Returns:
        A Series of scores indexed like *filtered_df*. The weights are
        normalised to sum to 1, so scores fall in [0, 1].
    """
    total_weight = sum(weights.values())
    if total_weight:
        normalized_weights = {k: v / total_weight for k, v in weights.items()}
    else:
        # Every slider at zero: fall back to equal weighting over however
        # many criteria were supplied, so the weights still sum to 1.
        normalized_weights = {k: 1 / len(weights) for k in weights}

    matrix = filtered_df[[
        'Published in-state tuition and fees 2023-24 (IC2023_AY)',
        'Percent admitted - total (DRVADM2023)',
        'MD_EARN_WNE_P10',
        'Undergraduate enrollment (DRVEF2023)'
    ]].copy()

    def has_major(major_set):
        if not selected_majors:
            return 1
        return int(any(m in major_set for m in selected_majors))

    matrix['major_score'] = filtered_df["Parsed Majors"].apply(has_major)

    for col, norm_col, invert in [
        ('Published in-state tuition and fees 2023-24 (IC2023_AY)', 'tuition_norm', True),
        ('Percent admitted - total (DRVADM2023)', 'acceptance_norm', False),
        ('MD_EARN_WNE_P10', 'earnings_norm', False),
        ('Undergraduate enrollment (DRVEF2023)', 'size_norm', False)
    ]:
        if col in matrix.columns and matrix[col].notna().sum() > 1:
            # A fresh scaler per column keeps each fit independent.
            filled = matrix[[col]].fillna(matrix[col].mean())
            norm = MinMaxScaler().fit_transform(filled)
            matrix[norm_col] = (1 - norm if invert else norm).flatten()
        else:
            # Nothing to scale against (zero or one usable value).
            matrix[norm_col] = 1.0

    matrix['score'] = (
        matrix['tuition_norm'] * normalized_weights['tuition'] +
        matrix['acceptance_norm'] * normalized_weights['acceptance'] +
        matrix['earnings_norm'] * normalized_weights['earnings'] +
        matrix['major_score'] * normalized_weights['major'] +
        matrix['size_norm'] * normalized_weights['size']
    )
    return matrix['score']

# Update display
def update_recommendations(change):
    """Re-apply every filter, re-score the survivors and redraw the results.

    Registered as the ``value`` observer for the filter and weight widgets and
    also called once directly to draw the initial table.

    Args:
        change: The change payload passed by the observer machinery (``None``
            for the initial call). It is not inspected; the current widget
            values are read directly.

    Side effects:
        Clears ``out`` and displays either a one-row "no match" message or an
        HTML table of the 20 highest-scoring institutions.
    """
    state_col = "State abbreviation (HD2023)"
    city_col = "City location of institution (HD2023)"
    tuition_col = 'Published in-state tuition and fees 2023-24 (IC2023_AY)'
    acceptance_col = 'Percent admitted - total (DRVADM2023)'
    enrollment_col = 'Undergraduate enrollment (DRVEF2023)'
    earnings_col = 'MD_EARN_WNE_P10'
    # Membership column -> (preference dropdown, badge shown in the table).
    sports = {
        'NCAA/NAIA member for football (IC2023)': (football_widget, '🏈'),
        'NCAA/NAIA member for basketball (IC2023)': (basketball_widget, '🏀'),
        'NCAA/NAIA member for baseball (IC2023)': (baseball_widget, '⚾'),
        'NCAA/NAIA member for cross country/track (IC2023)': (track_widget, '🏃'),
    }

    with out:
        clear_output(wait=True)
        # Boolean indexing below always produces a new frame, so `df` is never
        # modified; an explicit copy is taken before columns are added.
        filtered = df

        selected_states = [s for s in state_widget.value if s != 'All States']
        if selected_states:
            filtered = filtered[filtered[state_col].isin(selected_states)]

        city_text = city_widget.value.strip()
        if city_text:
            # regex=False: the box holds plain user text, so characters such
            # as "(" or "." must match literally instead of raising or acting
            # as wildcards.
            filtered = filtered[
                filtered[city_col].str.contains(city_text, case=False, na=False, regex=False)]

        # Rows with no published tuition are treated as infinitely expensive
        # and therefore excluded by any finite cap.
        if tuition_col in filtered.columns:
            filtered = filtered[
                filtered[tuition_col].fillna(float('inf')) <= max_tuition_slider.value]

        # Missing acceptance, enrollment and earnings values are treated as 0.
        if acceptance_col in filtered.columns:
            filtered = filtered[
                filtered[acceptance_col].fillna(0) >= min_acceptance_slider.value]

        min_size, max_size = size_slider.value
        if enrollment_col in filtered.columns:
            filtered = filtered[
                filtered[enrollment_col].fillna(0).between(min_size, max_size)]

        if earnings_col in filtered.columns:
            filtered = filtered[
                filtered[earnings_col].fillna(0) >= min_earnings_slider.value]

        selected_majors = [m for m in major_widget.value if m != 'All Majors']
        if selected_majors:
            filtered = filtered[
                filtered["Parsed Majors"].apply(lambda x: any(m in x for m in selected_majors))]

        # Athletic program filters
        for col, (dropdown, _badge) in sports.items():
            preference = dropdown.value
            if preference != 'No Preference':
                filtered = filtered[
                    filtered[col].fillna('N/A').str.lower() == preference.lower()]

        # Checked only after *all* filters (including athletics) so an empty
        # frame never reaches scoring or table rendering.
        if filtered.empty:
            display(pd.DataFrame({"Message": ["No colleges match your criteria."]}))
            return

        weights = {
            'tuition': tuition_weight_slider.value,
            'acceptance': acceptance_weight_slider.value,
            'earnings': earnings_weight_slider.value,
            'major': major_weight_slider.value,
            'size': size_weight_slider.value
        }

        filtered = filtered.copy()
        filtered['recommendation_score'] = calculate_recommendation_scores(filtered, weights, selected_majors)
        filtered = filtered.sort_values('recommendation_score', ascending=False)

        # Only the rows that are actually shown need badges and rounding.
        top = filtered.head(20).copy()

        # Add sport badges (emoji tags)
        def get_sport_badges(row):
            badges = [
                badge for col, (_dropdown, badge) in sports.items()
                if str(row.get(col, '')).strip().lower() == 'yes'
            ]
            return ' '.join(badges) if badges else '-'

        top['Athletics'] = top.apply(get_sport_badges, axis=1)
        top['recommendation_score'] = top['recommendation_score'].round(4)

        display_cols = {
            'Institution Name': 'University',
            city_col: 'City',
            state_col: 'State',
            'recommendation_score': 'Score',
            'Athletics': 'Athletics',
            tuition_col: 'In-State Tuition',
            acceptance_col: 'Acceptance Rate',
            enrollment_col: 'Enrollment',
            earnings_col: 'Median Earnings (10 yrs)',
            # MN_ is the mean-earnings counterpart of the MD_ (median) field,
            # not a minimum.
            'MN_EARN_WNE_P10': 'Mean Earnings (10 yrs)'
        }

        top_filtered = (
            top[list(display_cols)]
            .rename(columns=display_cols)
            .reset_index(drop=True)
        )

        # 1-based rank shown as the first column.
        top_filtered.index = top_filtered.index + 1
        top_filtered.insert(0, "Rank", top_filtered.index)

        output_table = (
            top_filtered.style.format({
                'Score': '{:.4f}',
                'In-State Tuition': '${:,.0f}',
                'Acceptance Rate': '{:.1f}%',
                'Enrollment': '{:.0f}',
                'Median Earnings (10 yrs)': '${:,.0f}',
                'Mean Earnings (10 yrs)': '${:,.0f}'
            }, na_rep='-')
            .hide(axis='index')
            .set_table_styles([
                {'selector': 'th', 'props': [('text-align', 'center')]},
                {'selector': 'table', 'props': [('margin-left', 'auto'), ('margin-right', 'auto')]}
            ]))
        html_table = output_table.to_html()

        display(IPHTML(f"""
        <div style="
            border: 3px solid #0a369d;
            border-radius: 4px;
            padding: 5px 20px 20px 20px;
            margin: 10px 50px 0px 50px;
            width: 90%;
            background-color: #ffffff;
            box-shadow: 0 4px 12px rgba(0,0,0,0.1);
        ">
        <h2 style="text-align:center; color: #0a369d;">Top Matching Universities</h2>
         {html_table}
        </div>"""))

# Reset button: centred below the filter cards and styled through the
# .custom-reset-button CSS class.
reset_button = widgets.Button(
    description="Reset Filters & Weights",
    layout=widgets.Layout(width='350px', margin='20px auto 0 auto'),
)
reset_button.add_class('custom-reset-button')

def reset(button):
    """Restore every filter and weight control to its initial value.

    Args:
        button: The clicked Button instance supplied by ``on_click``; unused.
    """
    # (control, initial value) pairs, applied in this order.
    initial_values = [
        (state_widget, ()),
        (city_widget, ''),
        (major_widget, ()),
        (max_tuition_slider, 60000),
        (min_acceptance_slider, 0),
        (min_earnings_slider, 10000),
        (size_slider, (0, 12000)),
        (football_widget, 'No Preference'),
        (basketball_widget, 'No Preference'),
        (baseball_widget, 'No Preference'),
        (track_widget, 'No Preference'),
        (tuition_weight_slider, 5),
        (acceptance_weight_slider, 5),
        (earnings_weight_slider, 5),
        (major_weight_slider, 5),
        (size_weight_slider, 5),
    ]
    for control, initial in initial_values:
        control.value = initial


reset_button.on_click(reset)

# UI setup
def _filter_panel(column, margin):
    """Wrap *column* in the bordered card used on the Filters tab.

    Args:
        column: Widget placed inside the card.
        margin: CSS margin string for the card; the only layout value that
            differs between the two cards.
    """
    panel = widgets.Box(
        children=(column,),
        layout=widgets.Layout(
            border='3px solid #0a369d',
            padding='5px 15px 30px 20px',
            margin=margin,
            width='45%',
        ),
    )
    panel.add_class('filter-box')
    return panel


# Left card: where the school is and what it teaches.
left_column = widgets.VBox([
    widgets.HTML('<h2>Location & Program</h2>'),
    state_widget_box,
    city_widget,
    major_widget_box,
])
left_column_box = _filter_panel(left_column, '0px 0px 0px 50px')

# Right card: numeric thresholds followed by the athletics preferences.
right_column = widgets.VBox([
    widgets.HTML('<h2>Cost & Admission </h2>'),
    max_tuition_widget,
    min_acceptance_widget,
    size_widget,
    min_earnings_widget,
    widgets.HTML('<h2>Athletic Participation (NCAA/NAIA)</h2>'),
    football_widget,
    basketball_widget,
    baseball_widget,
    track_widget,
])
right_column_box = _filter_panel(right_column, '0px 50px 0px 10px')

filters_row = widgets.HBox(
    [left_column_box, right_column_box],
    layout=widgets.Layout(justify_content='space-between', gap='40px'),
)

# Whole Filters tab: title, the two cards side by side, then the reset button.
filters = widgets.VBox([
    widgets.HTML('<h1 style="text-align:center; margin-bottom: 20px; color:#272757;">College Match Filters</h1>'),
    filters_row,
    reset_button,
])


# Scoring Weights tab: a heading followed by one importance row per criterion.
weights = widgets.VBox(children=[
    widgets.HTML('<h3>Criteria Importance (0-10)</h3>'),
    tuition_weight_widget,
    acceptance_weight_widget,
    earnings_weight_widget,
    major_weight_widget,
    size_weight_widget,
])

# Top-level container with one tab per panel, titled in display order.
ui = widgets.Tab(children=[filters, weights])
for _tab_index, _tab_title in enumerate(('Filters', 'Scoring Weights')):
    ui.set_title(_tab_index, _tab_title)

# Output area that update_recommendations clears and redraws into.
out = widgets.Output()

# Redraw the results whenever any filter or weight control changes. Every
# entry must be the value-bearing control itself (e.g. `size_slider`, not the
# HBox row `size_widget` that merely contains it), because only those emit
# `value` changes.
for w in [
    # Location & program
    state_widget, city_widget, major_widget,
    # Numeric filters
    max_tuition_slider, min_acceptance_slider, min_earnings_slider, size_slider,
    # Athletics preferences
    football_widget, basketball_widget, baseball_widget, track_widget,
    # Scoring weights
    tuition_weight_slider, acceptance_weight_slider, earnings_weight_slider,
    major_weight_slider, size_weight_slider,
]:
    w.observe(update_recommendations, names='value')

# Show the dashboard and draw the initial table.
display(ui, out)
update_recommendations(None)

Tab(children=(VBox(children=(HTML(value='<h1 style="text-align:center; margin-bottom: 20px; color:#272757;">Co…

Output()