In [None]:
import pandas as pd
import altair as alt
from vega_datasets import data as vega_data

# Keep Altair's 'default' data transformer but pass max_rows=None so no row
# cap is applied to the DataFrames handed to the charts below.
# NOTE(review): with the cap removed, every row of a charted frame is passed
# through to the chart -- confirm the resulting notebook size is acceptable.
alt.data_transformers.enable('default', max_rows=None)


In [None]:
# Read the raw postings table; the path is relative to the notebook's working directory.
df = pd.read_csv('postings.csv')

# Quick sanity check on what was loaded: row count and available column names.
print(f"Loaded {len(df)} job postings")
print(f"Columns: {list(df.columns)}")

def prepare_data(df):
    """Return a cleaned copy of the job postings frame with derived columns.

    The input frame is left untouched. A row survives only if it has an
    experience level and a salary between 20,000 and 500,000 (inclusive).

    Columns added to the result:
        salary           -- ``normalized_salary`` when that column exists,
                            otherwise the row-wise mean of ``min_salary``,
                            ``max_salary`` and ``med_salary``.
        experience_level -- ``formatted_experience_level``, missing -> 'Not Specified'.
        remote           -- ``remote_allowed`` as bool, missing treated as False.
        engagement_rate  -- applies per (views + 1) as a percentage, clipped to [0, 100].
        work_type        -- ``formatted_work_type``, missing -> 'Not Specified'.
    """
    postings = df.copy()

    # Choose the salary source: the pre-normalized column wins when present.
    if 'normalized_salary' not in postings.columns:
        salary_parts = postings[['min_salary', 'max_salary', 'med_salary']]
        postings['salary'] = salary_parts.mean(axis=1)
    else:
        postings['salary'] = postings['normalized_salary']

    # Incomplete rows go first, then salaries outside the accepted band.
    complete = postings.dropna(subset=['salary', 'formatted_experience_level'])
    at_least_min = complete['salary'] >= 20000
    at_most_max = complete['salary'] <= 500000
    kept = complete[at_least_min & at_most_max]

    # The +1 keeps the ratio finite for postings with zero views.
    applies_per_view = kept['applies'] / (kept['views'] + 1)

    return kept.assign(
        # Rows without a level were dropped above, so this fill is only a safeguard.
        experience_level=kept['formatted_experience_level'].fillna('Not Specified'),
        remote=kept['remote_allowed'].fillna(0).astype(bool),
        engagement_rate=(applies_per_view * 100).clip(0, 100),
        work_type=kept['formatted_work_type'].fillna('Not Specified'),
    )


In [None]:
print("Preparing data for linked visualizations...")
data = prepare_data(df)

# Two-letter postal code -> numeric state id (FIPS code). The `id` column built
# from this map is what the choropleth joins on, and its keys double as the
# whitelist of valid state codes below.
state_id_map = {
    'AL': 1, 'AK': 2, 'AZ': 4, 'AR': 5, 'CA': 6, 'CO': 8, 'CT': 9, 'DE': 10,
    'FL': 12, 'GA': 13, 'HI': 15, 'ID': 16, 'IL': 17, 'IN': 18, 'IA': 19,
    'KS': 20, 'KY': 21, 'LA': 22, 'ME': 23, 'MD': 24, 'MA': 25, 'MI': 26,
    'MN': 27, 'MS': 28, 'MO': 29, 'MT': 30, 'NE': 31, 'NV': 32, 'NH': 33,
    'NJ': 34, 'NM': 35, 'NY': 36, 'NC': 37, 'ND': 38, 'OH': 39, 'OK': 40,
    'OR': 41, 'PA': 42, 'RI': 44, 'SC': 45, 'SD': 46, 'TN': 47, 'TX': 48,
    'UT': 49, 'VT': 50, 'VA': 51, 'WA': 53, 'WV': 54, 'WI': 55, 'WY': 56,
    'DC': 11
}

# Trailing location tokens that are known not to be states.
invalid_values = ['UNITED STATES', 'US', 'USA', 'REMOTE', '']


def _with_state(frame):
    """Return a copy of `frame` with a `state` column, keeping only US-state rows.

    The state is the upper-cased text after the last comma of `location`.
    A row is kept only when that text is a key of `state_id_map`; a plain
    "two characters long" check would also let through other two-letter
    codes, which then end up in the statistics with no map id.
    Rows with a missing location are dropped.
    """
    state = frame['location'].str.split(',').str[-1].str.strip().str.upper()
    is_us_state = ~state.isin(invalid_values) & state.isin(list(state_id_map))
    return frame.assign(state=state)[is_us_state]


# Sample for scatter plot (avoid overplotting); the sample is drawn before the
# state filter, so it can end up with fewer than 5000 rows.
viz_data_sample = _with_state(data.sample(n=min(5000, len(data)), random_state=42))

# Full data restricted to recognised states, used for all aggregates.
data_spatial = _with_state(data)

# Per-state statistics. Named aggregation ties each output column to its
# source column, instead of relying on positional renaming afterwards.
state_stats = (
    data_spatial.groupby('state')
    .agg(
        job_count=('job_id', 'count'),
        avg_salary=('salary', 'mean'),
        avg_engagement=('engagement_rate', 'mean'),
        # `remote` is boolean, so its mean is the remote share.
        pct_remote=('remote', lambda flags: flags.mean() * 100),
    )
    .reset_index()
)
state_stats['id'] = state_stats['state'].map(state_id_map)

# Aggregate statistics for bar charts (use aggregated data to reduce size)
state_work_type_agg = (
    data_spatial.groupby(['state', 'work_type'])
    .agg(count=('job_id', 'count'), avg_salary=('salary', 'mean'))
    .reset_index()
)

state_exp_agg = (
    data_spatial.groupby(['state', 'experience_level'])
    .agg(count=('job_id', 'count'), avg_salary=('salary', 'mean'))
    .reset_index()
)

print(f"Sample data: {len(viz_data_sample)} jobs across {viz_data_sample['state'].nunique()} states")
print(f"State statistics: {len(state_stats)} states")


In [None]:
# Point selection keyed on the `state` field; the choropleth below registers it
# and other charts can filter on it.
state_click = alt.selection_point(fields=['state'], name='state_select')

# --- CHOROPLETH MAP (from Task 2) ---
us_states = alt.topo_feature(vega_data.us_10m.url, 'states')

# Plain light-grey layer drawn underneath the choropleth.
base_map = alt.Chart(us_states).mark_geoshape(
    fill='#f0f0f0',
    stroke='white',
    strokeWidth=1
).project('albersUsa')

choropleth = alt.Chart(us_states).mark_geoshape(
    stroke='white',
    strokeWidth=1,
    cursor='pointer'
).encode(
    color=alt.condition(
        state_click,
        alt.Color('avg_salary:Q',
                  scale=alt.Scale(scheme='blues', domain=[40000, 150000]),
                  legend=alt.Legend(title='Average Salary', format='$,.0f')),
        alt.value('lightgray')
    ),
    opacity=alt.condition(state_click, alt.value(1.0), alt.value(0.3)),
    # empty=False: an empty selection otherwise counts as "everything selected",
    # which would draw the thick highlight border around every state before
    # the first click. Colour and opacity keep the default on purpose so the
    # map starts fully coloured.
    strokeWidth=alt.condition(state_click, alt.value(3), alt.value(1), empty=False),
    tooltip=[
        alt.Tooltip('state:N', title='State'),
        alt.Tooltip('job_count:Q', title='Jobs', format=','),
        alt.Tooltip('avg_salary:Q', title='Avg Salary', format='$,.0f'),
        alt.Tooltip('pct_remote:Q', title='% Remote', format='.1f')
    ]
).transform_lookup(
    # Join the per-state statistics onto the shapes by numeric id.
    lookup='id',
    from_=alt.LookupData(state_stats, 'id', ['state', 'job_count', 'avg_salary', 'pct_remote'])
).project('albersUsa').add_params(state_click)

map_viz = (base_map + choropleth).properties(
    width=900,
    height=450,
    title={
        'text': 'Average Salary by State (Click to Filter Views Below)',
        'subtitle': 'Geographic patterns in compensation - Click any state to explore its jobs in detail'
    }
)


In [None]:
# INTERACTION 2: interval brush on the scatter plot.
# Points inside the brush keep their experience-level colour, the rest turn
# grey; the rows shown are additionally restricted by the map's state click.

# Interval (click-and-drag) selection registered on the scatter plot.
brush = alt.selection_interval(name='brush')

# --- SCATTER PLOT (from Task 1, now with state info) ---
scatter_color = alt.condition(
    brush,
    alt.Color(
        'experience_level:N',
        scale=alt.Scale(scheme='tableau10'),
        title='Experience Level',
    ),
    alt.value('lightgray'),
)

scatter_tooltip = [
    alt.Tooltip('title:N', title='Job Title'),
    alt.Tooltip('state:N', title='State'),
    alt.Tooltip('salary:Q', title='Salary', format='$,.0f'),
    alt.Tooltip('engagement_rate:Q', title='Engagement %', format='.1f'),
    alt.Tooltip('experience_level:N', title='Experience'),
    alt.Tooltip('work_type:N', title='Work Type'),
]

scatter = (
    alt.Chart(viz_data_sample)
    .mark_circle(size=60, opacity=0.7)
    .encode(
        x=alt.X(
            'salary:Q',
            scale=alt.Scale(domain=[20000, 500000]),
            title='Annual Salary ($)',
            axis=alt.Axis(format='$,.0f'),
        ),
        y=alt.Y(
            'engagement_rate:Q',
            scale=alt.Scale(domain=[0, 100]),
            title='Engagement Rate (%)',
        ),
        color=scatter_color,
        tooltip=scatter_tooltip,
    )
    .transform_filter(state_click)
    .add_params(brush)
    .properties(
        width=800,
        height=350,
        title='Salary vs Engagement Rate (Brush to Select, Filtered by State Click Above)',
    )
)

# --- BAR CHART (from Task 1, filtered by both selections) ---
# Counts postings per work type among the rows that pass both filters below.
bar_chart = alt.Chart(viz_data_sample).mark_bar().encode(
    x=alt.X('count():Q', title='Number of Jobs'),
    y=alt.Y('work_type:N', sort='-x', title='Work Type'),
    color=alt.Color('work_type:N', scale=alt.Scale(scheme='category20'), legend=None),
    tooltip=[
        alt.Tooltip('work_type:N', title='Work Type'),
        alt.Tooltip('count():Q', title='Count'),
        alt.Tooltip('mean(salary):Q', title='Avg Salary', format='$,.0f')
    ]
).properties(
    width=800,
    height=200,
    title='Work Type Distribution (Filtered by Brush Selection Above)'
).transform_filter(
    brush
).transform_filter(
    # Also follow the map click, so this chart shows the same state as the
    # scatter plot; with only the brush filter a state click left it unchanged.
    state_click
)


In [None]:
# INTERACTION 3: Summary Statistics Panel (Updates with Selections)
# One-line text summary of the clicked state, built from state_stats.
# NOTE(review): this chart only resolves `state_click` when it is concatenated
# with the map that registers that selection.
state_summary = alt.Chart(state_stats).mark_text(
    align='left',
    baseline='top',
    fontSize=14,
    dx=5,
    dy=5
).encode(
    text=alt.Text('summary:N')
).transform_filter(
    # empty=False: with nothing clicked the default filter would pass every
    # row, drawing all state summaries on top of each other at the same spot.
    state_click,
    empty=False
).transform_calculate(
    summary='datum.state + " | Jobs: " + format(datum.job_count, ",") + " | Avg Salary: $" + format(datum.avg_salary, ",.0f") + " | Remote: " + format(datum.pct_remote, ".1f") + "%"'
).properties(
    width=800,
    height=30,
    title='Selected State Summary'
)


In [None]:
# COMBINE ALL VIEWS WITH COORDINATED INTERACTIONS

# Dashboard-level heading shown above the stacked views.
dashboard_title = {
    "text": "Linked Geographic and Job Market Analysis Dashboard",
    "subtitle": "Click states on map → Filter all views | Brush scatter plot → Filter bar chart",
    "fontSize": 16
}

# Stack map, scatter plot and bar chart vertically, then apply shared styling.
stacked_views = alt.vconcat(map_viz, scatter, bar_chart, spacing=20)
final_dashboard = (
    stacked_views
    .properties(title=dashboard_title)
    .configure_view(strokeWidth=0)
    .configure_axis(labelFontSize=11, titleFontSize=12)
    .configure_title(fontSize=14, anchor='start')
)

# Short usage banner printed above the rendered chart.
rule = "=" * 80
print("\n".join([
    "\n" + rule,
    "LINKED VISUALIZATION DASHBOARD",
    rule,
    "\nINTERACTIONS:",
    "1. Click any state on the map to filter all views below",
    "2. Brush (click and drag) on scatter plot to filter bar chart",
    "3. Hover over any element for detailed tooltips",
    rule,
]))

# Last expression of the cell, so the notebook renders the dashboard.
final_dashboard