# Graph Exploration - Skill Gap Analysis

This notebook explores graph-based analysis of job skills and the relationships between them, using a skill co-occurrence graph built from processed job postings.


In [None]:
import sys
sys.path.append('..')

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter

from core.graph_analysis import (
    build_skill_cooccurrence_graph,
    compute_centralities,
    detect_communities,
    get_skill_importance_scores,
    find_bridge_skills
)
from core.analysis import cluster_jobs, interpret_clusters


## Load Sample Data


In [None]:
# Read the processed job postings generated by the main app.
# Point the path below at your own data file if it differs.
try:
    df = pd.read_csv('../data/processed_jobs_data_analyst_madrid.csv')
except FileNotFoundError:
    # Fall back to an empty frame so the `len(df) > 0` guards downstream skip their work
    print("No processed data found. Run the main app first to generate data.")
    df = pd.DataFrame()
else:
    if 'skills_detected' in df.columns:
        import ast

        # String cells are parsed as Python literals; any other value passes through untouched
        df['skills_detected'] = df['skills_detected'].apply(
            lambda cell: cell if not isinstance(cell, str) else ast.literal_eval(cell)
        )
    print(f"Loaded {len(df)} jobs")


## Build Skill Co-occurrence Graph


In [None]:
# Only build the graph when at least one job row was loaded
if df.shape[0] > 0:
    G = build_skill_cooccurrence_graph(df)

    # Summarise the graph: node count, edge count and density
    print(
        f"Graph nodes (skills): {G.number_of_nodes()}",
        f"Graph edges (co-occurrences): {G.number_of_edges()}",
        f"Graph density: {nx.density(G):.4f}",
        sep="\n",
    )


## Compute Centralities


In [None]:
# The data check comes first so G is never touched when no graph was built
if df.shape[0] > 0 and G.number_of_nodes() > 0:
    centrality_df = compute_centralities(G)
    # Show the heading followed by the first ten rows of the centrality table
    # NOTE(review): the heading presumes rows are ordered by degree centrality -- confirm in compute_centralities
    print("Top 10 Skills by Degree Centrality:", centrality_df.head(10), sep="\n")


## Community Detection


In [None]:
# The data check comes first so G is never touched when no graph was built
if df.shape[0] > 0 and G.number_of_nodes() > 0:
    communities = detect_communities(G)

    # Tally how many skills were assigned to each community id
    comm_counts = Counter(communities.values())
    print(f"Found {len(comm_counts)} communities")
    print(f"Community sizes: {dict(comm_counts)}")

    # List up to ten member skills per community, in ascending community-id order
    for community in sorted(comm_counts):
        members = []
        for skill_name, assigned in communities.items():
            if assigned == community:
                members.append(skill_name)
        print(
            f"\nCommunity {community} ({len(members)} skills):",
            f"  {', '.join(members[:10])}",
            sep="\n",
        )
