In [1]:
# Fetch the project repository into the current working directory; later cells
# read from and write to its outputs/processed_data folder.
!git clone https://github.com/tanisha0804/Industry-Academia-alignment.git

Cloning into 'Industry-Academia-alignment'...
remote: Enumerating objects: 70, done.[K
remote: Counting objects: 100% (70/70), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 70 (delta 20), reused 50 (delta 9), pack-reused 0 (from 0)[K
Receiving objects: 100% (70/70), 2.33 MiB | 6.89 MiB/s, done.
Resolving deltas: 100% (20/20), done.


In [2]:
import json
import numpy as np
from pathlib import Path
from collections import defaultdict

In [3]:
# Locate the cloned repository relative to the working directory (where the
# `git clone` cell puts it). The earlier form, Path(".") / "/content/...",
# silently discarded the left operand: pathlib ignores all previous segments
# when an absolute path is joined, so the location was really hard-coded.
repo_root = Path("Industry-Academia-alignment")
processed_dir = repo_root / "outputs" / "processed_data"

# Explicit encoding so the read does not depend on the platform default.
with open(processed_dir / "skill_gap_analysis.json", encoding="utf-8") as f:
    gap_data = json.load(f)

# Normalise to lower case; the YEAR_SKILL_BUCKETS entries that these names are
# matched against later are all lower case.
missing_skills = [s.lower() for s in gap_data["missing_skills"]]

print("Missing skills loaded:", len(missing_skills))

Missing skills loaded: 27


In [4]:
# Synthetic demand simulation
# ---------------------------
# Real company hiring data is not available yet, so sample job-market demand
# is generated artificially. It mimics how industry demand for a skill changes
# over time, which lets the trend-analysis logic be exercised end to end.
# This is a stand-in for now and can be replaced with real company data later.

# Fixed seed so the simulated numbers are reproducible between runs.
np.random.seed(42)

# One series per missing skill: 6 simulated monthly demand signals (the last
# 6 months), each an integer drawn from [5, 100) - the upper bound is exclusive.
skill_time_series = {
    skill_name: np.random.randint(low=5, high=100, size=6).tolist()
    for skill_name in missing_skills
}


In [5]:
# Computing trend metrics for every skill's demand series:
#   growth_rate      - how fast demand is increasing: change from the first to
#                      the last point, divided by (first point + 1) so the
#                      denominator can never be zero.
#   freshness_score  - meant to reward recently emerging skills; computed as
#                      1 / (1 + series length), a proxy for recency.
#                      NOTE(review): this depends only on the series length, so
#                      series of equal length all get the same value.
#   volatility_index - demand instability over time: the standard deviation of
#                      the series.
# All three values are rounded to 3 decimal places.

trend_metrics = {}

for skill, series in skill_time_series.items():
    first_point, last_point = series[0], series[-1]

    trend_metrics[skill] = {
        "growth_rate": round((last_point - first_point) / (first_point + 1), 3),
        "freshness_score": round(1 / (1 + len(series)), 3),
        "volatility_index": round(float(np.std(series)), 3),
    }


In [6]:
def compute_priority(m, w_growth=0.5, w_freshness=0.3, w_volatility=0.2):
    """Combine a skill's trend metrics into a single raw priority score.

    Growth and freshness raise the score; volatility lowers it.

    Parameters
    ----------
    m : mapping
        Must provide the keys "growth_rate", "freshness_score" and
        "volatility_index" with numeric values.
    w_growth, w_freshness, w_volatility : float, optional
        Weights applied to the three metrics. The defaults (0.5, 0.3, 0.2)
        reproduce the original fixed weighting, so existing one-argument
        calls behave exactly as before.

    Returns
    -------
    float
        w_growth * growth_rate + w_freshness * freshness_score
        - w_volatility * volatility_index

    Raises
    ------
    KeyError
        If any of the three metric keys is missing from ``m``.
    """
    # NOTE(review): the metrics are combined as-is, with no rescaling. If
    # volatility_index is on a much larger scale than the other two it will
    # dominate the sum - consider normalising the inputs or tuning the weights.
    return (
        w_growth * m["growth_rate"] +
        w_freshness * m["freshness_score"] -
        w_volatility * m["volatility_index"]
    )

In [7]:
# Rank skills by their market priority.
# First score every skill from its trend metrics ...
skill_priority = {
    name: compute_priority(trend_metrics[name])
    for name in trend_metrics
}

# ... then order the (skill, score) pairs from highest to lowest score.
# The sort is stable, so tied skills keep their original relative order.
ranked_skills = [
    (name, skill_priority[name])
    for name in sorted(skill_priority, key=skill_priority.get, reverse=True)
]

# Show the ten highest-priority skills.
for s, score in ranked_skills[:10]:
    print(f"{s:30s} → priority = {score:.3f}")


circuit analysis               → priority = -1.202
power bi                       → priority = -1.727
pml                            → priority = -2.108
google analytics               → priority = -3.330
unit testing                   → priority = -3.453
design patterns                → priority = -3.685
docker                         → priority = -3.869
plant data modeling            → priority = -4.042
data visualization             → priority = -4.125
aveva e3dpdms                  → priority = -4.278


In [9]:
# Min-max normalise the raw priority scores into the range [0, 1]:
# the lowest-scoring skill maps to 0.0 and the highest to 1.0.
import numpy as np

raw_scores = np.array(list(skill_priority.values()), dtype=float)

# min()/max() raise ValueError on a zero-size array, so handle "no skills"
# explicitly; the resulting mapping is then simply empty.
if raw_scores.size:
    min_s = float(raw_scores.min())
    max_s = float(raw_scores.max())
else:
    min_s = max_s = 0.0

# When every score is identical (or there is only one skill) the span is 0 and
# the plain formula would divide by zero and produce NaN. In that case no skill
# outranks another, so all of them are assigned 0.0.
score_span = max_s - min_s

normalized_priority = {
    skill: ((float(score) - min_s) / score_span) if score_span else 0.0
    for skill, score in skill_priority.items()
}


In [14]:
# Lookup table: recommended year -> set of skill names placed in that year.
# Names are lower case and must match the skill strings exactly.
# NOTE(review): some entries ("htmlcss", "cicd pipelines", "aveva e3dpdms")
# look like punctuation-stripped forms - confirm they match the upstream
# skill names.
YEAR_SKILL_BUCKETS = {
    # Year 2
    2: {
        "adobe photoshop",
        "cms management",
        "command line",
        "data visualization",
        "excel",
        "google analytics",
        "htmlcss",
        "power bi",
        "seo fundamentals",
    },
    # Year 3
    3: {
        "api testing",
        "c# .net",
        "cicd pipelines",
        "debugging",
        "design patterns",
        "docker",
        "git",
        "swift",
        "unit testing",
        "xcode ide",
    },
    # Year 4
    4: {
        "aveva e3dpdms",
        "cad modeling",
        "llm finetuning",
        "plant data modeling",
        "pml",
        "terraform",
    },
}

In [15]:
# Assign each missing skill the year of the first bucket that contains it.
# Skills that appear in no bucket fall back to year 3.
skill_year_mapping = {
    skill_name: next(
        (
            bucket_year
            for bucket_year, bucket_skills in YEAR_SKILL_BUCKETS.items()
            if skill_name in bucket_skills
        ),
        3,  # default fallback
    )
    for skill_name in missing_skills
}

In [17]:
# One record per skill: its normalised priority (3 decimal places) and the
# year it is recommended for (year 3 when the skill has no mapping).
phase4_recommendations = [
    {
        "skill": skill_name,
        "priority_score": round(norm_score, 3),
        "recommended_year": skill_year_mapping.get(skill_name, 3),
    }
    for skill_name, norm_score in normalized_priority.items()
]

In [18]:
from pathlib import Path
import json

# Write the results into the cloned repository, located relative to the working
# directory. The earlier form, Path(".") / "/content/...", ignored the left
# operand because joining an absolute path discards all previous segments.
repo_root = Path("Industry-Academia-alignment")
output_dir = repo_root / "outputs" / "processed_data"
output_dir.mkdir(parents=True, exist_ok=True)

# Normalised priority score per skill.
with open(output_dir / "skill_priority_scores.json", "w", encoding="utf-8") as f:
    json.dump(normalized_priority, f, indent=2)

# Skill -> recommended year. The notebook never defines `skill_year_assignment`
# (a fresh Restart & Run All raised NameError here); the mapping it builds is
# called `skill_year_mapping`, which also matches the output file name.
with open(output_dir / "skill_year_mapping.json", "w", encoding="utf-8") as f:
    json.dump(skill_year_mapping, f, indent=2)

# Combined per-skill recommendation records.
with open(output_dir / "skill_recommendations.json", "w", encoding="utf-8") as f:
    json.dump(phase4_recommendations, f, indent=2)

print("Phase 4 completed successfully")


Phase 4 completed successfully
