In [None]:
!pip install langchain_community
!pip install replicate
!pip install openpyxl
!pip install tqdm



In [None]:
from langchain_community.llms import Replicate
import os
from google.colab import userdata
# Read the Replicate API token from the Colab secret named 'api_token'.
api_token = userdata.get('api_token')
if not api_token:
    # Fail early with a clear message instead of a TypeError from os.environ below.
    raise ValueError("Secret 'api_token' kosong atau tidak ditemukan.")

# Export the token under REPLICATE_API_TOKEN, the variable name the other cells of
# this notebook use. The previous name is still set so nothing that read it breaks.
os.environ["REPLICATE_API_TOKEN"] = api_token
os.environ["api_token"] = api_token

# Model setup
model = "ibm-granite/granite-3.2-8b-instruct"
output = Replicate(
    model=model,
    replicate_api_token=api_token,
)

In [None]:
# SKRIP REVISI 1
import time
import pandas as pd
import os
from tqdm.notebook import tqdm
from google.colab import files, userdata
from langchain_community.llms import Replicate

# === 1. Upload the Excel file ===
uploaded = files.upload()
if not uploaded:
    # A cancelled upload returns no files; stop with a clear message instead of an IndexError.
    raise ValueError("Tidak ada file yang diunggah.")
file_name = next(iter(uploaded))  # the first uploaded file is the one processed

# === 2. Load the data ===
df = pd.read_excel(file_name, sheet_name="Sheet1")

if "Deskripsi" not in df.columns:
    raise ValueError("Kolom 'Deskripsi' tidak ditemukan di Sheet1")

# === 3. Set the API token ===
api_token = userdata.get('api_token')
if not api_token:
    # os.environ only accepts strings; surface a missing secret explicitly.
    raise ValueError("Secret 'api_token' kosong atau tidak ditemukan.")
os.environ["REPLICATE_API_TOKEN"] = api_token

# === 4. Set up the model ===
model = Replicate(
    model="ibm-granite/granite-3.2-8b-instruct",
    replicate_api_token=api_token
)

# === 5. Prompt template ===
# Instruction text sent to the model for every row. The row's job description is
# appended directly after the trailing "Text:" line when the per-row prompt is built.
# The three "<category>: ..." output lines requested here are what the parsing step
# of this cell looks for in the model's reply.
base_prompt = """
Classify the job descriptions in the "Deskripsi" column into three categories: "hard skills", "soft skills", and "non-skills".

Definitions:
- Hard skills: measurable abilities or knowledge areas that can be evaluated using performance indicators (examples: excel, SEO, digital marketing).
- Soft skills: interpersonal or personal attributes that cannot be directly measured by KPIs but influence how people work (examples: communication, teamwork, leadership).
- Non-skills: requirements that are neither hard skills nor soft skills (examples: degree, minimum age, years of experience).

Rules:
- Output must only contain three lines in this format:
  hard skills: ...
  soft skills: ...
  non-skills: ...
- Each tag must be maximum 2 words.
- Do not write explanations or full sentences, only tags separated by commas.
- Translate all tags into English if they are in Indonesian.
- Extract skills or requirements directly from the text only. Do not invent or guess.
- If no tag is found for a category, leave it blank after the colon.
Text:
"""

# === 6. Process each row with rate-limit handling ===
import re  # imported here because this cell's import block does not include `re`

# Section headers requested by the prompt. They are matched case-insensitively so a
# reply such as "Hard Skills: ..." is parsed the same way as "hard skills: ...".
SECTION_PATTERN = re.compile(r"(hard skills|soft skills|non-skills)\s*:", re.IGNORECASE)


def parse_skill_sections(text):
    """Split a model reply into its hard-skill, soft-skill and non-skill tag strings.

    Each section runs from its header to the next recognised header (or to the end
    of the reply). Headers are matched regardless of letter case. If a header
    appears more than once, the first non-empty occurrence is kept.

    Returns:
        tuple[str, str, str]: (hard skills, soft skills, non-skills); a section that
        is absent from the reply is returned as an empty string.
    """
    sections = {"hard skills": "", "soft skills": "", "non-skills": ""}
    headers = list(SECTION_PATTERN.finditer(text))
    for position, header in enumerate(headers):
        label = header.group(1).lower()
        if sections[label]:
            continue
        if position + 1 < len(headers):
            stop = headers[position + 1].start()
        else:
            stop = len(text)
        sections[label] = text[header.end():stop].strip()
    return sections["hard skills"], sections["soft skills"], sections["non-skills"]


hard_skills_list, soft_skills_list, non_skills_list = [], [], []

for i, desc in enumerate(tqdm(df["Deskripsi"], desc="Processing rows")):
    if pd.isna(desc):
        # Nothing to classify: do not send the literal text "nan" to the model, but
        # keep the three result lists aligned with the DataFrame rows.
        hard_skills_list.append("")
        soft_skills_list.append("")
        non_skills_list.append("")
        continue

    prompt = base_prompt + str(desc)

    while True:  # retry loop: repeat the request for as long as it is rate-limited
        try:
            # No stop sequence on "non-skills:" is passed: stopping there would end
            # generation before the tags of the third category are produced.
            result = model.invoke(
                prompt,
                top_k=5,
                top_p=0.3,
                temperature=0,
                max_tokens=150,
                min_tokens=30,
                random_seed=42,
                repetition_penalty=1.2
            )
            break
        except Exception as e:
            if "429" in str(e):
                print(f"Rate limit reached at row {i+1}, waiting 15 seconds...")
                time.sleep(15)
            else:
                raise  # re-raise unchanged so the original traceback is preserved

    # Parse the reply into the three categories
    hs, ss, ns = parse_skill_sections(result)

    hard_skills_list.append(hs)
    soft_skills_list.append(ss)
    non_skills_list.append(ns)

    time.sleep(1)  # short pause between requests

# === 7. Attach the per-row results to the DataFrame ===
for column_name, column_values in (
    ("Hard Skills", hard_skills_list),
    ("Soft Skills", soft_skills_list),
    ("Non-Skills", non_skills_list),
):
    df[column_name] = column_values

# === 8. Save the workbook and offer it for download ===
output_file = "classified_skills_revisi_1_0.7.xlsx"
df.to_excel(output_file, index=False)
files.download(output_file)


In [None]:
#PREPROCESS KE-2 (REVISI 1)
import pandas as pd
import os
import re
import time
from tqdm.notebook import tqdm
from google.colab import files, userdata
from langchain_community.llms import Replicate

# === 1. Upload the Excel file produced by the previous Granite step ===
uploaded = files.upload()
file_name = list(uploaded)[0]

df = pd.read_excel(file_name)

# Make sure the Granite result columns are present; report the first one missing
missing_cols = [
    required
    for required in ["Hard Skills", "Soft Skills", "Non-Skills"]
    if required not in df.columns
]
if missing_cols:
    raise ValueError(f"Kolom {missing_cols[0]} tidak ditemukan di file Excel.")

# === 2. Ekstrak semua skill unik ===
all_skills = set()

def split_skills(cell):
    """Split a comma-separated cell into trimmed, non-empty tags ([] for a missing cell)."""
    if pd.isna(cell):
        return []
    trimmed = (piece.strip() for piece in str(cell).split(","))
    return [piece for piece in trimmed if piece]

# Gather the tags of every cell in the three result columns into the shared set
for col in ["Hard Skills", "Soft Skills", "Non-Skills"]:
    all_skills.update(tag for val in df[col] for tag in split_skills(val))

# From here on `all_skills` is an alphabetically sorted list
all_skills = sorted(all_skills)

print(f"Total unique skills extracted: {len(all_skills)}")

# === 3. Set up the model ===
api_token = userdata.get('api_token')
os.environ["REPLICATE_API_TOKEN"] = api_token

# Keyword arguments for the Replicate wrapper, collected in one place
replicate_settings = {
    "model": "ibm-granite/granite-3.2-8b-instruct",
    "replicate_api_token": api_token,
}
model = Replicate(**replicate_settings)

# === 4. Normalisation prompt ===
# The tag list is appended below this text with one tag per line, so the prompt
# describes the input that way (it previously claimed the list was comma-separated).
base_prompt = """
You are given a list of skill tags, one tag per line.
Your task is to normalize them so that all variations, synonyms, or similar expressions are grouped into one consistent normalized tag.

Rules:
- Each input line contains exactly one tag.
- Output format must only contain lines with "original → normalized".
- If multiple items clearly refer to the same concept (e.g. "degree", "bachelor’s degree", "management degree preferred"), normalize them into one short consistent tag (max 2 words).
- Always choose the most general base form, not the specific variation. Example: "bachelor’s degree", "degree requirement", "management degree" → "degree".
- Normalize to English only.
- Do not invent new tags, just unify.

Now normalize the following list:
"""

# Number of tags sent per request. Each reply is capped at max_tokens=800, so the
# full list (thousands of unique tags) cannot be answered in one call: the reply is
# cut off after the first few dozen "original → normalized" lines. Sending small
# batches lets every tag receive a mapping.
# NOTE(review): batches are normalised independently, so the model may pick slightly
# different normalised labels for the same concept in different batches.
BATCH_SIZE = 40


def invoke_with_retry(prompt_text):
    """Send one prompt to the model and return its reply.

    When the raised error text contains "429" (rate limit), wait 15 seconds and try
    again; any other exception is re-raised unchanged.
    """
    while True:
        try:
            return model.invoke(
                prompt_text,
                top_k=1,
                top_p=0.1,
                temperature=0,
                max_tokens=800,
                min_tokens=30,
                random_seed=42,
                repetition_penalty=1.2,
                stopping_sequence=None
            )
        except Exception as e:
            if "429" in str(e):
                print("Rate limit reached, waiting 15 seconds...")
                time.sleep(15)
            else:
                raise


# === 5. Send the tags to Granite in batches, with rate-limit handling ===
batch_outputs = []
for start in tqdm(range(0, len(all_skills), BATCH_SIZE), desc="Normalizing batches"):
    skills_text = "\n".join(all_skills[start:start + BATCH_SIZE])
    prompt = base_prompt + "\n" + skills_text
    batch_outputs.append(invoke_with_retry(prompt))
    time.sleep(1)  # short pause between requests

# Combined reply of all batches, one mapping per line
result = "\n".join(batch_outputs)

print("=== Granite Normalization Output ===")
print(result)

# === 6. Parse the "original → normalized" lines ===
mapping = {}
for line in result.splitlines():
    # Accept the requested "→" as well as an ASCII "->" arrow
    parts = re.split(r"→|->", line)
    if len(parts) != 2:
        continue
    original = parts[0].strip().lower()
    normalized = parts[1].strip().lower()
    # Skip incomplete lines (e.g. a reply cut off right after the arrow) so a tag is
    # never replaced by an empty string
    if original and normalized:
        mapping[original] = normalized

print(f"\nParsed {len(mapping)} mappings.")

# === 7. Apply Mapping ke DataFrame ===
def normalize_cell(cell):
    """Map every tag of a comma-separated cell through `mapping`.

    Tags are trimmed and lower-cased before the lookup; tags without a mapping are
    kept as they are. The result is the de-duplicated, alphabetically sorted tags
    joined with ", ". A missing cell gives an empty string.
    """
    if pd.isna(cell):
        return ""
    unique_tags = set()
    for piece in str(cell).split(","):
        tag = piece.strip().lower()
        if tag:
            unique_tags.add(mapping.get(tag, tag))
    return ", ".join(sorted(unique_tags))

# Source column -> column that receives its normalised tags
normalized_columns = {
    "Hard Skills": "Hard Skills (Normalized)",
    "Soft Skills": "Soft Skills (Normalized)",
    "Non-Skills": "Non-Skills (Normalized)",
}
for source_col, target_col in normalized_columns.items():
    df[target_col] = df[source_col].apply(normalize_cell)

# === 8. Save the result ===
output_file = "classified_skills_normalized_revisi_1_gabungan.xlsx"
df.to_excel(output_file, index=False)
files.download(output_file)


Saving classified_skills_revisi_1_GABUNGAN.xlsx to classified_skills_revisi_1_GABUNGAN.xlsx
Total unique skills extracted: 4654
=== Granite Normalization Output ===
0-2 years of experience → experience
000 → not specified
000 - 15 → not specified
000 - RP 1 → not specified
000 - RP 15 → not specified
000 - RP 2 → not specified
000 - RP 3 → not specified
000 - RP 4 → not specified
000 - RP 5 → not specified
000 - Rp 10 → not specified
000 - Rp 4 → not specified
000 per month → salary
000 per month) → salary
000) → not specified
000/month) → salary
08:00 - 17:00 WIB → work hours
08:00-17:00 → work hours
1 Year Experience → experience
1 year as supervisor → experience
1 year experience → experience
1 year marketing experience → experience
1 year minimum experience → experience
1 year of automotive sales experience → experience
1 year of digital marketing experience → experience
1 year of e-commerce experience → experience
1 year of experience → experience
1 year of experience as a content

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# PREPROCESS KE-3
# =========================
# 1) Install & Import
# =========================
!pip install pandas openpyxl --quiet

import pandas as pd
import re
from google.colab import files

# =========================
# 2) Upload & read Excel
# =========================
uploaded = files.upload()
file_name = list(uploaded.keys())[0]

# Read "Sheet1" when the workbook has it, otherwise the first sheet. The sheet is
# chosen by inspecting the sheet names, so a genuine read error is no longer hidden
# by a silent retry on another sheet, and the workbook handle is closed by `with`.
with pd.ExcelFile(file_name) as xls:
    sheet_to_read = "Sheet1" if "Sheet1" in xls.sheet_names else xls.sheet_names[0]
    df = pd.read_excel(xls, sheet_name=sheet_to_read)

print("Kolom tersedia:", df.columns.tolist())

# =========================
# 3) Helper: clean & split
# =========================
def clean_tag(tag: str) -> str:
    """Return the tag trimmed, lower-cased and with each whitespace run collapsed to one space."""
    lowered = str(tag).strip().lower()
    return re.sub(r"\s+", " ", lowered)

def explode_tags(series: pd.Series) -> pd.Series:
    """
    Flatten a Series of comma-separated strings ('a, b, c') into one tag per element.

    Missing cells are skipped, every piece is passed through `clean_tag`, and pieces
    that are empty after cleaning are dropped. The result uses the "string" dtype.
    """
    tags = [
        cleaned
        for cell in series.dropna()
        for cleaned in (clean_tag(piece) for piece in str(cell).split(","))
        if cleaned
    ]
    return pd.Series(tags, dtype="string")

# =========================
# 4) Count tag frequency per category
# =========================
# Input column -> name under which its frequency table is stored
target_cols = {
    "Hard Skills (Normalized)":  "Hard Skills (Frequency)",
    "Soft Skills (Normalized)":  "Soft Skills (Frequency)",
    "Non-Skills (Normalized)":   "Non-Skills (Frequency)",
}

results = {}

for col in target_cols:
    outname = target_cols[col]
    if col not in df.columns:
        print(f"Peringatan: kolom '{col}' tidak ditemukan. Melewati kategori ini.")
        continue

    exploded = explode_tags(df[col])
    if not exploded.empty:
        counts = exploded.value_counts().rename_axis("skill_tag")
        freq_df = counts.reset_index(name="frequency")
        freq_df = freq_df.sort_values("frequency", ascending=False)
        freq_df = freq_df.reset_index(drop=True)
    else:
        # No tags at all: keep an empty table with the same two columns
        freq_df = pd.DataFrame({"skill_tag": [], "frequency": []})

    results[outname] = freq_df
    # Short summary of the most frequent tags
    print(f"\nTop 10 {outname}")
    print(freq_df.head(10))

# =========================
# 5) Save to Excel & download
# =========================
output_xlsx = "skills_frequency.xlsx"

if not results:
    # Every target column was skipped above. A workbook without any sheet cannot be
    # saved, so stop with a clear message before creating the file.
    raise ValueError("Tidak ada kolom skill yang ditemukan; tidak ada data untuk disimpan.")

with pd.ExcelWriter(output_xlsx, engine="openpyxl") as writer:
    for sheet_name, freq_df in results.items():
        # Excel limits sheet names to 31 characters
        safe_sheet = sheet_name[:31]
        freq_df.to_excel(writer, index=False, sheet_name=safe_sheet)

print("\nFile tersimpan:", output_xlsx)
files.download(output_xlsx)


Saving classified_skills_normalized_revisi_1_gabungan.xlsx to classified_skills_normalized_revisi_1_gabungan (2).xlsx
Kolom tersedia: ['Posisi', 'Perusahaan', 'Lokasi', 'Tanggal', 'URL', 'Deskripsi', 'digital marketing', 'Gaji Minimum (per month)', 'Gaji Maksimum (per month)', 'Gaji Tidak Ditemukan', 'Hard Skills', 'Soft Skills', 'Non-Skills', 'Hard Skills (Normalized)', 'Soft Skills (Normalized)', 'Non-Skills (Normalized)']

Top 10 Hard Skills (Frequency)
                 skill_tag  frequency
0        digital marketing        803
1                      seo        316
2            data analysis        166
3         content creation        166
4               google ads        148
5         google analytics        133
6  social media management        122
7             social media         90
8          email marketing         81
9              copywriting         74

Top 10 Soft Skills (Frequency)
            skill_tag  frequency
0       communication        721
1            teamwork  

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# PREPROCESS KE-4 (EDGE LIST)

# =========================
# 1) Install & Import
# =========================
!pip install pandas openpyxl --quiet

import pandas as pd
import re
from google.colab import files

# =========================
# 2) Upload & read Excel
# =========================
uploaded = files.upload()
file_name = list(uploaded.keys())[0]

# Read "Sheet1" when the workbook has it, otherwise the first sheet. The sheet is
# chosen by inspecting the sheet names, so a genuine read error is no longer hidden
# by a silent retry on another sheet, and the workbook handle is closed by `with`.
with pd.ExcelFile(file_name) as xls:
    sheet_to_read = "Sheet1" if "Sheet1" in xls.sheet_names else xls.sheet_names[0]
    df = pd.read_excel(xls, sheet_name=sheet_to_read)

print("Kolom tersedia:", df.columns.tolist())

# =========================
# 3) Helper untuk pecah skills
# =========================
def clean_tag(tag: str) -> str:
    """Clean a skill tag: trim, lower-case, and collapse whitespace runs to one space."""
    return re.sub(r"\s+", " ", str(tag).strip().lower())

def make_edge_list(df, col_pos, col_skill, skill_type=None):
    """Build an edge list linking each position to every skill tag in its row.

    Rows where either column is missing are ignored. Each edge is
    (position, skill, skill_type) when `skill_type` is truthy, else (position, skill).
    """
    complete_rows = df[[col_pos, col_skill]].dropna()
    # Optional third tuple element, shared by every edge
    suffix = (skill_type,) if skill_type else ()
    edges = []
    for _, record in complete_rows.iterrows():
        position = str(record[col_pos]).strip()
        for piece in str(record[col_skill]).split(","):
            if piece.strip():
                edges.append((position, clean_tag(piece)) + suffix)
    return edges

# =========================
# 4) Build the edge list per category
# =========================
# Input column -> (sheet name, skill_type label)
target_cols = {
    "Hard Skills (Normalized)":  ("Edges_Hard_Skills", "hard"),
    "Soft Skills (Normalized)":  ("Edges_Soft_Skills", "soft"),
    "Non-Skills (Normalized)":   ("Edges_Non_Skills", "non-skill"),
}

results = {}
all_edges = []

for col, (sheet_name, skill_type) in target_cols.items():
    if col in df.columns:
        edges = make_edge_list(df, "Posisi", col, skill_type=skill_type)
        df_edges = pd.DataFrame(edges, columns=["source", "target", "skill_type"])
        # The per-category sheet omits the skill_type column
        results[sheet_name] = df_edges[["source", "target"]]
        all_edges += edges
    else:
        print(f"Peringatan: kolom '{col}' tidak ditemukan, dilewati.")

# =========================
# 5) Save to Excel
# =========================
output_xlsx = "edge_list_skills.xlsx"

# Combined edge list of all categories (keeps the skill_type column)
all_edges_df = pd.DataFrame(all_edges, columns=["source", "target", "skill_type"])

with pd.ExcelWriter(output_xlsx, engine="openpyxl") as writer:
    # One sheet per category (without the skill_type column)
    for sheet_name in results:
        results[sheet_name].to_excel(writer, index=False, sheet_name=sheet_name[:31])

    all_edges_df.to_excel(writer, index=False, sheet_name="All_Skills")

print("\nFile tersimpan:", output_xlsx)
files.download(output_xlsx)


Saving (gaji minimal UMK)_classified_skills_normalized_revisi_1_gabungan.xlsx to (gaji minimal UMK)_classified_skills_normalized_revisi_1_gabungan.xlsx
Kolom tersedia: ['Posisi', 'Perusahaan', 'Lokasi', 'Tanggal', 'URL', 'Deskripsi', 'digital marketing', 'Gaji Minimum (per month)', 'Gaji Maksimum (per month)', 'Gaji Tidak Ditemukan', 'Hard Skills', 'Soft Skills', 'Non-Skills', 'Hard Skills (Normalized)', 'Soft Skills (Normalized)', 'Non-Skills (Normalized)']

File tersimpan: edge_list_skills.xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# PREPROCESS KE-4 EDGE LIST & NODE TABLES (REVISI 2)

# =========================
# 1) Install & Import
# =========================
!pip install pandas openpyxl --quiet

import pandas as pd
import re
from google.colab import files

# =========================
# 2) Upload & read Excel
# =========================
uploaded = files.upload()
file_name = list(uploaded.keys())[0]

# Read "Sheet1" when the workbook has it, otherwise the first sheet. The sheet is
# chosen by inspecting the sheet names, so a genuine read error is no longer hidden
# by a silent retry on another sheet, and the workbook handle is closed by `with`.
with pd.ExcelFile(file_name) as xls:
    sheet_to_read = "Sheet1" if "Sheet1" in xls.sheet_names else xls.sheet_names[0]
    df = pd.read_excel(xls, sheet_name=sheet_to_read)

print("Kolom tersedia:", df.columns.tolist())

# Validate the required columns
COL_POS = "Posisi"
SKILL_COLS = ["Hard Skills (Normalized)", "Soft Skills (Normalized)", "Non-Skills (Normalized)"]

available_columns = set(df.columns)

# The position column is mandatory
if COL_POS not in available_columns:
    raise ValueError(f"Kolom wajib '{COL_POS}' tidak ditemukan.")

# At least one of the three skill columns must exist
if available_columns.isdisjoint(SKILL_COLS):
    raise ValueError("Minimal salah satu kolom skill (Hard/Soft/Non) harus ada.")

# =========================
# 3) Helpers
# =========================
def clean_skill(tag: str) -> str:
    """Normalise a skill tag: trimmed, lower-cased, whitespace runs reduced to one space."""
    trimmed_lower = str(tag).strip().lower()
    collapsed = re.sub(r"\s+", " ", trimmed_lower)
    return collapsed

def split_skills(cell) -> list:
    """Split a skill cell on commas and clean each piece; a missing cell gives []."""
    if pd.isna(cell):
        return []
    cleaned = map(clean_skill, str(cell).split(","))
    # Drop pieces that are empty after cleaning
    return [tag for tag in cleaned if tag]

def normalize_position(label: str) -> str:
    """Return the position label as text with surrounding whitespace removed.

    The original letter case is kept (no lower-casing) so labels stay readable in Gephi.
    """
    text = str(label)
    return text.strip()

# =========================
# 4) Build NODE IDs (consistent for positions & skills)
# =========================
# Unique, non-empty position labels
pos_labels = df[COL_POS].dropna().map(normalize_position)
pos_labels = pos_labels[pos_labels != ""].drop_duplicates().tolist()

# Unique skills across the skill columns that exist (already lower-cased by clean_skill)
skill_labels = sorted({
    skill
    for col in SKILL_COLS if col in df.columns
    for cell in df[col]
    for skill in split_skills(cell)
})

# IDs are keyed by (node_type, label), so a position and a skill that share the same
# text still get separate nodes.
node_rows = []
id_map = {}   # (node_type, label) -> id (int)

next_id = 1

# Positions receive their IDs first (sorted to be deterministic), then the skills
labels_by_type = (
    ("posisi", sorted(set(pos_labels))),
    ("skill", skill_labels),
)
for node_type, labels in labels_by_type:
    for label in labels:
        key = (node_type, label)
        if key not in id_map:
            id_map[key] = next_id
            node_rows.append({"Id": next_id, "Label": label, "node_type": node_type})
            next_id += 1

nodes_all = pd.DataFrame(node_rows, columns=["Id", "Label", "node_type"])

# Split the node table back out per node type
nodes_positions = nodes_all[nodes_all["node_type"] == "posisi"].copy()
nodes_skills    = nodes_all[nodes_all["node_type"] == "skill"].copy()

# Extra reference table (node_type, label) -> Id, for debugging
node_id_map = (
    pd.DataFrame(
        [
            {"node_type": node_type, "label": label, "Id": node_id}
            for (node_type, label), node_id in id_map.items()
        ],
        columns=["node_type", "label", "Id"],
    )
    .sort_values(["node_type", "label"])
    .reset_index(drop=True)
)

# =========================
# 5) Bangun EDGE LIST (raw dan weighted) per kategori & gabungan
# =========================
def make_edges(df, col_pos, col_skill, skill_type: str):
    """Build one edge record per (position, skill tag) pair found in `df`.

    Args:
        df: DataFrame holding the position column and the skill column.
        col_pos: name of the position column.
        col_skill: name of the comma-separated skill column.
        skill_type: label stored in the "skill_type" field of every edge.

    Returns:
        list[dict]: records with the keys Source, Target, Type ("Undirected"),
        Weight (1), skill_type, source_label and target_label. Source and Target are
        the node IDs looked up in `id_map`.

    Rows without a position are skipped. They have no node in `id_map`, and
    previously a missing position was turned into the text "nan" and the lookup
    failed with a KeyError.
    """
    edges = []
    sub = df[[col_pos, col_skill]].dropna(subset=[col_pos])
    for pos_raw, skill_cell in zip(sub[col_pos], sub[col_skill]):
        pos_label = normalize_position(pos_raw)
        if not pos_label:
            continue
        skills = split_skills(skill_cell)  # [] when the skill cell is missing
        if not skills:
            continue
        sid = id_map[("posisi", pos_label)]  # same source node for every skill of the row
        for sk in skills:
            edges.append({
                "Source": sid,
                "Target": id_map[("skill", sk)],
                "Type": "Undirected",
                "Weight": 1,
                "skill_type": skill_type,
                "source_label": pos_label,
                "target_label": sk
            })
    return edges

# Input column -> (base sheet name, skill_type)
edge_conf = {
    "Hard Skills (Normalized)": ("Edges_Hard_Skills", "hard"),
    "Soft Skills (Normalized)": ("Edges_Soft_Skills", "soft"),
    "Non-Skills (Normalized)":  ("Edges_Non_Skills", "non-skill"),
}

# Column layout shared by every raw / weighted edge table, including empty ones
RAW_EDGE_COLUMNS = ["Id","Source","Target","Type","Weight","skill_type","source_label","target_label"]
WEIGHTED_EDGE_COLUMNS = ["Id","Source","Target","Type","Weight","skill_type"]


def build_edge_frames(rows, id_prefix):
    """Turn edge records into a raw edge table and a weighted (aggregated) edge table.

    Args:
        rows: list of edge dicts as produced by `make_edges`.
        id_prefix: prefix of the generated edge IDs; raw edges are numbered
            "<prefix>_E1", "<prefix>_E2", ... and weighted edges "<prefix>_W1", ...

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: (raw, weighted). The weighted table sums
        "Weight" over duplicate (Source, Target, Type, skill_type) edges. Both tables
        always use the shared column layouts, also when `rows` is empty.
    """
    if not rows:
        return pd.DataFrame(columns=RAW_EDGE_COLUMNS), pd.DataFrame(columns=WEIGHTED_EDGE_COLUMNS)

    raw = pd.DataFrame(rows)
    raw.insert(0, "Id", [f"{id_prefix}_E{n}" for n in range(1, len(raw) + 1)])

    # Weighted: merge duplicate edges by summing their weights
    weighted = (
        raw
        .groupby(["Source","Target","Type","skill_type"], as_index=False)["Weight"]
        .sum()
        .reset_index(drop=True)
    )
    weighted.insert(0, "Id", [f"{id_prefix}_W{n}" for n in range(1, len(weighted) + 1)])

    return raw[RAW_EDGE_COLUMNS], weighted[WEIGHTED_EDGE_COLUMNS]


edges_sheets_raw = {}
edges_sheets_weighted = {}
all_edges_rows = []

for col, (sheet_base, stype) in edge_conf.items():
    if col not in df.columns:
        print(f"Peringatan: kolom '{col}' tidak ditemukan, dilewati.")
        continue
    rows = make_edges(df, COL_POS, col, stype)
    raw_df, w_df = build_edge_frames(rows, sheet_base)
    edges_sheets_raw[sheet_base] = raw_df
    edges_sheets_weighted[sheet_base + "_Weighted"] = w_df
    all_edges_rows.extend(rows)

# Combined tables over all categories. The weighted table now has the same column
# order as the per-category weighted sheets (previously "skill_type" came before
# "Weight" whenever the table was non-empty).
all_edges_raw, all_edges_weighted = build_edge_frames(all_edges_rows, "ALL")

# =========================
# 6) Write to Excel & download
# =========================
output_xlsx = "gephi_nodes_edges_weighted_consistent_ids.xlsx"

# Sheets with fixed names, in the order they are written after the per-category sheets
fixed_sheets = [
    ("All_Skills", all_edges_raw),                    # combined, raw
    ("All_Skills_Weighted", all_edges_weighted),      # combined, weighted
    ("Nodes_Positions", nodes_positions),             # nodes
    ("Nodes_Skills", nodes_skills),
    ("Nodes_All", nodes_all),
    ("Node_ID_Map", node_id_map),                     # reference map
]

with pd.ExcelWriter(output_xlsx, engine="openpyxl") as writer:
    # Per-category edges: all raw sheets first, then all weighted sheets
    for category_sheets in (edges_sheets_raw, edges_sheets_weighted):
        for sheet_name, df_edges in category_sheets.items():
            df_edges.to_excel(writer, index=False, sheet_name=sheet_name[:31])

    for sheet_name, frame in fixed_sheets:
        frame.to_excel(writer, index=False, sheet_name=sheet_name)

print("\nFile tersimpan:", output_xlsx)
files.download(output_xlsx)


Saving (gaji minimal UMK)_classified_skills_normalized_revisi_1_gabungan.xlsx to (gaji minimal UMK)_classified_skills_normalized_revisi_1_gabungan (2).xlsx
Kolom tersedia: ['Posisi', 'Perusahaan', 'Lokasi', 'Tanggal', 'URL', 'Deskripsi', 'digital marketing', 'Gaji Minimum (per month)', 'Gaji Maksimum (per month)', 'Gaji Tidak Ditemukan', 'Hard Skills', 'Soft Skills', 'Non-Skills', 'Hard Skills (Normalized)', 'Soft Skills (Normalized)', 'Non-Skills (Normalized)']

File tersimpan: gephi_nodes_edges_weighted_consistent_ids.xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>