In [1]:
import pandas as pd
import random

# ---------------------------------------------------------------------------
# Sentence templates, one list per CV category. Every "{placeholder}" is
# filled in by generate_data() with a random entry from a vocabulary list.
# ---------------------------------------------------------------------------
web_dev_templates = [
    "Experienced in {frontend} and building responsive {web_type}.",
    "Developed {backend} APIs using {framework}.",
    "Front-end developer skilled in {tool}.",
    "Built {project_type} web apps using {lang} and {db}.",
]

data_science_templates = [
    "Experienced in {lang} and performing {task}.",
    "Built {model_type} models using {framework}.",
    "Performed {technique} using {lib}.",
    "Worked on {project} and data preprocessing.",
]

uiux_templates = [
    "Designed {app_type} using {tool}.",
    "Created {artifact} for web applications.",
    "Conducted {activity} and {testing}.",
    "Experienced in {concept} and interaction design.",
]

# --- Vocabulary for the web-development templates ---------------------------
frontend = [
    "HTML, CSS, JavaScript",
    "React",
    "Vue.js",
    "Angular",
]
web_type = [
    "websites",
    "dashboards",
    "portfolios",
    "landing pages",
]
backend = [
    "RESTful",
    "GraphQL",
    "microservice",
]
framework = [
    "Node.js",
    "Express",
    "Django",
    "Flask",
]
tool = [
    "Bootstrap",
    "Tailwind",
    "SASS",
    "Next.js",
]
project_type = [
    "e-commerce",
    "news portal",
    "management system",
]
lang = [
    "Python",
    "PHP",
    "JavaScript",
]
db = [
    "MySQL",
    "MongoDB",
    "PostgreSQL",
]

# --- Vocabulary for the data-science templates ------------------------------
# `framework_ds` is what generate_data() substitutes for "{framework}" in the
# data-science sentences (the plain `framework` list is web-only).
model_type = [
    "machine learning",
    "deep learning",
]
framework_ds = [
    "scikit-learn",
    "TensorFlow",
    "PyTorch",
]
technique = [
    "data visualization",
    "EDA",
    "feature engineering",
]
lib = [
    "Pandas",
    "Matplotlib",
    "Seaborn",
]
project = [
    "predictive modeling",
    "time series forecasting",
]
task = [
    "data analysis",
    "statistical modeling",
]

# --- Vocabulary for the UI/UX templates -------------------------------------
# `tool_uiux` is what generate_data() substitutes for "{tool}" in the UI/UX
# sentences (the plain `tool` list is web-only).
app_type = [
    "mobile apps",
    "web dashboards",
    "admin panels",
]
artifact = [
    "wireframes",
    "user flows",
    "mockups",
]
activity = [
    "user research",
    "competitive analysis",
]
testing = [
    "usability testing",
    "A/B testing",
]
concept = [
    "UI prototyping",
    "user-centered design",
]
tool_uiux = [
    "Figma",
    "Adobe XD",
    "Sketch",
]

def generate_data(n=100, seed=None):
    """Generate a synthetic, labelled dataset of one-sentence CV snippets.

    For each row a category is drawn uniformly from "Web Developer",
    "Data Science" and "UI/UX Design"; a sentence template for that category
    is then drawn and its placeholders are filled with random entries from the
    module-level vocabulary lists.

    Args:
        n: Number of rows to generate. ``range(n)`` is used, so ``n <= 0``
            yields an empty DataFrame.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the result is reproducible and the global ``random`` state
            is left untouched. When ``None`` (default) the module-level
            ``random`` functions are used, exactly as before.

    Returns:
        pandas.DataFrame with two columns: ``text`` (the generated sentence)
        and ``label`` (the category name).
    """
    # Both the `random` module and a `random.Random` instance expose `choice`,
    # so a single callable covers the seeded and unseeded paths.
    rng = random.Random(seed) if seed is not None else random
    pick = rng.choice

    texts = []
    labels = []
    for _ in range(n):
        category = pick(["Web Developer", "Data Science", "UI/UX Design"])
        # Every keyword is drawn even when the chosen template does not use it;
        # str.format ignores unused keywords, and keeping all draws preserves
        # the original sequence of RNG calls.
        if category == "Web Developer":
            text = pick(web_dev_templates).format(
                frontend=pick(frontend),
                web_type=pick(web_type),
                backend=pick(backend),
                framework=pick(framework),
                tool=pick(tool),
                project_type=pick(project_type),
                lang=pick(lang),
                db=pick(db)
            )
        elif category == "Data Science":
            text = pick(data_science_templates).format(
                # Data-science sentences use their own language list rather
                # than the web-oriented module-level `lang`.
                lang=pick(["Python", "R"]),
                task=pick(task),
                model_type=pick(model_type),
                framework=pick(framework_ds),
                technique=pick(technique),
                lib=pick(lib),
                project=pick(project)
            )
        else:
            text = pick(uiux_templates).format(
                app_type=pick(app_type),
                tool=pick(tool_uiux),
                artifact=pick(artifact),
                activity=pick(activity),
                testing=pick(testing),
                concept=pick(concept)
            )
        texts.append(text)
        labels.append(category)
    return pd.DataFrame({"text": texts, "label": labels})

# Seed the global RNG before generating: generate_data() draws from the
# `random` module, so without a fixed seed every Restart Kernel -> Run All
# would write a different CSV and downstream metrics would not be comparable.
SEED = 42
random.seed(SEED)

df = generate_data(300)

# index=False keeps the row index out of the file, so the CSV holds only the
# `text` and `label` columns.
df.to_csv("cv_dummy_dataset.csv", index=False)

print("Dataset berhasil dibuat dan disimpan sebagai cv_dummy_dataset.csv")

Dataset berhasil dibuat dan disimpan sebagai cv_dummy_dataset.csv
                                                  text          label
109  Experienced in React and building responsive p...  Web Developer
84           Front-end developer skilled in Bootstrap.  Web Developer
168  Worked on predictive modeling and data preproc...   Data Science
239                    Performed EDA using Matplotlib.   Data Science
275   Built e-commerce web apps using PHP and MongoDB.  Web Developer


In [4]:
# Show the first rows of `df` (pandas defaults to five) as the cell's output.
df.head()

Unnamed: 0,text,label
0,Worked on predictive modeling and data preproc...,Data Science
1,Front-end developer skilled in SASS.,Web Developer
2,Created user flows for web applications.,UI/UX Design
3,Worked on time series forecasting and data pre...,Data Science
4,Built machine learning models using PyTorch.,Data Science


In [5]:
!pip install sentence-transformers scikit-learn joblib pandas



In [8]:
import pandas as pd
import joblib
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

def train_model_st(dataset_path="cv_dummy_dataset.csv", model_path="model_st.joblib"):
    """Train a CV-category classifier on sentence embeddings and save it.

    Reads a CSV with ``text`` and ``label`` columns, holds out 20% of the rows
    (``random_state=42``), embeds the text with the ``all-MiniLM-L6-v2``
    SentenceTransformer, fits a ``LogisticRegression`` on the training
    embeddings, prints a classification report for the held-out rows, and
    writes the ``(classifier, encoder)`` tuple to ``model_path`` with joblib.

    Args:
        dataset_path: CSV file to train on. Defaults to the file written by
            the dataset-generation cell.
        model_path: Destination of the joblib artifact. Defaults to the
            location this function has always written to.

    Raises:
        ValueError: If the CSV lacks a ``text`` or ``label`` column.
    """
    df = pd.read_csv(dataset_path)

    # Fail fast with a clear message instead of a KeyError further down.
    if "text" not in df.columns or "label" not in df.columns:
        raise ValueError("Dataset harus memiliki kolom 'text' dan 'label'")

    # astype(str) guarantees the encoder receives strings even if pandas
    # inferred another dtype for the column.
    X = df["text"].astype(str)
    y = df["label"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model_st = SentenceTransformer("all-MiniLM-L6-v2")

    print("🔹 Mengubah teks menjadi embedding...")
    X_train_vec = model_st.encode(X_train.tolist(), show_progress_bar=True)
    X_test_vec = model_st.encode(X_test.tolist(), show_progress_bar=True)

    clf = LogisticRegression(max_iter=2000)
    clf.fit(X_train_vec, y_train)

    y_pred = clf.predict(X_test_vec)
    print("\n📊 Classification Report:")
    print(classification_report(y_test, y_pred))

    # The classifier and the encoder are stored together so inference can
    # reproduce the same embedding step. NOTE: joblib artifacts are
    # pickle-based; only load files from a trusted source.
    joblib.dump((clf, model_st), model_path)
    # Report the path actually written (the old message named a
    # "classify_cv/" directory that the dump call never used).
    print(f"\n✅ Model berhasil disimpan sebagai {model_path}")

# Train only when this code is executed directly (e.g. as a notebook cell or a
# script), not when it is imported as a module.
if __name__ == "__main__":
    train_model_st()

🔹 Mengubah teks menjadi embedding...


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]


📊 Classification Report:
               precision    recall  f1-score   support

 Data Science       1.00      1.00      1.00        22
 UI/UX Design       1.00      1.00      1.00        19
Web Developer       1.00      1.00      1.00        19

     accuracy                           1.00        60
    macro avg       1.00      1.00      1.00        60
 weighted avg       1.00      1.00      1.00        60


✅ Model berhasil disimpan sebagai classify_cv/model_st.joblib


In [3]:
from google.colab import files

# Download the artifact that train_model_st() writes ("model_st.joblib").
# The previous target, "model_tfidf.joblib", is never created by this
# notebook, so the cell would fail on a fresh Restart & Run All.
files.download("model_st.joblib")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>