# Ticket Assignee Predictor (POC)

Train a simple model that predicts a ticket's assignee from its title, description, and labels (stored as a JSON-encoded list).

In [None]:
import pandas as pd
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import joblib

In [None]:
# Load mock data
df = pd.read_csv('../data/issues_mock.csv')

# Labels are stored as a JSON-encoded list; decode each cell and join the
# entries into one space-separated string for TF-IDF. An empty CSV cell is
# read as NaN (not a str), which json.loads cannot parse, so map it to ''.
df['labels'] = df['labels'].apply(
    lambda raw: ' '.join(json.loads(raw)) if isinstance(raw, str) else ''
)

# Concatenating NaN with a string yields NaN, and TfidfVectorizer rejects NaN
# as a document. Treat a missing title/description as empty text instead.
df['text'] = (
    df['title'].fillna('')
    + ' '
    + df['description'].fillna('')
    + ' '
    + df['labels']
)

In [None]:
# Model input is the combined text column; the target is the assignee column.
X, y = df['text'], df['assignee']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build a simple pipeline: TF-IDF features feeding a random forest classifier.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', RandomForestClassifier(random_state=42)),
])

# Fit on the training split, then report the score on the held-out split.
pipeline.fit(X_train, y_train)
print(f"Validation accuracy: {pipeline.score(X_test, y_test):.2f}")

In [None]:
# Save the model
from pathlib import Path

model_path = '../model/ticket_assigner.pkl'

# joblib.dump does not create missing directories; make sure the target
# folder exists so a fresh checkout does not fail with FileNotFoundError.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

# NOTE: joblib files are pickle-based; only load this artifact from a
# trusted location.
joblib.dump(pipeline, model_path)
print('Model saved!')