# Train a classifier

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Load the DialoGPT model and tokenizer.
# Both objects are kept as module-level globals; generate_response() reads
# `tokenizer` and `model` directly rather than taking them as arguments.
model_name = "microsoft/DialoGPT-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)  # tokenizer for the same checkpoint name
model = AutoModelForCausalLM.from_pretrained(model_name)  # causal LM used for generation

# Question texts. Long entries are wrapped with implicit string concatenation;
# the resulting strings are unchanged. The final entry is an empty string
# (looks like an unfilled placeholder slot - TODO confirm).
questions = [
    (
        "Have you been experiencing any negative mood changes or health "
        "problems recently? If yes, please describe."
    ),
    "How long (in weeks) have you been facing these problems?",
    "How have these problems impacted your life, work, and relationships?",
    (
        "On a scale of 1 to 10, how severely impacted do you feel by these "
        "problems? 10 stands for the most severe impact and 1 for the least "
        "severe impact."
    ),
    (
        "Is there a life event that might have led to these changes? "
        "Please describe."
    ),
    "",
]

# Placeholder topic labels used as examples.
# NOTE(review): this list has 8 entries (5 labels + 3 empty strings) while
# `questions` has 6 - if the two are meant to be index-aligned, confirm which
# length is intended.
question_types = [
    "negative mood",
    "the duration",
    "the impact",
    "the scale",
    "life event",
] + [""] * 3  # three unfilled placeholder slots

# Mock classification model training - in practice, you'd have a larger,
# annotated dataset for this; pass it via `responses`.
def train_response_classifier(responses=None):
    """Fit a bag-of-words logistic-regression classifier on labelled responses.

    Args:
        responses: Optional iterable of ``(label, text)`` pairs to train on.
            When omitted (or ``None``), the small built-in mock dataset below
            is used, which reproduces the original behavior.

    Returns:
        A ``(vectorizer, classifier)`` tuple: the fitted ``CountVectorizer``
        and the ``LogisticRegression`` model trained on its output.

    Raises:
        ValueError: If ``responses`` is provided but contains no pairs.

    Note:
        The built-in labels are "description", "duration", "impact" and
        "scale". There are no built-in examples for a life-event category.
    """
    if responses is None:
        responses = [
            ("description", "I have been feeling quite down lately."),
            ("description", "Yes, I often feel tired."),
            ("duration", "It has been about 5 weeks."),
            ("duration", "For a couple of months."),
            ("impact", "It has really affected my work performance."),
            ("impact", "My relationships are suffering."),
            ("scale", "I would say it's about an 8."),
            ("scale", "Maybe a 6."),
        ]
    else:
        # Materialize once so generators can be traversed twice below.
        responses = list(responses)
        if not responses:
            raise ValueError(
                "responses must contain at least one (label, text) pair"
            )

    train_labels = [label for label, _ in responses]
    train_texts = [text for _, text in responses]

    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(train_texts)
    classifier = LogisticRegression()
    classifier.fit(X, train_labels)

    return vectorizer, classifier

# Train once when this cell/module runs; classify_response() reads these
# module-level objects directly.
vectorizer, classifier = train_response_classifier()

def classify_response(question_type, response):
    """Report whether the classifier's label for ``response`` equals ``question_type``.

    Args:
        question_type: Label to compare against the predicted label. The
            comparison is plain equality, so it must be exactly one of the
            labels the classifier was trained with.
        response: The user's answer text to classify.

    Returns:
        The result of ``predicted_label == question_type``.

    NOTE(review): the training labels in train_response_classifier() are
    "description", "duration", "impact" and "scale", whereas the entries of
    ``question_types`` are "negative mood", "the duration", "the impact",
    "the scale" and "life event". Passing ``question_types`` entries verbatim
    would therefore never match - confirm what callers pass.
    """
    features = vectorizer.transform([response])
    predicted_labels = classifier.predict(features)
    return predicted_labels[0] == question_type

def generate_response(prompt):
    """Generate the model's reply to ``prompt``.

    The prompt is terminated with the tokenizer's end-of-sequence token and
    fed to the module-level ``model``. Only the tokens produced after the
    prompt are decoded and returned.

    Args:
        prompt: Text to respond to.

    Returns:
        The decoded continuation as a string, with special tokens removed.
    """
    encoded = tokenizer(prompt + tokenizer.eos_token, return_tensors="pt")
    input_ids = encoded["input_ids"]
    # Pass the attention mask explicitly: pad_token_id is the EOS id and the
    # prompt itself ends with EOS, so generate() cannot infer which positions
    # are padding and warns that results may be unreliable without a mask.
    outputs = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        max_length=1000,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop the echoed prompt tokens; keep only the newly generated ones.
    prompt_length = input_ids.shape[-1]
    return tokenizer.decode(outputs[0, prompt_length:], skip_special_tokens=True)
