In [1]:
import pandas as pd

df = pd.read_csv('../data/cleaned_texts.csv')
df



Unnamed: 0,subject,text
0,math,algebra is a branch of mathematics dealing wit...
1,science,photosynthesis is the process by which plants ...
2,python,python is a high-level programming language us...
3,ml,machine learning allows systems to learn from ...


In [2]:
df['difficulty'] = ['easy', 'easy', 'medium', 'medium']
df


Unnamed: 0,subject,text,difficulty
0,math,algebra is a branch of mathematics dealing wit...,easy
1,science,photosynthesis is the process by which plants ...,easy
2,python,python is a high-level programming language us...,medium
3,ml,machine learning allows systems to learn from ...,medium


In [3]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
import joblib

X = df['text']
y = df['difficulty']

vectorizer = CountVectorizer()
X_vec = vectorizer.fit_transform(X)

model = LogisticRegression()
model.fit(X_vec, y)

preds = model.predict(X_vec)

print("Accuracy:", accuracy_score(y, preds))
print("F1 Score:", f1_score(y, preds, average='weighted'))


Accuracy: 1.0
F1 Score: 1.0


In [4]:
joblib.dump(model, '../models/quiz_classifier.pkl')
joblib.dump(vectorizer, '../models/vectorizer.pkl')


['../models/vectorizer.pkl']

In [5]:
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=2)
kmeans.fit(X_vec)

joblib.dump(kmeans, '../models/kmeans_topics.pkl')


['../models/kmeans_topics.pkl']

In [6]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
import joblib

questions = [
    "What is addition?",
    "Define variable",
    "Solve x + 2 = 5",
    "Explain backpropagation",
    "How does gradient descent work?"
]

labels = ["easy", "easy", "medium", "hard", "hard"]

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(questions)

model = LogisticRegression()
model.fit(X, labels)

joblib.dump(model, "../models/difficulty_model.pkl")
joblib.dump(vectorizer, "../models/difficulty_vectorizer.pkl")


['../models/difficulty_vectorizer.pkl']