In [None]:
import pandas as pd

# Load the transactions CSV into a DataFrame.
# NOTE(review): the filename is spelled "transcations" - confirm it matches the file on disk.
df = pd.read_csv(filepath_or_buffer="transcations.csv")


In [None]:
# Keep only the text feature and the label column, for ALL rows.
# Do not chain `.head()` onto this assignment: that would shrink `df` to its
# first 5 rows and every later cell would train/evaluate on just those rows.
# Rows missing either value are dropped because the vectoriser cannot handle
# NaN documents; `.copy()` yields an independent frame so the column
# assignments in later cells do not raise SettingWithCopyWarning.
df = df[['Description', 'Category']].dropna().copy()

In [None]:
# Preview the first rows of the two columns used for modelling.
df.loc[:, ['Description', 'Category']].head()

In [None]:
# Normalise the description text: lower-case it, then delete every character
# that is not a-z or whitespace (digits and punctuation are removed, not
# replaced by a space). The pattern is a raw string so that `\s` reaches the
# regex engine without Python treating it as an (invalid) string escape.
df['Description'] = df['Description'].str.lower().str.replace(r'[^a-z\s]', '', regex=True)


In [None]:
# Normalise the labels the same way as the descriptions: lower-case, then
# delete every character that is not a-z or whitespace. Predicted categories
# therefore come back in this normalised form. The pattern is a raw string so
# that `\s` is not parsed as an (invalid) Python string escape.
df['Category'] = df['Category'].str.lower().str.replace(r'[^a-z\s]', '', regex=True)


In [None]:
from sklearn.model_selection import train_test_split

# Features are the description text; targets are the category labels.
X, y = df['Description'], df['Category']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features: the vocabulary and IDF weights are learned from the
# training text only, then re-used unchanged for the held-out text.
vectorizer = TfidfVectorizer()
X_train_vec, X_test_vec = (
    vectorizer.fit_transform(X_train),  # evaluated first: fits the vectoriser
    vectorizer.transform(X_test),       # evaluated second: uses the fitted state
)


In [None]:
from sklearn.naive_bayes import MultinomialNB

# Fit a multinomial Naive Bayes classifier on the TF-IDF training features.
# `fit` returns the estimator itself, so construction and fitting can be chained.
model = MultinomialNB().fit(X_train_vec, y_train)
model  # last expression: show the fitted estimator as the cell output


In [None]:
def predict_category(text):
    """Predict the category label for one transaction description.

    The text is normalised exactly like the training descriptions
    (lower-cased, then every character that is not a-z or whitespace is
    deleted) before it is vectorised, so inference sees the same token
    shapes the model was fitted on.

    Args:
        text: Raw transaction description as a string.

    Returns:
        The first (only) element of ``model.predict`` for the vectorised text.
    """
    import re  # local import so this cell does not depend on another cell's imports

    cleaned = re.sub(r'[^a-z\s]', '', text.lower())
    # The vectoriser expects an iterable of documents, hence the one-item list.
    text_vec = vectorizer.transform([cleaned])
    return model.predict(text_vec)[0]

# Example: classify a single merchant name and print the predicted label.
print(predict_category("Netflix"))


In [None]:
from sklearn.metrics import classification_report
# Score the held-out split and print the per-class classification report.
y_pred = model.predict(X_test_vec)
print(classification_report(y_true=y_test, y_pred=y_pred))


In [None]:
import pickle

# Save model, then save vectorizer, each to its own pickle file.
# NOTE(review): a later cell writes the same two filenames with joblib.dump,
# which overwrites these files - confirm which format the consumer loads.
for artifact_path, artifact in (('model.pkl', model), ('vectorizer.pkl', vectorizer)):
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)


In [None]:
import joblib

# Persist the fitted classifier and vectoriser with joblib.
# NOTE(review): these are the same filenames the pickle cell writes, so the
# files on disk end up in joblib's format - confirm that is what the consumer loads.
joblib.dump(value=model, filename='model.pkl')
joblib.dump(value=vectorizer, filename='vectorizer.pkl')


In [None]:
from google.colab import files
# Trigger a browser download of each saved artifact (Google Colab only).
for artifact_name in ('model.pkl', 'vectorizer.pkl'):
    files.download(artifact_name)
