In [None]:
# Required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Load the news dataset from the CSV file that sits next to this notebook
data = pd.read_csv(
    filepath_or_buffer='persian_news_entekhab_dataset.csv',
)

In [None]:
# Preview the first five rows to sanity-check the loaded columns
data.head(n=5)

Unnamed: 0,Text,Label
0,تصاویر: موج پرشور رای‌گیری در کرمانشاه,سیاست
1,عکس / فخرالسادات محتشمی‌پور، همسر تاج‌زاده در ...,سیاست
2,عکس / بهزاد نبوی، فعال کهنه کار سیاسی، در انتخ...,سیاست
3,ظریف: تمام تلاش قفس سازان نگاه داشتن ‎ما مردم ...,سیاست
4,زمان رای‌گیری تا ساعت ۲۰ تمدید شد,سیاست


In [None]:
# Separate the input texts (X) from the target category labels (y)
X, y = data['Text'], data['Label']

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Initialize CountVectorizer
# Note to self: remember to walk through each of its parameters one by one.
# No arguments are passed here, so the vectorizer runs with its default settings.
vectorizer = CountVectorizer()

In [None]:
len(vectorizer.get_feature_names_out())

5826

In [None]:
# Fit and transform the training data
X_train_bow = vectorizer.fit_transform(X_train)

In [None]:
# Build a logistic-regression classifier with default settings and train it
# on the bag-of-words training matrix; fit() returns the fitted estimator.
model = LogisticRegression().fit(X_train_bow, y_train)

In [None]:
# Transform the test data
# Only transform() is used here (no refit), so the test texts are encoded with
# the vocabulary that was learned from the training set.
X_test_bow = vectorizer.transform(X_test)

# Make predictions
# One predicted label per held-out text.
y_pred = model.predict(X_test_bow)

In [None]:
# Share of held-out texts whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.925


In [None]:
# Per-class precision / recall / F1 as a nested dict instead of a text table
report = classification_report(y_test, y_pred, output_dict=True)

# Build the display table in one chain: one row per class or aggregate,
# with the former index exposed as a regular 'Class' column.
report_df = (
    pd.DataFrame(report)
    .transpose()
    .reset_index()
    .rename(columns={'index': 'Class'})
)

# Show the table
report_df

Unnamed: 0,Class,precision,recall,f1-score,support
0,سلامت,0.873239,0.815789,0.843537,76.0
1,سیاست,0.990991,0.901639,0.944206,122.0
2,فناوری,0.965909,0.801887,0.876289,106.0
3,ورزش,0.907317,0.989362,0.946565,376.0
4,accuracy,0.925,0.925,0.925,0.925
5,macro avg,0.934364,0.877169,0.902649,680.0
6,weighted avg,0.927654,0.925,0.923672,680.0
