In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split


In [4]:
# Paths of the train and test CSV files (relative to the working directory).
train_data_path = 'train_data_en.csv'
test_data_path = 'test_data_en.csv'

# Read both files into DataFrames, train first, then test.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in (train_data_path, test_data_path)
)

In [5]:
def _combine_text_columns(frame, columns=('Задача en', 'Обстановка en', 'Оптимальный план en')):
    """Join the given text columns of `frame` into one space-separated Series.

    Missing cells are replaced by empty strings before joining. Plain string
    concatenation would otherwise propagate NaN (`NaN + ' ' + 'text'` is NaN),
    and TfidfVectorizer cannot process NaN documents.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing every column listed in `columns`.
    columns : sequence of str
        Column names to join, in order.

    Returns
    -------
    pandas.Series
        One combined string per row, aligned with `frame.index`.
    """
    parts = [frame[column].fillna('').astype(str) for column in columns]
    combined = parts[0]
    for part in parts[1:]:
        combined = combined + ' ' + part
    return combined


# Same feature construction for both splits, so train and test stay consistent.
train_data['combined_text'] = _combine_text_columns(train_data)
test_data['combined_text'] = _combine_text_columns(test_data)

In [6]:
# Target labels for every training row.
y_train = train_data['Успех предсказанного плана']

# TF-IDF features over the combined text; max_features caps the vocabulary size.
# NOTE(review): the vectorizer is fitted on all training rows before the
# train/validation split, so validation text influences the vocabulary and
# IDF weights — consider fitting on the training split only.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_corpus = train_data['combined_text']
X_train_tfidf = tfidf_vectorizer.fit_transform(train_corpus)

In [7]:
# Hold out 20% of the rows for validation, with a fixed seed for reproducibility.
# The labels are read from train_data rather than from `y_train`: this cell
# rebinds `y_train` to the training split, so feeding `y_train` back in would
# make a second run of the cell fail with mismatched sample counts.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_tfidf,
    train_data['Успех предсказанного плана'],
    test_size=0.2,
    random_state=42,
)

In [8]:
# Logistic-regression classifier; the iteration limit is set to 1000.
logreg_params = {'max_iter': 1000}
model = LogisticRegression(**logreg_params)
model.fit(X=X_train, y=y_train)

In [9]:
# Predict the validation split and report the fraction of correct labels.
y_val_pred = model.predict(X=X_val)
accuracy_val = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f'Accuracy on validation set: {accuracy_val}')

Accuracy on validation set: 0.6646971935007385


In [10]:
# Vectorize the test text with the already-fitted vectorizer (transform only,
# no refit), so test features use the same vocabulary as the training features.
test_corpus = test_data['combined_text']
X_test_tfidf = tfidf_vectorizer.transform(test_corpus)

In [11]:
# Predicted labels for every test row.
y_test_pred = model.predict(X=X_test_tfidf)

In [12]:
# Peek at the first five test predictions.
first_predictions = y_test_pred[:5]
print(first_predictions)

[1 0 1 1 1]


In [12]:
# Score the test-set predictions against the true labels, when the test file
# provides them.
# NOTE(review): assumes test_data stores its labels in the same column as
# train_data — confirm against the test CSV.
target_column = 'Успех предсказанного плана'

if target_column in test_data.columns:
    y_test = test_data[target_column]

    accuracy = accuracy_score(y_test, y_test_pred)
    precision = precision_score(y_test, y_test_pred)
    recall = recall_score(y_test, y_test_pred)
    f1 = f1_score(y_test, y_test_pred)

    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1 Score: {f1}')
else:
    # Without ground-truth labels there is nothing to compare against.
    print(f'Column {target_column!r} not found in test_data; test metrics skipped.')