In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
# Paths to the CSV files holding the training and test sets
# (relative to the notebook's working directory).
train_data_path, test_data_path = 'train_data_en.csv', 'test_data_en.csv'

# Read both files into DataFrames in one pass.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in (train_data_path, test_data_path)
)

In [7]:
# Free-text columns merged into a single document per row
# (column names translate to: "Task", "Situation", "Optimal plan").
TEXT_COLUMNS = ('Задача en', 'Обстановка en', 'Оптимальный план en')


def combine_text_columns(frame, columns=TEXT_COLUMNS, sep=' '):
    """Concatenate several text columns of ``frame`` into one Series.

    Missing values are replaced by empty strings before concatenation.
    Plain ``+`` on pandas string columns propagates NaN, so one missing
    field would turn the whole combined text into NaN, which the TF-IDF
    vectorizer cannot tokenize.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing every column listed in ``columns``.
    columns : sequence of str
        Columns to join, in order. Must be non-empty.
    sep : str
        Separator inserted between consecutive columns.

    Returns
    -------
    pandas.Series
        One combined string per row, indexed like ``frame``.

    Raises
    ------
    KeyError
        If a requested column is absent from ``frame``.
    ValueError
        If ``columns`` is empty.
    """
    if not columns:
        raise ValueError('columns must contain at least one column name')

    combined = None
    for column in columns:
        # fillna('') keeps rows with a missing field usable; astype(str)
        # guards against non-string cells (e.g. numbers parsed by read_csv).
        part = frame[column].fillna('').astype(str)
        combined = part if combined is None else combined + sep + part
    return combined


# Build the model input text identically for both splits.
train_data['combined_text'] = combine_text_columns(train_data)
test_data['combined_text'] = combine_text_columns(test_data)

In [8]:
# Classifier: a forest of 100 trees with a fixed seed so runs are repeatable.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

# Text featurizer: TF-IDF weights with the vocabulary capped at 5000 terms.
tfidf_vectorizer = TfidfVectorizer(
    max_features=5000,
)

In [9]:
# Chain vectorization and classification so raw text goes in and labels
# come out; the step names 'tfidf' and 'rf' identify each stage.
pipeline_steps = [
    ('tfidf', tfidf_vectorizer),
    ('rf', rf_classifier),
]
pipeline = Pipeline(steps=pipeline_steps)

In [10]:
# Features are the combined text; the target column name translates to
# "Success of the predicted plan".
X, y = train_data['combined_text'], train_data['Успех предсказанного плана']

# Hold out 20% of the labelled rows for validation; the seed fixes the split.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Fit the TF-IDF vocabulary and the random forest on the training split only.
pipeline.fit(X=X_train, y=y_train)

In [12]:
# Score the fitted pipeline on the held-out validation split.
y_val_pred = pipeline.predict(X_val)

# Fraction of validation rows whose predicted label matches the true label.
accuracy_val = accuracy_score(y_true=y_val, y_pred=y_val_pred)

print(f'Accuracy of Random Forest on validation set: {accuracy_val}')

Accuracy of Random Forest on validation set: 0.6573116691285081
