In [None]:
import pandas as pd
from google.colab import files
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

In [None]:
uploaded = files.upload()

Saving CrimesOnWomenData.csv to CrimesOnWomenData.csv


In [None]:
data = pd.read_csv("CrimesOnWomenData.csv")
print(data.head())

   Unnamed: 0              State  Year  Rape   K&A   DD   AoW   AoM    DV  WT
0           0     ANDHRA PRADESH  2001   871   765  420  3544  2271  5791   7
1           1  ARUNACHAL PRADESH  2001    33    55    0    78     3    11   0
2           2              ASSAM  2001   817  1070   59   850     4  1248   0
3           3              BIHAR  2001   888   518  859   562    21  1558  83
4           4       CHHATTISGARH  2001   959   171   70  1763   161   840   0


In [None]:
# Предобработка данных: кодирование категориальных переменных
data['State'] = data['State'].astype('category').cat.codes

In [None]:
# Определим признаки и целевую переменную (например, будем предсказывать 'Rape')
X = data.drop(columns=['Rape'])
y = data['Rape']

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Обучение модели Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Прогнозирование и оценка качества
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f'Accuracy of Random Forest: {rf_accuracy}')

Accuracy of Random Forest: 0.04054054054054054


In [None]:
# Обучение модели Gradient Boosting
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)

# Прогнозирование и оценка качества
gb_predictions = gb_model.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_predictions)
print(f'Accuracy of Gradient Boosting: {gb_accuracy}')

Accuracy of Gradient Boosting: 0.013513513513513514


In [None]:
# Определение базовых моделей для стэкинга
base_models = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('gb', GradientBoostingClassifier(random_state=42))
]

# Определение мета-модели
stacking_model = StackingClassifier(estimators=base_models, final_estimator=LogisticRegression())
stacking_model.fit(X_train, y_train)

# Прогнозирование и оценка качества
stacking_predictions = stacking_model.predict(X_test)
stacking_accuracy = accuracy_score(y_test, stacking_predictions)
print(f'Accuracy of Stacking Classifier: {stacking_accuracy}')



Accuracy of Stacking Classifier: 0.013513513513513514


In [None]:
print(f'Random Forest Accuracy: {rf_accuracy}')
print(f'Gradient Boosting Accuracy: {gb_accuracy}')
print(f'Stacking Classifier Accuracy: {stacking_accuracy}')

# Определяем лучшую модель
best_accuracy = max(rf_accuracy, gb_accuracy, stacking_accuracy)
if best_accuracy == rf_accuracy:
    print("Лучшее решение: Random Forest")
elif best_accuracy == gb_accuracy:
    print("Лучшее решение: Gradient Boosting")
else:
    print("Лучшее решение: Stacking Classifier")

Random Forest Accuracy: 0.04054054054054054
Gradient Boosting Accuracy: 0.013513513513513514
Stacking Classifier Accuracy: 0.013513513513513514
Лучшее решение: Random Forest
