In [None]:
!pip install joblib



In [None]:
import xgboost as xgb
import joblib
import numpy as np
import pandas as pd
from google.colab import files
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

In [None]:
# Load the prepared churn dataset from the Colab filesystem into a DataFrame.
df = pd.read_csv(filepath_or_buffer='/content/prepared_internet_service_churn.csv')

In [None]:
# Preview the first rows to sanity-check the columns that were loaded.
df.head()

Unnamed: 0,is_tv_subscriber,is_movie_package_subscriber,subscription_age,bill_avg,reamining_contract,service_failure_count,download_avg,upload_avg,download_over_limit,churn
0,1,0,11.95,25,0.14,0,8.4,2.3,0,0
1,0,0,8.22,0,0.716039,0,0.0,0.0,0,1
2,1,0,8.91,16,0.0,0,13.7,0.9,0,1
3,0,0,6.87,21,0.716039,1,0.0,0.0,0,1
4,0,0,6.39,0,0.716039,0,0.0,0.0,0,1


In [None]:
# Separate the target from the predictors: `churn` is the label, and every
# other column of the frame is kept as a feature.
y = df['churn']
X = df.drop(labels=['churn'], axis='columns')

In [None]:
# Hold out 20% of the rows for the final evaluation; the fixed seed makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Hyper-parameter values explored by the grid search (3 * 3 * 3 * 2 = 54
# combinations).
param_grid = dict(
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    n_estimators=[100, 200, 300],
    subsample=[0.8, 1.0],
)

In [None]:
# Base XGBoost classifier created with the library's default settings; it is
# the estimator handed to the grid search, which sets the tuned parameters.
model = xgb.XGBClassifier()

In [None]:
# Exhaustive search over `param_grid`, ranking candidates by accuracy under
# 5-fold cross-validation; verbose=1 prints a summary of the planned fits.
grid_search = GridSearchCV(
    model,
    param_grid,
    scoring='accuracy',
    cv=5,
    verbose=1,
)

In [None]:
# List of (features, labels) pairs for the training and the test split.
# NOTE(review): no visible cell reads `eval_set` -- the grid-search fit call
# does not pass it -- so confirm whether it is still needed. If it is ever used
# for early stopping, including the test split here would leak test data.
eval_set = [(X_train, y_train), (X_test, y_test)]

In [None]:
# Run the cross-validated search on the training split only; the test split is
# not passed here and stays untouched until the final evaluation.
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 54 candidates, totalling 270 fits


In [None]:
# Report the winning hyper-parameter combination and its mean
# cross-validated accuracy.
best_params = grid_search.best_params_
best_cv_accuracy = grid_search.best_score_
print("Лучшие параметры:", best_params)
print("Лучшая точность:", best_cv_accuracy)

Лучшие параметры: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 200, 'subsample': 1.0}
Лучшая точность: 0.9438419842055762


In [None]:
# Take the estimator with the best cross-validated score (GridSearchCV refits
# it on the whole training split by default) and predict the held-out rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

In [None]:
# Score the held-out predictions with each of the four classification metrics.
accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Emit one metric per line, rounded to two decimals.
print(
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.94
Precision: 0.96
Recall: 0.94
F1 Score: 0.95


In [None]:
# Wrap the predictions and the true labels in single-column frames so each can
# be exported on its own.
y_pred_df = pd.DataFrame({'y_pred': y_pred})
y_test_df = pd.DataFrame({'y_test': y_test})
y_train_df = pd.DataFrame({'y_train': y_train})

# Write every frame as JSON Lines (one record per line), then hand the file to
# the browser through Colab's download helper. Order: predictions, test, train.
for frame, json_name in (
    (y_pred_df, 'y_pred.json'),
    (y_test_df, 'y_test.json'),
    (y_train_df, 'y_train.json'),
):
    frame.to_json(json_name, orient='records', lines=True)
    files.download(json_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Persist the tuned classifier with joblib and download the artifact from Colab.
# NOTE(review): this is a pickle-based file, so it should only be loaded from a
# trusted source and with compatible library versions; XGBoost's own
# `save_model` format may be preferable for long-term storage -- confirm what
# the downstream consumer expects before changing it.
joblib.dump(value=best_model, filename='best_model.pkl')

files.download('best_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>