In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np


data = pd.read_csv("winequality-red.csv", sep=";")

data['good_quality'] = data['quality'].apply(lambda x: 1 if x >= 6 else 0)

X = data.drop(['quality', 'good_quality'], axis=1)
y = data['good_quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

log_reg_model = LogisticRegression(max_iter=100000)
log_reg_model.fit(X_train, y_train)

decision_tree_model = DecisionTreeClassifier(random_state=42, max_depth=10)
decision_tree_model.fit(X_train, y_train)

log_reg_pred = log_reg_model.predict(X_test)
decision_tree_pred = decision_tree_model.predict(X_test)

log_reg_accuracy = accuracy_score(y_test, log_reg_pred)
decision_tree_accuracy = accuracy_score(y_test, decision_tree_pred)

print("Accuracy of Logistic Regression:", log_reg_accuracy)
print("Accuracy of Decision Tree:", decision_tree_accuracy)


Accuracy of Logistic Regression: 0.7229166666666667
Accuracy of Decision Tree: 0.7604166666666666


In [44]:
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import f1_score

if log_reg_accuracy > decision_tree_accuracy:
    best_model = log_reg_model
else:
    best_model = decision_tree_model

bagging_model = BaggingClassifier(base_estimator=best_model, n_estimators=1500, random_state=42)
bagging_model.fit(X_train, y_train)

bagging_pred = bagging_model.predict(X_test)

f1_score_value = f1_score(y_test, bagging_pred)

print("F1 Score with Bagging:", f1_score_value)


F1 Score with Bagging: 0.8181818181818181


In [45]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

random_forest_default = RandomForestRegressor(random_state=42)
random_forest_default.fit(X_train, y_train)
y_pred_default = random_forest_default.predict(X_test)
mse_default = mean_squared_error(y_test, y_pred_default)

optimal_params = {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1}
random_forest_optimal = RandomForestRegressor(random_state=42, **optimal_params)
random_forest_optimal.fit(X_train, y_train)
y_pred_optimal = random_forest_optimal.predict(X_test)
mse_optimal = mean_squared_error(y_test, y_pred_optimal)

mse_improvement = mse_default - mse_optimal

print("Improvement in MSE:", round(mse_improvement, 1))


Improvement in MSE: -0.0


In [105]:
weather=pd.read_csv('temps_extended (1).csv')
y = weather['actual']
X = weather.drop(['actual','weekday','month','day','year'],axis =1)
X_train, X_val, Y_train, Y_val=train_test_split(X,y,test_size=0.3, random_state=42)


In [106]:
from sklearn.ensemble import RandomForestRegressor
from pprint import pprint
rf = RandomForestRegressor(random_state = 42)
# Look at parameters used by our current forest
print('Параметры по умолчанию:\n')
pprint(rf.get_params())


Параметры по умолчанию:

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 1.0,
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}


In [107]:
from sklearn.model_selection import RandomizedSearchCV
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_features = ['auto', 'sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}


In [109]:
rf = RandomForestRegressor(random_state=42)
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid, n_iter=100, 
                               cv=3, verbose=2, random_state=42, n_jobs=-1)
rf_random.fit(X_train, Y_train)
rf_random.best_params_


Fitting 3 folds for each of 100 candidates, totalling 300 fits


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


{'n_estimators': 1000,
 'min_samples_split': 5,
 'min_samples_leaf': 2,
 'max_features': 'sqrt',
 'max_depth': 10,
 'bootstrap': True}

In [110]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

rf_default = RandomForestRegressor(random_state=42)

rf_default.fit(X_train, Y_train)

y_pred_default = rf_default.predict(X_val)

mse_default = mean_squared_error(Y_val, y_pred_default)

rf_optimal = RandomForestRegressor(random_state=42, **rf_random.best_params_)

rf_optimal.fit(X_train, Y_train)

y_pred_optimal = rf_optimal.predict(X_val)

mse_optimal = mean_squared_error(Y_val, y_pred_optimal)

mse_improvement = mse_default - mse_optimal

print("Улучшение MSE при использовании оптимальных параметров:", round(mse_improvement, 1))


Улучшение MSE при использовании оптимальных параметров: 1.5
