In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
import wandb
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
import joblib
import json
import os

In [None]:
# Load the prepared ML dataset and discard the stray index column
# ('Unnamed: 0') that is present in the CSV.
raw_frame = pd.read_csv('./data/df_for_ML_new.csv')
df = raw_frame.drop(columns=['Unnamed: 0'])

In [None]:
# Missing-value handling:
#   * 'pool'      -> NaN is replaced with 0
#   * all others  -> NaN is replaced with the sentinel -1
# 'year_built' is cast to int once no NaN remains in the frame.
df = (
    df.assign(pool=df['pool'].fillna(0))
      .fillna(-1)
      .astype({'year_built': int})
)

In [None]:
# Separate the regression target from the feature matrix.
y = df['target']
X = df.drop(columns=['target'])

In [None]:
# k-NN regression baseline: train, evaluate and log one run to Weights & Biases.

# Read the W&B API key from a local JSON file and expose it via the environment.
with open('/home/dwarf/diplom/wandb.json', 'r') as f:
    data = json.load(f)
os.environ['WANDB_API_KEY'] = data["key"]

wandb.login()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Using the run as a context manager finishes it even if a step below raises,
# so no run is left open in the kernel.
with wandb.init(entity=None, project="diplom_0106", name='knn_run') as run:
    # k-NN regressor with k = 3
    model = KNeighborsRegressor(n_neighbors=3)
    model.fit(X_train, y_train)

    # Predict the target on the held-out rows
    y_pred = model.predict(X_test)

    # Quality metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # One log call keeps the three metrics on the same step
    wandb.log({"MAE": mae, "MSE": mse, "R2": r2})

    # Persist the model under a name that matches the estimator. It was previously
    # written as "random_forest_model.pkl", which mislabeled the artifact and
    # shared a filename with the random-forest cell's output.
    model_filename = "knn_model.pkl"
    joblib.dump(model, model_filename)

    # Attach the serialized model to the W&B run
    wandb.save(model_filename)

In [None]:
# Random-forest grid search: one W&B run per (n_estimators, max_depth) pair.

# 70/30 split with a fixed seed so every configuration sees the same data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Read the W&B API key from a local JSON file and expose it via the environment.
with open('/home/dwarf/diplom/wandb.json', 'r') as f:
    data = json.load(f)
os.environ['WANDB_API_KEY'] = data["key"]

wandb.login()

n_estimators = [50, 100, 150, 200]
max_depths = [5, 10, 15, 20, 25, 30]

for estimators in n_estimators:
    for depth in max_depths:
        # Each configuration gets its own run, and the context manager closes it
        # before the next one starts. Previously wandb.finish() was called only
        # once after both loops, relying on wandb.init() to close earlier runs.
        with wandb.init(
            entity=None,
            project="diplom_temp",
            name=f'rand_for_reg est={estimators} depth={depth}',
            # Record hyperparameters as run config so runs can be filtered and
            # compared in the W&B UI, not just told apart by name.
            config={"n_estimators": estimators, "max_depth": depth},
        ) as run:
            # Build and fit the random-forest regressor
            model = RandomForestRegressor(n_estimators=estimators, max_depth=depth, random_state=42)
            model.fit(X_train, y_train)

            # Predict on the held-out rows
            y_pred = model.predict(X_test)

            # Quality metrics
            mae = mean_absolute_error(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            # One log call keeps the three metrics on the same step
            wandb.log({"MAE": mae, "MSE": mse, "R2": r2})

            # Serialize the model; the file is overwritten on each iteration and
            # attached to the run that produced it.
            model_filename = "random_forest_model.pkl"
            joblib.dump(model, model_filename)
            wandb.save(model_filename)

In [None]:
# Linear regression on min-max scaled features, logged as one W&B run.

# Read the W&B API key from a local JSON file and expose it via the environment.
with open('/home/dwarf/diplom/wandb.json', 'r') as f:
    data = json.load(f)
os.environ['WANDB_API_KEY'] = data["key"]

wandb.login()

# Split first, then fit the scaler on the training rows only. Fitting it on the
# full X (as before) lets test-set min/max values leak into preprocessing and
# into the scaler that is saved for later use.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# The original cell defined X_scaled; keep the name bound (now produced by the
# train-fitted scaler) in case another cell refers to it.
X_scaled = scaler.transform(X)

# The context manager finishes the run even if a step below raises.
with wandb.init(entity=None, project="diplom_0106", name='linear_regression') as run:
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = model.predict(X_test)

    # Quality metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # One log call keeps the three metrics on the same step
    wandb.log({"MAE": mae, "MSE": mse, "R2": r2})

    # Serialize both the model and the scaler: the scaler is required to
    # transform new inputs the same way at prediction time.
    # NOTE(review): "regerssion" is misspelled; kept unchanged because other
    # code may load the artifact by this exact name.
    model_filename = "linear_regerssion.pkl"
    joblib.dump(model, model_filename)
    scaler_filename = 'min_max_scaler.pkl'
    joblib.dump(scaler, scaler_filename)

    # Attach both files to the W&B run
    wandb.save(model_filename)
    wandb.save(scaler_filename)

In [None]:
# Decision-tree depth sweep: one W&B run per max_depth value.

# Read the W&B API key from a local JSON file and expose it via the environment.
with open('/home/dwarf/diplom/wandb.json', 'r') as f:
    data = json.load(f)
os.environ['WANDB_API_KEY'] = data["key"]

wandb.login()

# 80/20 split. The seed is fixed (as in the other experiments in this notebook)
# so results are reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

max_depths = [5, 10, 15, 20, 25, 30]

for depth in max_depths:
    # One run per depth; the context manager closes it before the next starts
    # instead of relying on a single wandb.finish() after the loop.
    with wandb.init(
        entity=None,
        project="diplom_0106",
        name=f'Decision Tree depth={depth}',
        config={"max_depth": depth},
    ) as run:
        # Pass the swept depth to the estimator. It was previously omitted, so
        # every run trained the same unconstrained tree regardless of its name.
        model = DecisionTreeRegressor(max_depth=depth, random_state=42)
        model.fit(X_train, y_train)

        # Predict on the held-out rows
        y_pred = model.predict(X_test)

        # Quality metrics
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # One log call keeps the three metrics on the same step
        wandb.log({"MAE": mae, "MSE": mse, "R2": r2})

        # Serialize the model; the file is overwritten on each iteration and
        # attached to the run that produced it.
        model_filename = "decision_tree_model.pkl"
        joblib.dump(model, model_filename)
        wandb.save(model_filename)