In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import folium

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

#Support Vector Machines (SVM)
#XGBoost 

import sys
import os

# Seed reused by the train/test split and by every model below, so runs are reproducible
random_state = 10

# Absolute path of the sibling backend/ directory holding the project helper modules
backend_path = os.path.abspath('../backend')
# Guard the append: re-running this cell must not pile up duplicate sys.path entries
if backend_path not in sys.path:
    sys.path.append(backend_path)
from get_metrics import get_metrics_regression, get_metrics_classification
from check_overfitting import check_overfitting

In [None]:
# Load the dataset from data/df.csv into the DataFrame df
df = pd.read_csv(filepath_or_buffer='../data/df.csv')

In [None]:
# Work on a 100,000-row subset: the LAST 100,000 rows of df (a positional tail, not a random sample)
df_sample = df.iloc[-100_000:]

In [None]:
# Show a concise summary of the subset (columns, dtypes, non-null counts)
df_sample.info()

In [None]:
# Feature columns: significance, depth and the individual timestamp components
feature_columns = ['significance', 'depth', 'year', 'month', 'day', 'hour', 'minute', 'second']
X = df_sample[feature_columns]

# Target columns: a multi-output target (magnitude plus both coordinates)
target_columns = ['magnitude', 'longitude', 'latitude']
y = df_sample[target_columns]

# Hold out 20% of the rows for evaluation; the split is seeded with random_state
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)


In [None]:
# Baseline RandomForestRegressor: build it with the shared seed and train it on the
# train split (fit returns the fitted estimator itself, so the calls are chained)
rfr = RandomForestRegressor(random_state=random_state).fit(X_train, y_train)
# Predictions for the held-out test split
y_pred_rfr = rfr.predict(X_test)

In [None]:
# Overfitting check for the RandomForestRegressor: passes the fitted model, both splits
# and mean_squared_error to the project helper check_overfitting
# (presumably it compares train vs. test error -- confirm in backend/check_overfitting)
check_overfitting(rfr, X_train, y_train, X_test, y_test, mean_squared_error)

In [None]:
# Test-set metrics for the baseline random forest, computed by the project helper
# get_metrics_regression. The predictions already stored in y_pred_rfr are reused
# instead of running rfr.predict(X_test) a second time.
metrics = get_metrics_regression(y_test,
                                 y_pred = y_pred_rfr,
                                 X_test = X_test,
                                 name='RandomForestRegressor_Baseline')
# Display the metrics as the cell output
metrics

In [None]:
# Baseline DecisionTreeRegressor: build it with the shared seed and train it on the
# train split (fit returns the fitted estimator itself, so the calls are chained)
dtr = DecisionTreeRegressor(random_state=random_state).fit(X_train, y_train)
# Predictions for the held-out test split
y_pred_dtr = dtr.predict(X_test)

In [None]:
# Overfitting check for the DecisionTreeRegressor: passes the fitted model, both splits
# and mean_squared_error to the project helper check_overfitting
# (presumably it compares train vs. test error -- confirm in backend/check_overfitting)
check_overfitting(dtr, X_train, y_train, X_test, y_test, mean_squared_error)

In [None]:
# Append the decision tree's test-set metrics to the comparison table.
# The entry is labelled with the model it actually describes (it previously reused the
# random forest label), and the predictions already stored in y_pred_dtr are reused.
# NOTE: this cell is not idempotent -- re-running it concatenates the same result again.
metrics = pd.concat([
    metrics,
    get_metrics_regression(y_test,
                           y_pred = y_pred_dtr,
                           X_test = X_test,
                           name='DecisionTreeRegressor_Baseline')])
# Display the combined metrics as the cell output
metrics