In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error
from statsmodels.tsa.stattools import adfuller

# Load the training data from the Excel workbook into a DataFrame
data = pd.read_excel(io='train.xlsx')

In [None]:
# Remove price outliers with Local Outlier Factor (LOF).
# LOF is fitted on the price column alone; `contamination=0.05` asks the
# estimator to flag about 5% of the rows as outliers.
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
outliers = lof.fit_predict(data[['Цена на арматуру']])

# fit_predict labels inliers as 1 and outliers as -1. Take an explicit copy of
# the kept rows: assigning a column to a bare boolean-filtered slice triggers
# pandas' SettingWithCopyWarning.
data = data[outliers == 1].copy()

# Parse the date column and use it as the index. Later cells rely on row order
# being chronological (diff(), positional time index), so sort by date; a
# stable sort keeps the original order of rows that share the same date.
data['dt'] = pd.to_datetime(data['dt'])
data = data.set_index('dt').sort_index(kind='mergesort')

In [None]:
# Stationarity check: run the Augmented Dickey-Fuller test on the price series
# and report the test statistic together with its p-value.
result = adfuller(data['Цена на арматуру'])
adf_statistic, p_value = result[0], result[1]
print(f'ADF Statistic: {adf_statistic}')
print(f'p-value: {p_value}')

In [None]:
# Linear regression of the price on the row position (a straight-line trend).
# Target: the price column as an array.
y = data['Цена на арматуру'].values
# Feature: the ordinal position of each row (0..n-1) as a single-column matrix.
X = np.arange(len(data)).reshape(-1, 1)

model = LinearRegression()
model.fit(X, y)
trend = model.predict(X)

# Overlay the fitted line on the observed prices.
plt.figure(figsize=(10, 5))
plt.plot(data.index, y, label='Реальные данные')
plt.plot(data.index, trend, color='red', label='Линейная регрессия')
plt.legend()
plt.show()

In [None]:
# Binary target for logistic regression: 1 when the price rose relative to the
# previous observation, 0 otherwise.
price_change = data['Цена на арматуру'].diff()

# The first observation has no predecessor, so its change is NaN. `NaN > 0`
# evaluates to False, which would silently label that row as a "fall", and a
# later dropna() cannot remove it because the NaN is gone after the int cast.
# Drop rows without a defined change explicitly, then drop any remaining rows
# with missing values (as before), without mutating the frame in place.
data = data.assign(target=(price_change > 0).astype(int))
data = data.loc[price_change.notna().to_numpy()].dropna()

# Single feature: ordinal position of each row in time.
X_class = np.arange(len(data)).reshape(-1, 1)
y_class = data['target']

# Chronological hold-out: the last 20% of observations form the test set.
# A shuffled split would let the model train on observations that come after
# the ones it is evaluated on, which overstates forecasting accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    X_class, y_class, test_size=0.2, shuffle=False
)

log_model = LogisticRegression()
log_model.fit(X_train, y_train)
y_pred = log_model.predict(X_test)

print(f'Accuracy логистической регрессии: {accuracy_score(y_test, y_pred)}')


In [None]:
# Price forecasting with a random forest, evaluated on a chronological hold-out.
# Features and target are rebuilt from the current `data` so that positions,
# prices and dates always refer to the same rows, even if rows were dropped
# after the earlier regression cell created its own X / y.
X_rf = np.arange(len(data)).reshape(-1, 1)
y_rf = data['Цена на арматуру'].to_numpy()

# shuffle=False keeps time order: train on the first 80%, test on the last 20%.
# With a shuffled split the test points are scattered across the whole series,
# so plotting them against the last dates puts them on the wrong x-axis.
X_train, X_test, y_train, y_test = train_test_split(
    X_rf, y_rf, test_size=0.2, shuffle=False
)
# NOTE(review): with a bare time index as the only feature a tree ensemble
# cannot extrapolate a trend beyond the training range; consider lag features.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

print(f'MAE случайного леса: {mean_absolute_error(y_test, y_pred_rf)}')

# Look the dates up from the actual test positions instead of assuming them.
test_dates = data.index[X_test.ravel()]

plt.figure(figsize=(10, 5))
plt.plot(test_dates, y_test, label='Реальные данные')
plt.plot(test_dates, y_pred_rf, label='Random Forest', color='green')
plt.legend()
plt.show()