In [30]:
import numpy as np
import pandas as pd
# Render floats with a thousands separator and two decimals in every pandas display.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [31]:
# Read the semicolon-delimited orders export.
# NOTE(review): this is an absolute, machine-specific path; the notebook only runs where this file exists.
df = pd.read_csv(r'C:\Users\sepa\Desktop\orders_20190822.csv', sep=';')
# Coerce columns to working dtypes:
#   price  - decimal comma replaced with a dot, then cast to float
#   o_date - parsed as a day-first date
df = df.assign(
    price=df['price'].replace(',', '.', regex=True).astype('float'),
    o_date=pd.to_datetime(df['o_date'], dayfirst=True),
)
df.dtypes

id_o                int64
user_id             int64
price             float64
o_date     datetime64[ns]
dtype: object

In [32]:
# Preview the loaded orders frame (columns id_o, user_id, price, o_date).
df

Unnamed: 0,id_o,user_id,price,o_date
0,1234491,337544,539.00,2016-01-01
1,1234494,171642,153.30,2016-01-01
2,1234497,260596,55.30,2016-01-01
3,1234498,1105609,752.50,2016-01-01
4,1234500,982696,4410.00,2016-01-01
...,...,...,...,...
2002799,6945524,5806002,825.30,2017-12-31
2002800,6945526,5919142,4934.30,2017-12-31
2002801,6945527,1574166,804.30,2017-12-31
2002802,6945528,5919156,5019.70,2017-12-31


### Сумма по годам и месяцам

In [53]:
# Total order revenue per calendar month, one row per (year, month) of the order date.
# NOTE: despite its name, `month_mean` holds monthly SUMS of `price`; the name is kept
# because later cells refer to it.
month_mean = (
    df.groupby([
        df['o_date'].dt.year.rename('year'),
        df['o_date'].dt.month.rename('month'),
    ])['price']
    .sum()
    .reset_index()
)
# Consecutive 1-based month ordinal; used as the regressor for the linear trend.
month_mean['num'] = range(1, len(month_mean) + 1)
month_mean

Unnamed: 0,year,month,price,num
0,2016,1,96812334.85,1
1,2016,2,95436530.7,2
2,2016,3,115270625.4,3
3,2016,4,139324010.7,4
4,2016,5,108587343.2,5
5,2016,6,116676502.6,6
6,2016,7,113191827.7,7
7,2016,8,139561892.4,8
8,2016,9,152917024.4,9
9,2016,10,211633380.7,10


In [55]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares line through the monthly totals: price ~ _a * num + _b.
month_ordinal = month_mean['num'].to_numpy().reshape(-1, 1)  # single-feature column matrix
monthly_total = month_mean['price'].to_numpy()

reg = LinearRegression()
reg.fit(month_ordinal, monthly_total)

# Slope and intercept of the fitted line; both are reused by later cells.
_a, _b = reg.coef_[0], reg.intercept_
print('коэффициент: ', _a)
print('свободный член: ',_b)

коэффициент:  8430033.686722012
свободный член:  83903222.30141284


### Тренд

In [57]:
# Fitted trend value for every observed month: slope * month ordinal + intercept.
tr = month_mean['num'].mul(_a).add(_b)
tr

0     92,333,255.99
1    100,763,289.67
2    109,193,323.36
3    117,623,357.05
4    126,053,390.74
5    134,483,424.42
6    142,913,458.11
7    151,343,491.80
8    159,773,525.48
9    168,203,559.17
10   176,633,592.86
11   185,063,626.54
12   193,493,660.23
13   201,923,693.92
14   210,353,727.60
15   218,783,761.29
16   227,213,794.98
17   235,643,828.66
18   244,073,862.35
19   252,503,896.04
20   260,933,929.72
21   269,363,963.41
22   277,793,997.10
23   286,224,030.78
Name: num, dtype: float64

### Отклонение фактических значений от значений тренда

In [66]:
# Ratio of the actual monthly total to its trend value (1.0 means exactly on trend),
# kept next to the calendar month so ratios can be grouped per month afterwards.
dr = pd.DataFrame({
    'deviation': month_mean['price'].div(tr),
    'month': month_mean['month'],
})
dr

Unnamed: 0,deviation,month
0,1.05,1
1,0.95,2
2,1.06,3
3,1.18,4
4,0.86,5
5,0.87,6
6,0.79,7
7,0.92,8
8,0.96,9
9,1.26,10


In [69]:
# Average actual/trend ratio for each calendar month across the observed years.
_mean = dr.groupby(by='month').agg('mean')
print('Среднее отклонение для каждого месяца: ', _mean)

Среднее отклонение для каждого месяца:         deviation
month           
1           0.98
2           0.87
3           1.04
4           1.04
5           0.91
6           0.83
7           0.78
8           0.87
9           0.88
10          1.15
11          1.31
12          1.35


In [73]:
# Mean of the per-month average ratios (a one-element Series keyed by 'deviation');
# the next cell divides by it to normalise the monthly coefficients.
mm = _mean.mean(axis=0)
print('Общий индекс сезонности: ', mm)

Общий индекс сезонности:  deviation   1.00
dtype: float64


### Коэффициенты сезонности, очищенные от роста

In [76]:
# Seasonal coefficients: each month's average ratio divided by the overall mean ratio.
season = _mean.div(mm, axis='columns')
season

Unnamed: 0_level_0,deviation
month,Unnamed: 1_level_1
1,0.98
2,0.87
3,1.04
4,1.04
5,0.91
6,0.82
7,0.78
8,0.87
9,0.88
10,1.15


In [79]:
# Month ordinals of the 12-month forecast horizon (25..36).
# NOTE(review): `next` shadows the built-in next(); the name is kept because later cells read it.
next = list(range(25, 37))
print('Месяцы прогноза: ', next)

Месяцы прогноза:  [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]


### Тренд на месяцы прогноза

In [85]:
# Extrapolate the fitted line (_a * ordinal + _b) to the forecast month ordinals.
# The original cell also evaluated this expression on its own line and discarded the
# result; that no-op statement is removed.
next_tr = pd.DataFrame({
    'month': list(range(1, 13)),          # calendar month of each forecast row
    'trend': np.array(next) * _a + _b,    # trend value at the forecast ordinal
    'next_m': next,                       # forecast month ordinal
})
next_tr

Unnamed: 0,month,trend,next_m
0,1,294654064.47,25
1,2,303084098.16,26
2,3,311514131.84,27
3,4,319944165.53,28
4,5,328374199.22,29
5,6,336804232.9,30
6,7,345234266.59,31
7,8,353664300.28,32
8,9,362094333.96,33
9,10,370524367.65,34


### Рассчитываем прогноз. Для этого значения линейного тренда умножаем на коэффициенты сезонности.

In [96]:
# Forecast = trend value * seasonal coefficient of the same calendar month.
# The coefficient is looked up by month (season is indexed by month) rather than
# multiplied positionally, so the result does not depend on the two frames sharing
# the same row order; a month with no coefficient yields NaN.
# .to_numpy() keeps the result named 'trend', as in the positional version.
rez = next_tr['trend'] * next_tr['month'].map(season['deviation']).to_numpy()
rez

0    288,910,104.58
1    264,386,056.24
2    322,932,878.14
3    333,461,405.10
4    297,805,440.09
5    277,822,328.27
6    268,685,727.89
7    306,722,545.30
8    319,866,905.22
9    425,117,289.47
10   497,324,436.57
11   522,141,372.41
Name: trend, dtype: float64

### Отклонения

In [101]:
# Reference values to compare the forecast against, one per forecast month.
# NOTE(review): presumably the actual monthly totals for the forecast year - confirm the source.
test = np.array([
    256798898, 232640416, 267994924, 262849522,
    276933049, 251486085, 250559778, 261724749,
    276675505, 287647539, 363102609, 422386052,
])
# Signed difference: reference value minus forecast (negative means the forecast is higher).
test - rez

0     -32,111,206.58
1     -31,745,640.24
2     -54,937,954.14
3     -70,611,883.10
4     -20,872,391.09
5     -26,336,243.27
6     -18,125,949.89
7     -44,997,796.30
8     -43,191,400.22
9    -137,469,750.47
10   -134,221,827.57
11    -99,755,320.41
Name: trend, dtype: float64