In [1]:
import datetime
import math
import numpy as np
import pandas as pd

In [3]:
adm = pd.read_csv('FONTE/prophet_dataset.csv',sep='\t')

In [4]:
adm['dia']=adm['ds'].apply(lambda x: x.split(' ')[0])
data=adm.groupby('dia')['y'].sum().reset_index()
data['dia']=pd.to_datetime(data['dia'], format='%Y-%m-%d')
data['ds']=data['dia']
data=data[['ds','y']]

In [5]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
ts=data['y']
### Import Packages ###

import itertools
import statsmodels.api as sm

### Define Parameter Ranges to Test ###

# Note: higher numbers will result in code taking much longer to run
# Here we have it set to test p,d,q each = 0, 1 & 2

# Define the p, d and q parameters to take any value between 0 and 3 (exclusive)
p = d = q = range(0, 3)

# Generate all different combinations of p, q and q triplets
pdq = list(itertools.product(p, d, q))

# Generate all different combinations of seasonal p, q and q triplets
# Note: here we have 12 in the 's' position as we have monthly data
# You'll want to change this according to your time series' frequency
pdqs = [(x[0], x[1], x[2], 60) for x in list(itertools.product(p, d, q))]

### Run Grid Search ###

# Note: this code will take a while to run

# Define function
def sarimax_gridsearch(ts, pdq, pdqs, maxiter=50, freq='M'):
   # '''
    #Input: 
     #   ts : your time series data
      #  pdq : ARIMA combinations from above
       # pdqs : seasonal ARIMA combinations from above
       # maxiter : number of iterations, increase if your model isn't converging
       # frequency : default='M' for month. Change to suit your time series frequency
        #    e.g. 'D' for day, 'H' for hour, 'Y' for year. 
        
    #Return:
     #   Prints out top 5 parameter combinations
      #  Returns dataframe of parameter combinations ranked by BIC
    #'''

    # Run a grid search with pdq and seasonal pdq parameters and get the best BIC value
    ans = []
    for comb in pdq:
        for combs in pdqs:
            try:
                
                mod = sm.tsa.statespace.SARIMAX(ts,
                                                order=comb,
                                                seasonal_order=combs)

                output = mod.fit(maxiter=maxiter) 
                ans.append([comb, combs, output.bic])
                print('SARIMAX {} x {}60 : BIC Calculated ={}'.format(comb, combs, output.bic))
            except:
                continue
            
    # Find the parameters with minimal BIC value

    # Convert into dataframe
    ans_df = pd.DataFrame(ans, columns=['pdq', 'pdqs', 'bic'])

    # Sort and return top 5 combinations
    ans_df = ans_df.sort_values(by=['bic'],ascending=True)[0:5]
    
    return ans_df
    
### Apply function to your time series data ###

# Remember to change frequency to match your time series data
sarimax_gridsearch(ts, pdq, pdqs, freq='M')

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            1     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  7.19325D+00    |proj g|=  8.88178D-11

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    1      0      1      0     0     0   8.882D-11   7.193D+00
  F =   7.1932480117281585     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
SARIMAX (0, 0, 0) x (0, 0, 0, 60)60 : BIC Calculated =21025.95756668143


 This problem is unconstrained.
 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            2     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  7.01231D+00    |proj g|=  4.12548D-01

At iterate    5    f=  6.88654D+00    |proj g|=  3.13991D-03

At iterate   10    f=  6.69016D+00    |proj g|=  2.28837D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    2     11     14      1     0     0   7.506D-06   6.690D+00
  F =   6.6901572617980358     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
SARIMAX (0, 0, 0) x (0, 0, 1, 60)60 : BIC Calculated =19563.2132717973

 This problem is unconstrained.



At iterate    5    f=  6.81091D+00    |proj g|=  3.97739D-03

At iterate   10    f=  6.78380D+00    |proj g|=  5.64098D-02

At iterate   15    f=  6.57743D+00    |proj g|=  3.56287D-01

At iterate   20    f=  6.36881D+00    |proj g|=  1.58368D-01

At iterate   25    f=  6.33330D+00    |proj g|=  3.83995D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     29     53      1     0     0   3.219D-06   6.333D+00
  F =   6.3331162269932593     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
SARIMAX (0, 0, 0) x (0, 0, 2, 60)60 : BIC Calculated =18527.226244509555
RUNNING THE L-BFGS-B CODE

           * 

 This problem is unconstrained.


SARIMAX (0, 0, 0) x (0, 1, 0, 60)60 : BIC Calculated =15626.300355385241


 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            2     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.16434D+00    |proj g|=  3.77157D-02

At iterate    5    f=  5.11098D+00    |proj g|=  7.64924D-03

At iterate   10    f=  5.10032D+00    |proj g|=  6.43848D-04

At iterate   15    f=  5.10014D+00    |proj g|=  2.72116D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    2     17     21      1     0     0   2.868D-06   5.100D+00
  F =   5.1001337123759951     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
SARIMAX 

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.17125D+00    |proj g|=  5.58052D-02

At iterate    5    f=  5.16563D+00    |proj g|=  3.72877D-02

At iterate   10    f=  5.11161D+00    |proj g|=  5.92671D-02

At iterate   15    f=  5.10058D+00    |proj g|=  1.61714D-02

At iterate   20    f=  5.09993D+00    |proj g|=  2.19473D-04

At iterate   25    f=  5.09990D+00    |proj g|=  3.51257D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     28     32      1     0     0   

 This problem is unconstrained.



           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    1      3      4      1     0     0   4.473D-06   5.573D+00
  F =   5.5725602701667292     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
SARIMAX (0, 0, 0) x (0, 2, 0, 60)60 : BIC Calculated =16290.222280310465


  warn('Non-invertible starting seasonal moving average'


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            2     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.58003D+00    |proj g|=  6.02305D-01


 This problem is unconstrained.



At iterate    5    f=  5.29735D+00    |proj g|=  4.68476D-03

At iterate   10    f=  5.19969D+00    |proj g|=  5.04032D-06

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    2     10     14      1     0     0   5.040D-06   5.200D+00
  F =   5.1996860241496599     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            


In [6]:
adm

Unnamed: 0,ds,y
0,2017-01-01 01:00:00,56
1,2017-01-01 02:00:00,74
2,2017-01-01 03:00:00,118
3,2017-01-01 04:00:00,66
4,2017-01-02 01:00:00,31
...,...,...
5839,2020-12-30 04:00:00,43
5840,2020-12-31 01:00:00,32
5841,2020-12-31 02:00:00,92
5842,2020-12-31 03:00:00,83


In [None]:
adm['dia']=adm['ds'].apply(lambda x: x.split(' ')[0])

In [3]:
adm.columns

Index(['data', 'dia_semana', 'feriado', 'festas', 'date_x', 'mean_x', 'date_y',
       'mean_y', 'campeonato', '1x2', 'resultado', 'Casa', 'Data',
       'Precipitacao diaria (mm)', 'Velocidade do vento media diaria (m/s)',
       'DTA_ADMISSAO', 'turno', 'URG_EPISODIO'],
      dtype='object')

In [4]:
adm=adm[['data','dia_semana','feriado','festas','mean_x','mean_y','Casa','campeonato','Precipitacao diaria (mm)','Velocidade do vento media diaria (m/s)','turno','URG_EPISODIO']]

In [5]:
ano=[]
for x in adm['data'].iloc:
    ano.append(x.split('-')[0])

In [6]:
adm['ano']=ano

In [7]:
adm['ano']=adm['ano'].astype(int)

In [8]:
adm_final=adm[adm['ano']>2016]

In [9]:
adm_final=adm_final.drop(columns=['ano'])

In [10]:
adm_final

Unnamed: 0,data,dia_semana,feriado,festas,mean_x,mean_y,Casa,campeonato,Precipitacao diaria (mm),Velocidade do vento media diaria (m/s),turno,URG_EPISODIO
10228,2017-01-01,Sunday,Ano Novo,,-2.928487,7.802283,,,5.7,0.0,1,56
10229,2017-01-01,Sunday,Ano Novo,,-2.928487,7.802283,,,5.7,0.0,2,74
10230,2017-01-01,Sunday,Ano Novo,,-2.928487,7.802283,,,5.7,0.0,3,118
10231,2017-01-01,Sunday,Ano Novo,,-2.928487,7.802283,,,5.7,0.0,4,66
10232,2017-01-02,Monday,,,6.628056,12.706052,,,0.1,0.0,1,31
...,...,...,...,...,...,...,...,...,...,...,...,...
17187,2021-10-06,Wednesday,,,,,,,0.0,0.2,4,69
17188,2021-10-07,Thursday,,,,,,,0.0,0.3,1,29
17189,2021-10-07,Thursday,,,,,,,0.0,0.3,2,151
17190,2021-10-07,Thursday,,,,,,,0.0,0.3,3,123


In [11]:
dia=[]
mes=[]
ano=[]
for x in adm_final['data'].iloc:
    y=x.split('-')
    dia.append(y[2])
    mes.append(y[1])
    ano.append(y[0])
adm_final['dia']=dia
adm_final['mes']=mes
adm_final['ano']=ano

In [12]:
adm_final=adm_final.drop(columns=['data'])

In [13]:
adm_final['ano']=adm_final['ano'].astype(int)
adm_final['mes']=adm_final['mes'].astype(int)
adm_final['dia']=adm_final['dia'].astype(int)

In [14]:
from collections import defaultdict
temp = defaultdict(lambda: len(temp))
adm_final['dia_semana'] = [temp[ele] for ele in adm_final['dia_semana']]
from collections import defaultdict
temp = defaultdict(lambda: len(temp))
adm_final['feriado'] = [temp[ele] for ele in adm_final['feriado']]
from collections import defaultdict
temp = defaultdict(lambda: len(temp))
adm_final['feriado'] = [temp[ele] for ele in adm_final['feriado']]
from collections import defaultdict
temp = defaultdict(lambda: len(temp))
adm_final['festas'] = [temp[ele] for ele in adm_final['festas']]
from collections import defaultdict
temp = defaultdict(lambda: len(temp))
adm_final['campeonato'] = [temp[ele] for ele in adm_final['campeonato']]

In [15]:
adm_final['Casa']=adm_final['Casa'].replace(np.nan,-1)

In [16]:
adm_final['Casa']=adm_final['Casa'].replace(np.nan,-1)

In [17]:
adm_final['Precipitacao diaria (mm)']=adm_final['Precipitacao diaria (mm)'].replace(np.nan,-1)
adm_final['Velocidade do vento media diaria (m/s)']=adm_final['Velocidade do vento media diaria (m/s)'].replace(np.nan,-1)

In [18]:
adm_final=adm_final[adm_final['ano']<2021]

In [19]:
adm_final['mean_x']=adm_final['mean_x'].replace(np.nan,-1000)
adm_final['mean_y']=adm_final['mean_y'].replace(np.nan,-1000)

In [60]:
y=adm_final['URG_EPISODIO']

In [21]:
X=adm_final.drop(columns=['URG_EPISODIO'])

In [22]:
pd.set_option('display.max_rows', 7000)

In [23]:
X.dtypes

dia_semana                                  int64
feriado                                     int64
festas                                      int64
mean_x                                    float64
mean_y                                    float64
Casa                                      float64
campeonato                                  int64
Precipitacao diaria (mm)                  float64
Velocidade do vento media diaria (m/s)    float64
turno                                       int64
dia                                         int32
mes                                         int32
ano                                         int32
dtype: object

In [24]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

from numpy import mean
from numpy import absolute
from numpy import sqrt
from numpy import std

In [25]:
from sklearn.linear_model import LinearRegression

cv = KFold(n_splits=10, random_state=1, shuffle=True)

model = LinearRegression()

for c in ['neg_mean_absolute_error','neg_mean_squared_error','r2','explained_variance']:
    scores = cross_val_score(model, X, y, scoring=c, cv=cv, n_jobs=-1)
    print(c+' : %.3f (%.3f)' % (mean(abs(scores)), std(scores)))

neg_mean_absolute_error : 36.193 (0.608)
neg_mean_squared_error : 1583.951 (71.184)
r2 : 0.021 (0.018)
explained_variance : 0.025 (0.020)


In [26]:
from sklearn.tree import DecisionTreeRegressor 

cv = KFold(n_splits=10, random_state=1, shuffle=True)

model = RandomForestRegressor()

for c in ['neg_mean_absolute_error','neg_mean_squared_error','r2','explained_variance']:
    scores = cross_val_score(model, X, y, scoring=c, cv=cv, n_jobs=-1)
    print(c+' : %.3f (%.3f)' % (mean(abs(scores)), std(scores)))

neg_mean_absolute_error : 11.630 (2.476)
neg_mean_squared_error : 258.464 (117.371)
r2 : 0.840 (0.070)
explained_variance : 0.858 (0.057)


In [27]:
from sklearn.model_selection import cross_val_predict
predicted = cross_val_predict(model, X, y, cv=cv)

In [61]:
teste=X[['turno','dia','mes','ano']]

In [62]:
teste['valor real']=y
teste['valor previsto']=predicted

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  teste['valor real']=y
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  teste['valor previsto']=predicted


In [63]:
turno1=teste[teste['turno']==1]
turno2=teste[teste['turno']==2]
turno3=teste[teste['turno']==3]
turno4=teste[teste['turno']==4]

In [64]:
med1=turno1['valor real'].mean()

In [32]:
z=0
for x in turno1.iloc:
    y=abs(x['valor real']-x['valor previsto'])
    z=z+y
erro_med1=z/len(turno1)

In [33]:
med2=turno2['valor real'].mean()

In [34]:
z=0
for x in turno2.iloc:
    y=abs(x['valor real']-x['valor previsto'])
    z=z+y
erro_med2=z/len(turno2)

In [35]:
med3=turno3['valor real'].mean()

In [36]:
z=0
for x in turno3.iloc:
    y=abs(x['valor real']-x['valor previsto'])
    z=z+y
erro_med3=z/len(turno3)

In [37]:
med4=turno4['valor real'].mean()

In [38]:
z=0
for x in turno4.iloc:
    y=abs(x['valor real']-x['valor previsto'])
    z=z+y
erro_med4=z/len(turno4)

In [39]:
erro_med1/med1*100

20.182426226889742

In [40]:
erro_med2/med2*100

13.29215479492882

In [41]:
erro_med3/med3*100

12.964796385090951

In [42]:
erro_med4/med4*100

17.110771434784283

In [7]:
adm

Unnamed: 0,ds,y
0,2017-01-01 01:00:00,56
1,2017-01-01 02:00:00,74
2,2017-01-01 03:00:00,118
3,2017-01-01 04:00:00,66
4,2017-01-02 01:00:00,31
...,...,...
5839,2020-12-30 04:00:00,43
5840,2020-12-31 01:00:00,32
5841,2020-12-31 02:00:00,92
5842,2020-12-31 03:00:00,83


In [5]:
import pandas as pd
from prophet import Prophet

In [6]:
data=[]
for x in teste.iloc:
    y=str(x['dia'].astype(int))+'-'+str(x['mes'].astype(int))+'-'+str(x['ano'].astype(int))+' '+str(x['turno'].astype(int))
    data.append(y)

NameError: name 'teste' is not defined

In [69]:
data

['1-1-2017 1',
 '1-1-2017 2',
 '1-1-2017 3',
 '1-1-2017 4',
 '2-1-2017 1',
 '2-1-2017 2',
 '2-1-2017 3',
 '2-1-2017 4',
 '3-1-2017 1',
 '3-1-2017 2',
 '3-1-2017 3',
 '3-1-2017 4',
 '4-1-2017 1',
 '4-1-2017 2',
 '4-1-2017 3',
 '4-1-2017 4',
 '5-1-2017 1',
 '5-1-2017 2',
 '5-1-2017 3',
 '5-1-2017 4',
 '6-1-2017 1',
 '6-1-2017 2',
 '6-1-2017 3',
 '6-1-2017 4',
 '7-1-2017 1',
 '7-1-2017 2',
 '7-1-2017 3',
 '7-1-2017 4',
 '8-1-2017 1',
 '8-1-2017 2',
 '8-1-2017 3',
 '8-1-2017 4',
 '9-1-2017 1',
 '9-1-2017 2',
 '9-1-2017 3',
 '9-1-2017 4',
 '10-1-2017 1',
 '10-1-2017 2',
 '10-1-2017 3',
 '10-1-2017 4',
 '11-1-2017 1',
 '11-1-2017 2',
 '11-1-2017 3',
 '11-1-2017 4',
 '12-1-2017 1',
 '12-1-2017 2',
 '12-1-2017 3',
 '12-1-2017 4',
 '13-1-2017 1',
 '13-1-2017 2',
 '13-1-2017 3',
 '13-1-2017 4',
 '14-1-2017 1',
 '14-1-2017 2',
 '14-1-2017 3',
 '14-1-2017 4',
 '15-1-2017 1',
 '15-1-2017 2',
 '15-1-2017 3',
 '15-1-2017 4',
 '16-1-2017 1',
 '16-1-2017 2',
 '16-1-2017 3',
 '16-1-2017 4',
 '17-1-2017 

In [70]:
teste['data']=data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  teste['data']=data


In [71]:
teste=teste[['data','valor real']]

In [72]:
teste['data']=pd.to_datetime(teste['data'], format='%d-%m-%Y %H')

In [73]:
teste.rename(columns = {'data':'ds', 'valor real':'y'}, inplace = True)

In [8]:
teste=adm

In [43]:
teste['ds']=pd.to_datetime(teste['ds'], format='%Y-%m-%d %H')

In [49]:
teste['turno']=teste['ds'].apply(lambda x:x.hour)

In [50]:
turno1=teste[teste['turno']==1]
turno2=teste[teste['turno']==2]
turno3=teste[teste['turno']==3]
turno4=teste[teste['turno']==4]

In [56]:
m = Prophet()
predict1 = m.fit(turno1)
future = m.make_future_dataframe(periods=365)
t1=m.predict(future)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [57]:
m = Prophet()
predict2 = m.fit(turno2)
future = m.make_future_dataframe(periods=365)
t2=m.predict(future)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [58]:
m = Prophet()
predict3 = m.fit(turno3)
future = m.make_future_dataframe(periods=365)
t3=m.predict(future)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [59]:
m = Prophet()
predict4 = m.fit(turno4)
future = m.make_future_dataframe(periods=365)
t4=m.predict(future)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [60]:
h1=t1[['ds','yhat']]
h2=t2[['ds','yhat']]
h3=t3[['ds','yhat']]
h4=t4[['ds','yhat']]

In [61]:
h1['ds']=pd.to_datetime(h1['ds'], format='%d-%m-%Y %H')
h2['ds']=pd.to_datetime(h2['ds'], format='%d-%m-%Y %H')
h3['ds']=pd.to_datetime(h3['ds'], format='%d-%m-%Y %H')
h4['ds']=pd.to_datetime(h4['ds'], format='%d-%m-%Y %H')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  h1['ds']=pd.to_datetime(h1['ds'], format='%d-%m-%Y %H')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  h2['ds']=pd.to_datetime(h2['ds'], format='%d-%m-%Y %H')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  h3['ds']=pd.to_datetime(h3['ds'], format='%d-%m-%Y %H')
A value is trying to be set on a copy

In [62]:
turno1['ds']=pd.to_datetime(turno1['ds'], format='%Y-%m-%d %H')
turno2['ds']=pd.to_datetime(turno2['ds'], format='%Y-%m-%d %H')
turno3['ds']=pd.to_datetime(turno3['ds'], format='%Y-%m-%d %H')
turno4['ds']=pd.to_datetime(turno4['ds'], format='%Y-%m-%d %H')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  turno1['ds']=pd.to_datetime(turno1['ds'], format='%Y-%m-%d %H')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  turno2['ds']=pd.to_datetime(turno2['ds'], format='%Y-%m-%d %H')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  turno3['ds']=pd.to_datetime(turno3['ds'], format='%Y-%m-%d %H')
A value is tr

In [33]:
h

Unnamed: 0,ds,yhat
0,2017-01-01 01:00:00,25.605350
1,2017-01-01 02:00:00,111.307825
2,2017-01-01 03:00:00,103.993385
3,2017-01-01 04:00:00,46.695072
4,2017-01-02 01:00:00,43.794782
...,...,...
6204,2021-12-27 04:00:00,56.868029
6205,2021-12-28 04:00:00,47.905425
6206,2021-12-29 04:00:00,47.841726
6207,2021-12-30 04:00:00,48.982150


In [34]:
teste

Unnamed: 0,ds,y
0,2017-01-01 01:00:00,56
1,2017-01-01 02:00:00,74
2,2017-01-01 03:00:00,118
3,2017-01-01 04:00:00,66
4,2017-01-02 01:00:00,31
...,...,...
5839,2020-12-30 04:00:00,43
5840,2020-12-31 01:00:00,32
5841,2020-12-31 02:00:00,92
5842,2020-12-31 03:00:00,83


In [63]:
final1 = pd.merge(turno1,h1, on='ds')
final2 = pd.merge(turno2,h2, on='ds')
final3 = pd.merge(turno3,h3, on='ds')
final4 = pd.merge(turno4,h4, on='ds')

In [65]:
z=0
for x in final1.iloc:
    y=abs(x['y']-x['yhat'])
    z=z+y
erro_med1=z/len(final1)

z=0
for x in final2.iloc:
    y=abs(x['y']-x['yhat'])
    z=z+y
erro_med2=z/len(final2)

z=0
for x in final3.iloc:
    y=abs(x['y']-x['yhat'])
    z=z+y
erro_med3=z/len(final3)

z=0
for x in final4.iloc:
    y=abs(x['y']-x['yhat'])
    z=z+y
erro_med4=z/len(final4)

In [66]:
med1=final1['y'].mean()
med2=final2['y'].mean()
med3=final3['y'].mean()
med4=final4['y'].mean()

In [67]:
erro_med1/med1*100

16.013241055640375

In [68]:
erro_med2/med2*100

10.938157520917233

In [69]:
erro_med3/med3*100

10.610667467012128

In [70]:
erro_med4/med4*100

14.287409462838388

In [71]:
final1

Unnamed: 0,ds,y,turno,yhat
0,2017-01-01 01:00:00,56,1,48.385580
1,2017-01-02 01:00:00,31,1,40.425103
2,2017-01-03 01:00:00,33,1,36.451621
3,2017-01-04 01:00:00,33,1,35.776451
4,2017-01-05 01:00:00,40,1,35.558813
...,...,...,...,...
1456,2020-12-27 01:00:00,26,1,40.744963
1457,2020-12-28 01:00:00,30,1,33.195793
1458,2020-12-29 01:00:00,24,1,29.605253
1459,2020-12-30 01:00:00,26,1,29.277176


In [72]:
final2

Unnamed: 0,ds,y,turno,yhat
0,2017-01-01 02:00:00,74,2,96.086716
1,2017-01-02 02:00:00,148,2,140.609778
2,2017-01-03 02:00:00,122,2,125.099210
3,2017-01-04 02:00:00,124,2,121.710964
4,2017-01-05 02:00:00,127,2,123.422505
...,...,...,...,...
1456,2020-12-27 02:00:00,75,2,84.875857
1457,2020-12-28 02:00:00,116,2,129.501297
1458,2020-12-29 02:00:00,122,2,114.143148
1459,2020-12-30 02:00:00,123,2,110.949944


In [73]:
h1

Unnamed: 0,ds,yhat
0,2017-01-01 01:00:00,48.385580
1,2017-01-02 01:00:00,40.425103
2,2017-01-03 01:00:00,36.451621
3,2017-01-04 01:00:00,35.776451
4,2017-01-05 01:00:00,35.558813
...,...,...
1821,2021-12-27 01:00:00,26.444271
1822,2021-12-28 01:00:00,22.971550
1823,2021-12-29 01:00:00,22.758560
1824,2021-12-30 01:00:00,22.955615


In [74]:
h2

Unnamed: 0,ds,yhat
0,2017-01-01 02:00:00,96.086716
1,2017-01-02 02:00:00,140.609778
2,2017-01-03 02:00:00,125.099210
3,2017-01-04 02:00:00,121.710964
4,2017-01-05 02:00:00,123.422505
...,...,...
1821,2021-12-27 02:00:00,118.352959
1822,2021-12-28 02:00:00,102.998926
1823,2021-12-29 02:00:00,99.825634
1824,2021-12-30 02:00:00,101.800964


In [75]:
import numpy as np
import pandas as pd

import plotly
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)
cf.go_offline()

In [90]:
graf=pd.merge(h,teste, on='ds', how='left')
graf=graf[['ds','y','yhat']]
graf=graf.set_index('ds')

In [84]:
graf1=pd.merge(h1,turno1, on='ds', how='left')
graf1=graf1[['ds','y','yhat']]
graf1=graf1.set_index('ds')

graf2=pd.merge(h2,turno2, on='ds', how='left')
graf2=graf2[['ds','y','yhat']]
graf2=graf2.set_index('ds')

graf3=pd.merge(h3,turno3, on='ds', how='left')
graf3=graf3[['ds','y','yhat']]
graf3=graf3.set_index('ds')

graf4=pd.merge(h4,turno4, on='ds', how='left')
graf4=graf4[['ds','y','yhat']]
graf4=graf4.set_index('ds')

In [92]:
graf.iplot()

In [85]:
graf1.iplot()

In [86]:
graf2.iplot()

In [87]:
graf3.iplot()

In [88]:
graf4.iplot()