In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = map(lambda filename: os.path.join(dirname, filename), filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install kats

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from matplotlib import rcParams
from kats.consts import TimeSeriesData
from kats.detectors.cusum_detection import CUSUMDetector
from statsmodels.tsa.stattools import adfuller


warnings.simplefilter('ignore')

In [None]:
# Read the BABA price history; the 'Date' column is parsed into datetimes.
df = pd.read_csv(
    '/kaggle/input/alibaba-stock-data/BABA.csv',
    parse_dates=['Date'],
)
df

In [None]:
# Summarise the raw frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Opening vs. closing price over time. Each line is labelled and a legend is
# drawn so the two series can be told apart.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Date'], df['Open'], label='Open')
ax.plot(df['Date'], df['Close'], label='Close')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('BABA: Open vs. Close')
ax.legend();

In [None]:
# High vs. low price over time. Each line is labelled and a legend is drawn
# so the two series can be told apart.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Date'], df['High'], label='High')
ax.plot(df['Date'], df['Low'], label='Low')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('BABA: High vs. Low')
ax.legend();

In [None]:
# Work on an independent (deep) copy so the raw frame `df` stays untouched.
data = df.copy(deep=True)

In [None]:
# Make 'Date' the index. Guarded so the cell can be re-run safely: once
# 'Date' has been moved into the index it is no longer a column, and an
# unconditional set_index would raise KeyError.
if 'Date' in data.columns:
    data = data.set_index('Date')

In [None]:
# Per-row change between close and open (Close minus Open).
data['diff_value'] = data['Close'].sub(data['Open'])
data

In [None]:
# Line plot of the Close-minus-Open difference against the frame's index.
data['diff_value'].plot()

In [None]:
# Midpoint of the open and close prices for each row.
mean = data['Open'] + data['Close']
data['mean_value'] = mean.div(2)
data

In [None]:
# Summary statistics for the numeric columns, including the derived
# diff_value and mean_value columns added above.
data.describe()

In [None]:
# Set the default figure size for later matplotlib figures, then draw the
# open/close midpoint as a line against the frame's index.
rcParams['figure.figsize'] = (15, 8)
p = sns.relplot(
    data=data,
    x=data.index,
    y='mean_value',
    kind='line',
)

In [None]:
# Take the first four columns by position and plot them together.
# NOTE(review): presumably Open/High/Low/Close — confirm against the CSV column order.
stocks = data.iloc[:, :4]
sns.lineplot(data=stocks)

In [None]:
# New frame with the index moved back into an ordinary column;
# `data` itself is left unchanged.
ts_data = data.reset_index()

In [None]:
# Display the frame after the index was reset.
ts_data

In [None]:
# Column names, dtypes and non-null counts of the reset-index frame.
ts_data.info()

In [None]:
# Column names applied to the two-column frame that is passed to
# TimeSeriesData in the change-point cell below.
names = ['time','value']

In [None]:
# CUSUM change-point detection on the open/close midpoint series.

# Two-column frame (date, midpoint) renamed to the labels held in `names`.
subset_ts = ts_data[['Date', 'mean_value']].rename(
    columns=dict(zip(['Date', 'mean_value'], names))
)

cuscum_point = TimeSeriesData(subset_ts)
detector = CUSUMDetector(cuscum_point)
change_points = detector.detector()

# Rotate the tick labels, then let the detector draw the series with the
# detected change points.
plt.xticks(rotation=45)
detector.plot(change_points)
plt.show()

In [None]:
# Single-column frame holding only the midpoint series.
sub_data = data.loc[:, ['mean_value']]

# The first 1380 rows are the training set; every row whose index label is
# not in the training set becomes the test set.
train_data = sub_data.iloc[:1380]
test_data = sub_data.drop(index=train_data.index)

In [None]:
# Display the training split.
train_data

In [None]:
# Display the hold-out (test) split.
test_data

In [None]:
def testing(timeseries, window=12):
  """Plot rolling statistics and print an Augmented Dickey-Fuller test.

  Parameters
  ----------
  timeseries : pandas Series or single-column DataFrame
      The series to inspect; it is plotted as-is and passed to ``adfuller``.
  window : int, default 12
      Number of observations in the rolling mean / rolling std window.

  Returns
  -------
  None
      A figure is shown and the test summary is printed.
  """
  # Rolling statistics over the requested window
  rolmean = timeseries.rolling(window=window).mean()
  rolstd = timeseries.rolling(window=window).std()

  # Plot the original series together with its rolling mean and std
  plt.figure(figsize=(20,6))
  plt.plot(timeseries, color='blue',label='Original')
  plt.plot(rolmean, color='red', label='Rolling Mean')
  plt.plot(rolstd, color='black', label = 'Rolling Std')
  plt.legend(loc='best')
  plt.title('Rolling Mean & Standard Deviation')
  plt.show(block=False)

  # Dickey-Fuller test; lag length is selected by AIC
  print('Results of Dickey-Fuller Test:')
  dftest = adfuller(timeseries, autolag='AIC')
  # The first four entries of the result are labelled below; entry 4 is a
  # dict of critical values keyed by significance level.
  dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
  for key,value in dftest[4].items():
    dfoutput['Critical Value (%s)'%key] = value
  print(dfoutput.round(2))

In [None]:
# Run the rolling-statistics plot and Dickey-Fuller test on the midpoint series.
testing(sub_data)

In [None]:
# Raw midpoint series overlaid with its yearly mean.
y = sub_data['mean_value']

fig, ax = plt.subplots(figsize=(20, 6))
# The labels name what is actually plotted: the un-resampled mean_value
# column and its yearly average.
ax.plot(y,marker='.', linestyle='-', linewidth=0.5, label='mean_value')
ax.plot(y.resample('y').mean(),marker='o', markersize=8, linestyle='-', label='Yearly Mean Resample')
ax.set_ylabel('Mean of Open and Close')
ax.legend()

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMA(1,2,1)x(1,0,0,12) fitted on the training split only.
# NOTE(review): a seasonal period of 12 is assumed here — confirm it suits this series.
model = SARIMAX(
    train_data,
    order=(1, 2, 1),
    seasonal_order=(1, 0, 0, 12),
)
result = model.fit()

In [None]:
# Kernel density estimate of the fitted model's residuals.
result.resid.plot.kde()

In [None]:
# Display the fitted results object returned by model.fit().
result

In [None]:
# Forecast exactly the hold-out period. Positions are 0-based: the training
# set occupies 0 .. len(train_data) - 1, so the first out-of-sample position
# is len(train_data). Starting one later would shift every forecast by one
# step against test_data. The end is taken from the series length instead of
# a hard-coded number, so it follows the data.
forecast_start = len(train_data)
forecast_end = len(sub_data) - 1
predictions = result.predict(start=forecast_start, end=forecast_end)

In [None]:
# Display the forecast series.
predictions

In [None]:
# First ten forecasts, for a side-by-side look with the test values below.
predictions.head(10)

In [None]:
# First ten observed midpoint values from the hold-out split.
test_data['mean_value'].head(10)