In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
from pmdarima.arima import auto_arima
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARIMA

In [3]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

# Authenticate with a service-account key file.
# NOTE(review): this JSON file holds private credentials -- keep it out of
# version control and out of any shared copy of the notebook.
cred = credentials.Certificate('menstrualperiodtracker-firebase-adminsdk-7j4c9-26fa2ae7ce.json')

# initialize_app() raises ValueError when the default app already exists, which
# happens whenever this cell is re-run in a live kernel. Reuse the existing app
# in that case so the cell is idempotent.
try:
    app = firebase_admin.get_app()
except ValueError:
    app = firebase_admin.initialize_app(cred)

# Firestore client bound to the default app.
db = firestore.client()

In [4]:
# Name of the document field to extract from every record.
field_name = 'cyclelength'

# Stream every document in the 'main data' collection.
# NOTE(review): stream() is called without an explicit ordering; the time-series
# steps below presumably need chronological order -- confirm the returned order.
users_ref = db.collection(u'main data')
docs = users_ref.stream()

# Pull the field out of each document, skipping documents where it is missing/None.
candidate_values = (doc.to_dict().get(field_name) for doc in docs)
field_list = [value for value in candidate_values if value is not None]

print(field_list)
   

[28, 29, 28, 30, 27, 30, 29, 30, 28, 30, 28, 27, 31, 31, 29, 28, 28, 28, 29, 28, 29, 30, 31, 30, 29, 29, 29, 27, 29, 28, 28, 27, 29, 29, 30, 28, 27, 30, 29, 29, 28, 29, 29]


In [5]:
# Wrap the collected values in a single-column DataFrame (default integer index/column).
df = pd.DataFrame(data=field_list)

In [7]:
import numpy as np
from statsmodels.tsa.stattools import adfuller

def check_stationarity(data, alpha=0.05):
    """Run an Augmented Dickey-Fuller test on ``data`` and report the verdict.

    Parameters
    ----------
    data : array-like
        The series to test; passed straight to ``adfuller``.
    alpha : float, optional
        Significance level. The series is reported as stationary when the
        test's p-value is strictly below ``alpha``. Defaults to 0.05.

    Returns
    -------
    float
        The p-value taken from the second element of the ``adfuller`` result.
    """
    result = adfuller(data)
    pvalue = result[1]
    if pvalue < alpha:
        print("The data is stationary with p-value", pvalue)
    else:
        print("The data is not stationary with p-value", pvalue)

    return pvalue

# Test the raw series first; check_stationarity prints the verdict and returns the p-value.
pvalue = check_stationarity(df)

# Start from the raw series and replace it with its first difference only when
# the test did not indicate stationarity (p-value >= 0.05). The differenced
# series is re-tested so the new verdict is printed too.
differenced_data = df
if pvalue >= 0.05:
    differenced_data = df.diff().dropna()
    check_stationarity(differenced_data)




The data is stationary with p-value 0.00024192779218585269


In [8]:
# Splitting the dataset: the first 75% of rows go to training and the rest to
# testing. shuffle=False keeps the rows in their original order.
from sklearn.model_selection import train_test_split

X = differenced_data # dataset

train_size = 0.75
test_size = 1 - train_size
X_train, X_test = train_test_split(
    X,
    test_size=test_size,
    train_size=train_size,
    shuffle=False,
)


In [9]:
# Stepwise search for a seasonal ARIMA model on the training split.
#   * d=0 / max_d=0 : no non-seasonal differencing.
#   * D=1, m=4      : one seasonal difference with a seasonal period of 4.
#   * p, q, P, Q    : searched from 0 up to 5.
# The keyword is spelled `suppress_warnings`; the previous spelling
# (`supress_warnings`) was not recognised as that option and was forwarded as an
# extra fit argument instead.
# NOTE(review): random_state / n_fits presumably only matter for a random search
# (random=True), not for stepwise=True -- confirm against the pmdarima docs.
arima_model = auto_arima(
    X_train,
    start_p=0, d=0, start_q=0,
    max_p=5, max_d=0, max_q=5,
    start_P=0, D=1, start_Q=0,
    max_P=5, max_D=5, max_Q=5,
    m=4, seasonal=True,
    error_action='warn', trace=True,
    suppress_warnings=True, stepwise=True,
    random_state=20, n_fits=50,
)

Performing stepwise search to minimize aic
 ARIMA(0,0,0)(0,1,0)[4] intercept   : AIC=110.168, Time=0.01 sec
 ARIMA(1,0,0)(1,1,0)[4] intercept   : AIC=106.535, Time=0.02 sec
 ARIMA(0,0,1)(0,1,1)[4] intercept   : AIC=103.842, Time=0.02 sec
 ARIMA(0,0,0)(0,1,0)[4]             : AIC=108.292, Time=0.01 sec
 ARIMA(0,0,1)(0,1,0)[4] intercept   : AIC=108.789, Time=0.01 sec
 ARIMA(0,0,1)(1,1,1)[4] intercept   : AIC=105.788, Time=0.03 sec
 ARIMA(0,0,1)(0,1,2)[4] intercept   : AIC=105.775, Time=0.04 sec
 ARIMA(0,0,1)(1,1,0)[4] intercept   : AIC=106.441, Time=0.02 sec
 ARIMA(0,0,1)(1,1,2)[4] intercept   : AIC=107.764, Time=0.09 sec
 ARIMA(0,0,0)(0,1,1)[4] intercept   : AIC=102.240, Time=0.03 sec
 ARIMA(0,0,0)(1,1,1)[4] intercept   : AIC=104.212, Time=0.04 sec
 ARIMA(0,0,0)(0,1,2)[4] intercept   : AIC=104.205, Time=0.04 sec
 ARIMA(0,0,0)(1,1,0)[4] intercept   : AIC=105.990, Time=0.02 sec
 ARIMA(0,0,0)(1,1,2)[4] intercept   : AIC=106.193, Time=0.08 sec
 ARIMA(1,0,0)(0,1,1)[4] intercept   : AIC=103.7

In [10]:

# Forecast exactly one value per held-out observation. Asking for a fixed 100
# periods only worked when predict() returned a labelled Series that pandas
# could reindex onto X_test.index; with a plain array the lengths do not match.
test_forecast = arima_model.predict(n_periods=len(X_test))

# list(...) drops any index the forecast carries, so the values are aligned to
# the test rows by position.
prediction = pd.DataFrame(
    {'predicted_Cyclelength': list(test_forecast)},
    index=X_test.index,
)
prediction

Unnamed: 0,predicted_Cyclelength
32,28.779987
33,29.142291
34,28.907609
35,28.074088
36,28.779987
37,29.142291
38,28.907609
39,28.074088
40,28.779987
41,29.142291


In [12]:
# Refit the selected model on every available observation (train + test),
# then forecast the next cycles beyond the end of the data.
X_full = pd.concat([X_train, X_test])
arima_model.fit(X_full)

n_future = 10
future_predictions = arima_model.predict(n_periods=n_future)

# Label the forecasts with the integer positions that follow the last observed
# row. The observations are indexed by integers, not timestamps, so passing the
# last label to pd.date_range would be read as a timestamp rather than a row
# number, and a labelled forecast would not align with such a date index.
# list(...) drops any index on the forecast so values are assigned by position.
first_future = X_test.index[-1] + 1
future_predictions_df = pd.DataFrame(
    {'predicted_Cyclelength': list(future_predictions)},
    index=range(first_future, first_future + n_future),
)

