In [None]:
!pip install feature-engine
!pip install arcticdata=='1.4'

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import arcticdata.ExploratoryDataAnalysis as eda
import arcticdata.FeatureEngineering as fe
import arcticdata.FeatureSelection as fs

import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for path in full_paths:
        print(path)

In [None]:
# StateHoliday is compared against the string codes 'a', 'b' and 'c' later in
# this notebook; reading it as str up front keeps the column a single dtype
# instead of leaving it to the parser's type inference.
# NOTE(review): the public Rossmann CSVs are reported to mix numeric 0 with
# letter codes in this column (pandas DtypeWarning) — confirm on your copy.
HOLIDAY_DTYPE = {'StateHoliday': str}

train = pd.read_csv('/kaggle/input/rossmann-store-sales/train.csv', dtype=HOLIDAY_DTYPE)
test = pd.read_csv('/kaggle/input/rossmann-store-sales/test.csv', dtype=HOLIDAY_DTYPE)
shops = pd.read_csv('/kaggle/input/rossmann-store-sales/store.csv')
train.head()

In [None]:
# Preview the first rows of the store table loaded from store.csv.
shops.head()

In [None]:
# Column dtypes of the training frame.
train.dtypes

In [None]:
# NOTE(review): eda.missing_data comes from arcticdata and is not defined in
# this notebook — presumably a per-column missing-value summary; confirm.
eda.missing_data(train).head()

In [None]:
# Same helper applied to the test frame.
eda.missing_data(test).head()

In [None]:
# Same helper applied to the store frame.
eda.missing_data(shops).head()

In [None]:
# Draw one histogram per column of the training frame on a 20x20 canvas.
hist_axes = train.hist(figsize=(20, 20))
plt.show()

In [None]:
# Keep only the rows where the store was open, then drop the 'Open' flag,
# which is constant after the filter.
# Filtering and dropping in one chained expression yields a fresh frame (no
# in-place edit of a filtered slice), and the guard lets this cell be re-run
# after 'Open' has already been removed.
if 'Open' in train.columns:
    train = train.loc[train['Open'] == 1].drop(columns='Open')
train.head()

In [None]:
shops[shops.CompetitionDistance.isnull()]

In [None]:
shops[shops.CompetitionOpenSinceMonth.isnull()]

In [None]:
shops[shops.Promo2==0]

In [None]:
# Columns whose missing values are replaced with 0.
# NOTE(review): presumably "missing" here means no known competitor opening
# date / store not enrolled in Promo2 — confirm against the data description.
strcols = [
    'CompetitionOpenSinceMonth',
    'CompetitionOpenSinceYear',
    'Promo2SinceWeek',
    'Promo2SinceYear',
    'PromoInterval',
]

# One vectorised assignment back into the frame: no in-place fill on a
# temporary column selection (which is not guaranteed to write through to
# `shops`), and no loop variable that would shadow a builtin name.
shops[strcols] = shops[strcols].fillna(0)

shops.head()

In [None]:
# Impute missing competitor distances with the column mean. The filled column
# is assigned back explicitly so the result is guaranteed to land in `shops`
# rather than in a temporary column object.
mean_distance = shops['CompetitionDistance'].mean()
shops['CompetitionDistance'] = shops['CompetitionDistance'].fillna(mean_distance)

shops.head()

In [None]:
# Attach each store's attributes to its sales rows; the inner join on 'Store'
# keeps only stores present in both tables.
shops_train = shops.merge(train, on='Store', how='inner')
shops_train.head()

In [None]:
# Correlation of every numeric column with Sales, sorted ascending.
# Non-numeric columns are excluded explicitly so the result does not depend on
# how the installed pandas version treats text data inside corr().
shops_train.select_dtypes(include='number').corr()['Sales'].sort_values()

In [None]:
# Parse the Date column once and derive all three calendar parts from that
# single DatetimeIndex instead of re-parsing the column for every part.
sale_dates = pd.DatetimeIndex(shops_train.Date)
shops_train['Year'] = sale_dates.year
shops_train['Month'] = sale_dates.month
shops_train['Day'] = sale_dates.day
shops_train.head()

In [None]:
shops_train.groupby('Month')[['Sales']].mean().plot(figsize=(10,5),marker='o')
plt.show()

In [None]:
shops_train.groupby('Month')[['Customers']].mean().plot(figsize=(10,5),marker='o')
plt.show()

In [None]:
shops_train.groupby('Day')[['Sales']].mean().plot(figsize=(10,5),marker='o')
plt.show()

In [None]:
shops_train.groupby('DayOfWeek')[['Sales']].mean().plot(figsize=(10,5),marker='o')
plt.show()

In [None]:
# Mean sales per date, one line per store type. 'Sales' is selected before
# aggregating so only that column is averaged; the non-numeric columns of the
# frame are never passed to mean().
daily_by_type = shops_train.groupby(['Date', 'StoreType'])['Sales'].mean().unstack()
daily_by_type.plot(figsize=(10, 5))
plt.show()

In [None]:
from fbprophet import Prophet

In [None]:
def sales_pred(Store_id,df,holidays,periods):
    """Fit a Prophet model to one store's sales history and plot the forecast.

    Parameters
    ----------
    Store_id
        Value matched against the 'Store' column of ``df``.
    df : pandas.DataFrame
        Frame containing at least the 'Store', 'Date' and 'Sales' columns.
    holidays
        Passed unchanged to ``Prophet(holidays=...)``.
    periods
        Passed unchanged to ``model.make_future_dataframe(periods=...)``.

    Returns
    -------
    The object returned by ``model.predict`` for the future frame, so the
    caller can inspect the forecast in addition to the two figures drawn here.

    Raises
    ------
    ValueError
        If ``df`` contains no rows for ``Store_id``.
    """
    # Prophet expects the time column to be named 'ds' and the target 'y'.
    history = (
        df.loc[df['Store'] == Store_id, ['Date', 'Sales']]
        .rename(columns={'Date': 'ds', 'Sales': 'y'})
        .sort_values('ds')
    )
    if history.empty:
        # Fail with a message that names the store instead of an error from
        # deep inside the model fit.
        raise ValueError(f'No rows found for store {Store_id!r}')

    model = Prophet(holidays=holidays)
    model.fit(history)
    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)

    # Forecast plot followed by the component breakdown plot.
    model.plot(forecast, xlabel='Dates', ylabel='Sales')
    model.plot_components(forecast)
    return forecast

In [None]:
# One row per distinct date on which any row is flagged SchoolHoliday == 1,
# in the ds/holiday column layout that is later passed on as Prophet holidays.
is_school_holiday = shops_train['SchoolHoliday'] == 1
school_dates = np.unique(shops_train.loc[is_school_holiday, 'Date'].values)
school_holidays = pd.DataFrame({'ds': school_dates, 'holiday': 'school_holiday'})
school_holidays.head()

In [None]:
# One row per distinct date on which any row carries a StateHoliday code of
# 'a', 'b' or 'c', in the same ds/holiday column layout.
is_state_holiday = shops_train['StateHoliday'].isin(['a', 'b', 'c'])
state_dates = np.unique(shops_train.loc[is_state_holiday, 'Date'].values)
state_holidays = pd.DataFrame({'ds': state_dates, 'holiday': 'state_holiday'})
state_holidays.head()

In [None]:
# Stack the school and state holiday tables row-wise into a single frame.
holidays = pd.concat([school_holidays, state_holidays])
holidays

In [None]:
# Run the forecast for store 7 with 90 future periods; `df` is bound to
# whatever sales_pred returns.
df = sales_pred(Store_id=7, df=shops_train, holidays=holidays, periods=90)