In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory, one per line.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import zipfile
import scipy
import scipy.stats
from scipy.stats import pearsonr

In [3]:
# Load the semicolon-separated readings. Columns 0 and 1 are merged into one parsed
# `datetime` column, which becomes the index of the frame.
# NOTE(review): `infer_datetime_format` and dict-valued `parse_dates` are deprecated in
# recent pandas releases — confirm the pandas version this notebook targets.
data = pd.read_csv(
    '../input/household/household_power_consumption.txt',
    sep=';',
    header=0,
    low_memory=False,
    infer_datetime_format=True,
    parse_dates={'datetime': [0, 1]},
    index_col=['datetime'],
)


In [4]:
# Preview the first rows of the frame that was just loaded.
data.head()


In [5]:
# Turn the '?' entries into missing values and cast every column to float32.
# A real NaN is used instead of the string 'nan', so the cast no longer relies on
# string-to-float parsing, and the result is re-assigned rather than mutating the
# frame in place.
data = data.replace('?', np.nan).astype('float32')


In [6]:
# (rows, columns) of the frame.
data.shape

In [7]:
# Fill every remaining missing value with the mean of its own column.
column_means = data.mean()
data = data.fillna(value=column_means)


Q.11

In [8]:
# Bucket the readings by calendar day ('D') and sum every column within each day.
daily_buckets = data.resample('D')
data_res = daily_buckets.sum()
print(data_res.head())

In [9]:
# Dashed line with star markers showing the daily Global_active_power totals.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(data_res.index, data_res['Global_active_power'], linestyle='--', marker='*')
ax.grid()
ax.set_xlabel('datetime')
ax.set_ylabel('Global active power')
ax.set_title('Plot of Global_active_power over time')


Q.13

In [10]:
# Pearson correlation between the daily Voltage and Global_intensity columns.
# pearsonr also returns a p-value; only the coefficient is displayed.
corr, p_value = pearsonr(data_res['Voltage'], data_res['Global_intensity'])
corr

Q.14

In [11]:
# Training window: the first 1077 daily rows (all columns).
train_data = data_res.iloc[:1077]


In [12]:
# (rows, columns) of the training window.
train_data.shape

In [13]:
# Hold-out window: every daily row from position 1077 onward (all columns).
test_data = data_res.iloc[1077:]

In [14]:
# (rows, columns) of the hold-out window.
test_data.shape

In [15]:
from fbprophet import Prophet

In [16]:
# Move the `datetime` index into a regular column so it can be renamed to `ds` later.
# The guard plus re-assignment keeps this cell idempotent. With `inplace=True`, a second
# run prepended an extra `index` column, so the positional `iloc[:, :2]` selection that
# follows silently picked the wrong columns.
if 'datetime' not in train_data.columns:
    train_data = train_data.reset_index()
train_data.head()

In [17]:
# Keep only the timestamp and the target. Selecting by name (instead of the first two
# positions) gives the same columns in the expected layout, but it fails loudly rather
# than silently picking the wrong columns if the column order ever differs.
df_train = train_data[['datetime', 'Global_active_power']]


In [18]:
# Rename the timestamp and target columns to `ds` and `y`.
train_column_names = {"datetime": "ds", "Global_active_power": "y"}
df_train = df_train.rename(columns=train_column_names)
df_train.head()

In [19]:
# Move the `datetime` index into a regular column so it can be renamed to `ds` later.
# The guard plus re-assignment keeps this cell idempotent. With `inplace=True`, a second
# run prepended an extra `index` column, so the positional `iloc[:, :2]` selection that
# follows silently picked the wrong columns.
if 'datetime' not in test_data.columns:
    test_data = test_data.reset_index()
test_data.head()


In [20]:
# Keep only the timestamp and the target. Selecting by name (instead of the first two
# positions) gives the same columns in the expected layout, but it fails loudly rather
# than silently picking the wrong columns if the column order ever differs.
df_tests = test_data[['datetime', 'Global_active_power']]

In [21]:
# Rename the timestamp and target columns to `ds` and `y`.
test_column_names = {"datetime": "ds", "Global_active_power": "y"}
df_tests = df_tests.rename(columns=test_column_names)
df_tests.head()

In [22]:
# Fit a Prophet model with default settings on the ds/y training frame.
model = Prophet()
# fit() is the last expression, so its return value is what the cell displays.
model.fit(df_train)

In [23]:
# Predict for the hold-out rows. df_tests also carries the actual `y` column;
# presumably predict() only needs `ds` — confirm against the Prophet docs.
forecast = model.predict(df_tests)
forecast.head()

In [24]:
# Narrow the forecast to the prediction and trend columns (with their bounds) for a preview.
summary_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'trend_lower', 'trend_upper']
forecast.loc[:, summary_columns].head()

In [25]:
# Draw the model's forecast plot and render it.
forecast_figure = model.plot(forecast)
plt.show()

In [27]:
from sklearn import metrics
def Metric(y_true,y_pred):
    """Return the mean absolute percentage error (MAPE) of ``y_pred``, in percent.

    Parameters
    ----------
    y_true : array-like
        Actual values. They are compared with ``y_pred`` by position, so a
        pandas index on either input is ignored.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the entries whose
        actual value is non-zero. ``nan`` when there are no such entries
        (empty input, or every actual value is zero).

    Notes
    -----
    A percentage error is undefined when the actual value is zero. Such entries
    are skipped rather than letting a division by zero turn the whole score
    into ``inf``/``nan``.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    # Align shapes first so the same mask can be applied to both arrays.
    actual, predicted = np.broadcast_arrays(actual, predicted)
    defined = actual != 0
    if not defined.any():
        return float('nan')
    return np.mean(np.abs((actual[defined] - predicted[defined]) / actual[defined])) * 100

In [28]:
# Score the first forecast against the hold-out actuals.
# Message fixed: MAPE is the Mean *Absolute* Percentage Error (Metric takes np.abs).
MAPE = Metric(df_tests['y'],forecast['yhat'])
print(f'the Mean Absolute Percentage Error (MAPE) is: {round(MAPE,2)}')

Q.15

In [29]:
# Root-mean-squared error of the first forecast on the hold-out rows, shown to 2 decimals.
squared_error = metrics.mean_squared_error(df_tests['y'], forecast['yhat'])
RMSE = np.sqrt(squared_error)
round(RMSE, 2)

Q.16

In [30]:
# Component plot for the first forecast. The call is the cell's last expression, so its
# return value is also displayed — NOTE(review): check whether the figure renders twice.
model.plot_components(forecast)

Q.17

In [31]:
# Move the `datetime` index of the daily frame into a regular column.
# The guard plus re-assignment keeps this cell idempotent. With `inplace=True`, a second
# run prepended an extra `index` column and shifted every positional selection.
if 'datetime' not in data_res.columns:
    data_res = data_res.reset_index()


In [32]:
# Preview the daily frame after the index reset.
data_res.head()

In [33]:
# Rename the timestamp to `ds`, the target to `y`, and the six remaining
# measurements to add1..add6 (registered later as extra regressors).
prophet_column_names = {
    'datetime': 'ds',
    'Global_active_power': 'y',
    'Global_reactive_power': 'add1',
    'Voltage': 'add2',
    'Global_intensity': 'add3',
    'Sub_metering_1': 'add4',
    'Sub_metering_2': 'add5',
    'Sub_metering_3': 'add6',
}
new_data_res = data_res.rename(columns=prophet_column_names)
new_data_res.head()

In [34]:
# Same positional split as before: first 1077 rows for training, the rest held out.
new_train, new_test = new_data_res.iloc[:1077], new_data_res.iloc[1077:]

In [35]:
# Second Prophet model: the six renamed measurement columns are registered as
# additional regressors before fitting on the training rows.
# NOTE(review): `daily_seasonality=True` is applied to data that was resampled to one
# row per day — confirm that this setting is intended.
model = Prophet(daily_seasonality=True)
for regressor_name in ('add1', 'add2', 'add3', 'add4', 'add5', 'add6'):
    model.add_regressor(regressor_name)

model = model.fit(new_train)
new_forecast = model.predict(new_test)
new_forecast.head()

In [36]:
# Draw the regressor model's forecast plot and render it.
new_forecast_figure = model.plot(new_forecast)
plt.show()

In [38]:
def Metric(y_true,y_pred):
    """Return the mean absolute percentage error (MAPE) of ``y_pred``, in percent.

    This cell re-declares the helper that an earlier cell also defines.

    Parameters
    ----------
    y_true : array-like
        Actual values. They are compared with ``y_pred`` by position, so a
        pandas index on either input is ignored.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the entries whose
        actual value is non-zero. ``nan`` when there are no such entries
        (empty input, or every actual value is zero).

    Notes
    -----
    A percentage error is undefined when the actual value is zero. Such entries
    are skipped rather than letting a division by zero turn the whole score
    into ``inf``/``nan``.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    # Align shapes first so the same mask can be applied to both arrays.
    actual, predicted = np.broadcast_arrays(actual, predicted)
    defined = actual != 0
    if not defined.any():
        return float('nan')
    return np.mean(np.abs((actual[defined] - predicted[defined]) / actual[defined])) * 100
# MAPE of the regressor model on the hold-out rows, shown to 2 decimals.
MAPE = Metric(y_true=new_test['y'], y_pred=new_forecast['yhat'])
round(MAPE, 2)

Q.18

In [39]:
# Root-mean-squared error of the regressor model on the hold-out rows, shown to 2 decimals.
squared_error = metrics.mean_squared_error(new_test['y'], new_forecast['yhat'])
RMSE = np.sqrt(squared_error)
round(RMSE, 2)

Q.19

In [40]:
# Component plot for the regressor model's forecast. The call is the cell's last expression,
# so its return value is also displayed — NOTE(review): check whether the figure renders twice.
model.plot_components(new_forecast)