In [None]:
# Data preparation (from CSV) and LSTM forecasting -- temperature
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Load the Madrid weather history from CSV and preview both ends of the table.
df = pd.read_csv('1weather_madrid_LEMD_1997_2015.csv')
print(df.head())
print(df.tail())

# Keep the parsed dates aside; they are reused later to date the forecasts.
train_dates = pd.to_datetime(df.loc[:, 'Date'])
print(train_dates.tail(15))

# Training features are picked by position: the 2nd and 3rd columns of the
# file (presumably temperature and humidity -- verify against the CSV header).
# The first column (Date) is not used as a feature.
cols = df.columns[1:3].tolist()
print(cols)

# Per-column count of missing values (only displayed when run as the last
# expression of a notebook cell).
df.isnull().sum()

# Histogram of the daily mean temperature in 5-unit bins over 0..60.
# sns.histplot replaces the deprecated sns.distplot(..., kde=False); both draw
# a plain count histogram for the given bin edges.
plt.figure(figsize=(15, 10))
sns.histplot(df['Mean TemperatureC'], bins=list(range(0, 61, 5)), kde=False)
plt.title("Distribution of Temperatures")
plt.grid()
plt.show()

# Histogram of the daily mean humidity in 5-unit bins.
# The bin edges previously stopped at 60 (copied from the temperature plot),
# which silently dropped every reading above 60. Humidity is assumed to be a
# percentage, so the edges now span 0..100 -- adjust if the unit differs.
plt.figure(figsize=(15, 10))
sns.histplot(df[' Mean Humidity'], bins=list(range(0, 101, 5)), kde=False)
plt.title("Distribution of Humidity")
plt.grid()
plt.show()


# Training frame: only the selected feature columns, cast to float.
df_for_training = df.loc[:, cols].astype(float)

# LSTM gates use sigmoid/tanh, which are sensitive to input magnitude, so each
# feature column is standardised (zero mean, unit variance) before training.
# The fitted scaler is kept so predictions can be mapped back to real units.
scaler = StandardScaler()
df_for_training_scaled = scaler.fit_transform(df_for_training)

# An LSTM expects input shaped (n_samples, timesteps, n_features).
# Each sample is a window of the previous `n_past` rows (all feature columns);
# its target is column 0 of the row `n_future` steps after the window start
# offset, i.e. the row immediately following the window when n_future == 1.

n_future = 1   # how many days ahead the target lies
n_past = 14    # how many past days form one input window

# Input windows: rows [end - n_past, end) with every feature column.
trainX = np.array([
    df_for_training_scaled[end - n_past:end, 0:df_for_training.shape[1]]
    for end in range(n_past, len(df_for_training_scaled) - n_future + 1)
])

# Targets: a length-1 slice of column 0 so each target keeps shape (1,).
trainY = np.array([
    df_for_training_scaled[end + n_future - 1:end + n_future, 0]
    for end in range(n_past, len(df_for_training_scaled) - n_future + 1)
])

# Stacked-LSTM regressor.
# trainX is (n_samples, timesteps, n_features) and trainY is (n_samples, 1):
# the network reads every feature of a window but predicts only one value
# (column 0 of the scaled data). Because the other features are not
# predicted, the model cannot be rolled forward on its own output for more
# than one step.
model = Sequential([
    # First recurrent layer returns the full sequence to feed the second LSTM.
    LSTM(64, activation='relu',
         input_shape=(trainX.shape[1], trainX.shape[2]),
         return_sequences=True),
    # Second recurrent layer collapses the sequence to its final state.
    LSTM(32, activation='relu', return_sequences=False),
    Dropout(0.2),
    # One output unit per target column.
    Dense(trainY.shape[1]),
])

model.compile(loss='mse', optimizer='adam')
model.summary()

# Train; the last 10% of the samples is held out for validation.
history = model.fit(
    trainX,
    trainY,
    batch_size=16,
    epochs=10,
    verbose=1,
    validation_split=0.1,
)
# ---- Prediction (temperature) ----

# First pass: predict on every training window except the first 30.
# NOTE(review): `y_pred_future` from this pass is overwritten further down and
# `forecast_period_dates` is not used again in this script.
n_future = 30
n_past = 1

forecast_period_dates = pd.date_range(
    train_dates.iloc[-n_past], periods=n_future, freq='1d'
).tolist()

forecast = model.predict(trainX[n_future:])
# The scaler was fitted on all feature columns, so inverse_transform needs
# that many columns: replicate the single prediction column, invert, and keep
# column 0 only.
forecast_copies = forecast.repeat(df_for_training.shape[1], axis=-1)
y_pred_future = scaler.inverse_transform(forecast_copies)[:, 0]

# Second pass: predict on the last `n_days_for_prediction` training windows
# and label the results with consecutive daily dates starting at the last
# training date.
n_future = 60
n_past = 1
n_days_for_prediction = 30  # number of windows / dates to predict

predict_period_dates = pd.date_range(
    train_dates.iloc[-n_past], periods=n_days_for_prediction, freq='1d'
).tolist()
print(predict_period_dates)

# Result shape is (n_days_for_prediction, 1).
prediction = model.predict(trainX[-n_days_for_prediction:])

# Same replicate-then-invert trick as above to get back to original units.
prediction_copies = prediction.repeat(df_for_training.shape[1], axis=-1)
y_pred_future = scaler.inverse_transform(prediction_copies)[:, 0]

# Timestamps -> plain dates.
forecast_dates = [stamp.date() for stamp in predict_period_dates]

df_forecast = pd.DataFrame({'Date':np.array(forecast_dates), 'Temperature':y_pred_future})
df_forecast['Date'] = pd.to_datetime(df_forecast['Date'])

# Persist the forecast table to an Excel workbook.
with pd.ExcelWriter(r'C:\Users\i3\forecastLSTM.xlsx') as writer:
    df_forecast.to_excel(writer, index=False, sheet_name='df_forecast')

## humidity 
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns
# Humidity pipeline: load the humidity dataset and preview both ends.
df_h = pd.read_csv('weather_humidity.csv')
print(df_h.head())
print(df_h.tail())

# Keep the parsed dates aside; they are reused later to date the forecasts.
train_dates_h = pd.to_datetime(df_h.loc[:, 'Date'])
print(train_dates_h.tail(15))

# Training features are picked by position: the 2nd and 3rd columns of the
# file. The first column (Date) is not used as a feature.
cols_h = df_h.columns[1:3].tolist()
print(cols_h)

# Training frame: only the selected feature columns, cast to float.
df_for_training_h = df_h.loc[:, cols_h].astype(float)

# Standardise each feature column (zero mean, unit variance) because the LSTM
# activations are sensitive to input magnitude.
# Note: this rebinds `scaler`, replacing any scaler fitted earlier.
scaler = StandardScaler()
df_for_training_scaled_h = scaler.fit_transform(df_for_training_h)
print("traning_scaled",len(df_for_training_scaled_h),df_for_training_scaled_h)

# An LSTM expects input shaped (n_samples, timesteps, n_features).
# Each sample is a window of the previous `n_past_h` rows (all feature
# columns); its target is column 0 of the row that follows the window when
# n_future_h == 1.

n_future_h = 1   # how many days ahead the target lies
n_past_h = 14    # how many past days form one input window

# Input windows: rows [end - n_past_h, end) with every feature column.
trainX_h = np.array([
    df_for_training_scaled_h[end - n_past_h:end, 0:df_for_training_h.shape[1]]
    for end in range(n_past_h, len(df_for_training_scaled_h) - n_future_h + 1)
])

# Targets: a length-1 slice of column 0 so each target keeps shape (1,).
trainY_h = np.array([
    df_for_training_scaled_h[end + n_future_h - 1:end + n_future_h, 0]
    for end in range(n_past_h, len(df_for_training_scaled_h) - n_future_h + 1)
])

# Displayed only when run as the last expression of a notebook cell.
trainY_h

# Stacked-LSTM regressor for the humidity dataset.
# trainX_h is (n_samples, timesteps, n_features) and trainY_h is
# (n_samples, 1): the network reads every feature of a window but predicts
# only one value (column 0 of the scaled data). Because the other features
# are not predicted, the model cannot be rolled forward on its own output for
# more than one step.
# Note: this rebinds `model`, replacing any previously trained model.
model = Sequential([
    # First recurrent layer returns the full sequence to feed the second LSTM.
    LSTM(64, activation='relu',
         input_shape=(trainX_h.shape[1], trainX_h.shape[2]),
         return_sequences=True),
    # Second recurrent layer collapses the sequence to its final state.
    LSTM(32, activation='relu', return_sequences=False),
    Dropout(0.2),
    # One output unit per target column.
    Dense(trainY_h.shape[1]),
])

model.compile(loss='mse', optimizer='adam')
model.summary()

# Train; the last 10% of the samples is held out for validation.
history = model.fit(
    trainX_h,
    trainY_h,
    batch_size=16,
    epochs=10,
    verbose=1,
    validation_split=0.1,
)

# ---- Prediction (humidity) ----

# First pass: predict on every training window except the first 30.
# NOTE(review): `y_pred_future_h` from this pass is overwritten further down
# and `forecast_period_dates_h` is not used again in this script.
n_future_h = 30
n_past_h = 1

forecast_period_dates_h = pd.date_range(
    train_dates_h.iloc[-n_past_h], periods=n_future_h, freq='1d'
).tolist()
forecast_h = model.predict(trainX_h[n_future_h:])
# The scaler was fitted on all feature columns, so inverse_transform needs
# that many columns: replicate the single prediction column, invert, and keep
# column 0 only.
forecast_copies_h = forecast_h.repeat(df_for_training_h.shape[1], axis=-1)
y_pred_future_h = scaler.inverse_transform(forecast_copies_h)[:, 0]

# Second pass: predict on the last `n_days_for_prediction_h` training windows
# and label the results with consecutive daily dates starting at the last
# training date.
n_future_h = 60
n_past_h = 1
n_days_for_prediction_h = 30  # number of windows / dates to predict

predict_period_dates_h = pd.date_range(
    train_dates_h.iloc[-n_past_h], periods=n_days_for_prediction_h, freq='1d'
).tolist()

# Result shape is (n_days_for_prediction_h, 1).
prediction_h = model.predict(trainX_h[-n_days_for_prediction_h:])
# Same replicate-then-invert trick as above to get back to original units.
prediction_copies_h = prediction_h.repeat(df_for_training_h.shape[1], axis=-1)
y_pred_future_h = scaler.inverse_transform(prediction_copies_h)[:, 0]

df_forecast_h = pd.DataFrame({'Date':np.array(predict_period_dates_h), 'Humidity':y_pred_future_h})

# Persist the forecast table to an Excel workbook.
with pd.ExcelWriter(r'C:\Users\i3\forecastLSTM_h.xlsx') as writer:
    df_forecast_h.to_excel(writer, index=False, sheet_name='df_forecast_h')

# Comparing the forecast with the Raspberry Pi readings -- temperature.

# Reduce forecast timestamps to plain dates so they can be matched against the
# sensor file by calendar day.
df_forecast['Date'] = pd.to_datetime(df_forecast['Date']).dt.date

# {date: [forecast temperature]} -- one single-element list per forecast date.
# (A leftover debug loop that only re-read each entry has been removed.)
forecast_dict = df_forecast.set_index('Date').T.to_dict('list')

# Sensor readings; 'Date' is reduced to plain dates as well.
df_rasp = pd.read_csv('rasp_input.csv')
df_rasp['Date'] = pd.to_datetime(df_rasp['Date']).dt.date
print(df_rasp)

# Keep the first two columns of the sensor file, selected by position
# (presumably Date and the temperature reading -- verify against the file).
cols = list(df_rasp)[0:2]
df_for_rasp = df_rasp[cols]

# MAPE-K loop (temperature): compare each forecast value with the sensor
# reading of the same date, log differing readings to knowled.csv and derive
# the value to hand to the GUI.
import pandas as pd

# {date: [sensor reading]} built from the sensor frame.
df_for_rasp_dict = df_for_rasp.set_index('Date').T.to_dict('list')

# Start every run with an empty knowledge file; rows are appended below.
with open('knowled.csv', 'w'):
    pass

# Stays None when no forecast date has a differing sensor reading; without
# this default the final print raised NameError in that case.
final_temp = None

for rec, temp in forecast_dict.items():
    if rec in df_for_rasp_dict:
        temp2 = df_for_rasp_dict[rec]
        diff_temp = temp[0] - temp2[0]
        print("temperature difference observed for: ", diff_temp, rec)
        if diff_temp != 0:
            # Knowledge base: remember the actual sensor value for this date.
            knowled = pd.DataFrame([[rec, temp2[0]]], columns=['Date', 'Temp'])
            knowled.to_csv('knowled.csv', mode='a+', header=False, index=False, encoding="utf-16")
            if diff_temp < 0:
                # Forecast below the sensor: raise it by the gap.
                final_temp = temp[0] + abs(diff_temp)  # value to pass to the GUI
                print("predicted temp less than actual rasp,final_temp is: ", final_temp)
            else:
                # Forecast above the sensor: sensor value plus the gap.
                final_temp = temp2[0] + diff_temp
                print("predicted temp greater than actual rasp,final_temp is: ", final_temp)
    else:
        # No sensor reading for this forecast date.
        temp2 = ['None']
print(final_temp)
# Calculating the deviation for humidity: prepare forecast and sensor tables.
print(type(df_forecast_h))

# Reduce forecast timestamps to plain dates so they can be matched against the
# sensor file by calendar day.
df_forecast_h['Date'] = pd.to_datetime(df_forecast_h['Date']).dt.date

# {date: [forecast humidity]} -- one single-element list per forecast date.
# (A leftover debug loop that only re-read each entry has been removed.)
forecast_dict_h = df_forecast_h.set_index('Date').T.to_dict('list')

# Sensor readings; 'Date' is reduced to plain dates as well.
df_rasp = pd.read_csv('rasp_input.csv')
df_rasp['Date'] = pd.to_datetime(df_rasp['Date']).dt.date

# Only the date and the humidity reading are needed here.
cols = ['Date', 'Humidity']
df_for_rasp_h = df_rasp[cols]
print(df_for_rasp_h)


# MAPE-K loop (humidity): compare each forecast value with the sensor reading
# of the same date, log differing readings to knowled_h.csv and derive the
# final humidity value.
import pandas as pd

# {date: [sensor humidity]} built from the sensor frame.
df_for_rasp_dict_h = df_for_rasp_h.set_index('Date').T.to_dict('list')

# Start every run with an empty knowledge file; rows are appended below.
with open('knowled_h.csv', 'w'):
    pass

# Stays None when no forecast date has a differing sensor reading, so later
# code can rely on the name being defined.
final_hum = None

for rec, temp_h in forecast_dict_h.items():
    if rec in df_for_rasp_dict_h:
        temp2_h = df_for_rasp_dict_h[rec]
        diff_hum = temp_h[0] - temp2_h[0]
        # Fixed: this used to print `diff_temp`, the temperature difference.
        print("Humidity difference observed for: ", diff_hum, rec)
        if diff_hum != 0:
            # Knowledge base: remember the actual sensor value for this date.
            # The column label is not written (header=False); it is named
            # 'Humidity' only for readability.
            knowled_h = pd.DataFrame([[rec, temp2_h[0]]], columns=['Date', 'Humidity'])
            knowled_h.to_csv('knowled_h.csv', mode='a', header=False, index=False, encoding="utf-16")
            if diff_hum < 0:
                # Forecast below the sensor: raise it by the gap.
                final_hum = temp_h[0] + abs(diff_hum)
                print("predicted humidity less than actual sensor value,final Humidity is: ", final_hum)
            else:
                # Forecast above the sensor: sensor value plus the gap.
                final_hum = temp2_h[0] + diff_hum
                print("predicted humidity greater than actual sensor value,final Humidity is: ", final_hum)
    else:
        # No sensor reading for this forecast date.
        temp2_h = ['None']
# Decide the home-automation state from today's sensor readings.
df_for_rasp  # displayed only when run as the last expression of a notebook cell
print(df_for_rasp_dict_h.keys())
print(df_for_rasp_dict.keys())
from datetime import date

today = date.today()

# Fail with a descriptive message when today has no sensor entry (a bare
# KeyError on the date was raised before). The stray `checking_hum`
# expression that preceded its own assignment, and raised NameError on a
# fresh run, has been removed.
if today not in df_for_rasp_dict_h or today not in df_for_rasp_dict:
    raise KeyError(f"no Raspberry Pi reading dated {today} found in the sensor data")

checking_hum = df_for_rasp_dict_h[today]
print(checking_hum)
checking_temp = df_for_rasp_dict[today]
print(checking_temp)

# Thresholds: temperature 17, humidity 60. The branches are evaluated in
# order, so a reading exactly on a threshold takes the first branch it matches.
if ((checking_temp[0] <= 17) and (checking_hum[0] <= 60)):
    myhome = 1  # on
elif ((checking_temp[0] >= 17) and (checking_hum[0] <= 60)):
    myhome = 2  # heater off, water on
elif ((checking_temp[0] <= 17) and (checking_hum[0] >= 60)):
    myhome = 3  # heater on, water off
else:
    myhome = 4  # remaining case: temperature above 17 and humidity above 60
print(myhome)
print(myhome)
%store myhome
# Same decision table as for the live readings, exercised with fixed sample
# values (temperature 30, humidity 50).
checking1_temp1 = 30
checking1_hum1 = 50

# Thresholds: temperature 17, humidity 60. Values exactly on a threshold fall
# in the "<=" side, as in the ordered if/elif chain this replaces.
if checking1_hum1 <= 60:
    # 1: on / 2: heater off, water on
    myhome = 1 if checking1_temp1 <= 17 else 2
else:
    # 3: heater on, water off / 4: remaining case
    myhome = 3 if checking1_temp1 <= 17 else 4
print(myhome)
%store myhome


