In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.initializers import glorot_uniform
from keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd

In [None]:
# Number of lagged time steps per sample; the reframing cells further down read this.
timesteps = 12

# Read the comma-separated fire dataset into a DataFrame.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
csv_path = '/Users/sachita/Documents/Projects/Summer/Winter2018/firedata/fire_mod.csv'
df = pd.read_csv(csv_path, sep=',')

# Summary statistics are the cell output.
df.describe()

In [None]:
# Preview the first rows of the freshly loaded data.
df.head()

In [None]:
# Sort by lat/lon so the rows of each location are contiguous, ready for the
# conversion to supervised data.
# NOTE(review): only lat and lon are sort keys; the order of rows inside one
# location is presumably already chronological in the CSV - confirm.
df = df.sort_values(by=['lat', 'lon'])
df.head(n=10)

In [None]:
# Remove the lat, lon, date and gfed columns before the LSTM conversions.
df = df.drop(labels=['lat', 'lon', 'date', 'gfed'], axis='columns')

# Bookkeeping values read by later cells
df_shape = df.shape               # shape of the dataframe before conversion to LSTM format
final_cols = df.columns.tolist()  # titles of the columns that remain
n_vars = len(final_cols)          # number of variables we have for prediction
df.head()

In [None]:
# Display the (rows, columns) shape of df as the cell output.
df.shape

In [None]:
# List every column label, one per line (iterating a DataFrame yields its column labels).
for column_name in df:
    print(column_name)

In [None]:
# convert series to supervised learning

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Every row of the result holds the observations of the previous ``n_in``
    time steps followed by the observations of the current and the next
    ``n_out - 1`` time steps. Columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
    and ``var<j>(t+<i>)`` where ``j`` is the 1-based variable number.

    Arguments:
        data: Sequence of observations as a list (flat or list of rows),
            1-D/2-D NumPy array, pandas Series or DataFrame.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values
            (the first ``n_in`` and last ``n_out - 1`` rows contain NaN
            because of the shifting).
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    # Build the frame first and read the variable count from it. This handles
    # flat lists, lists of rows, 1-D arrays and Series uniformly, instead of
    # guessing the width from the input's Python type.
    frame = pd.DataFrame(data)
    n_vars = frame.shape[1]

    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(frame.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(frame.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values (introduced by the shifts at both ends)
    if dropnan:
        agg = agg.dropna()

    return agg

In [None]:
# NORMALISATION OF DATA

# Split the features (every column but the last) from the target (last column);
# only the features are normalised.
x_val = df.iloc[:, :-1].values
y_val = df.iloc[:, -1].values.reshape((df_shape[0], 1))  # column vector, avoids an (m,) shaped y

# Standardise the features with StandardScaler
scaler = StandardScaler()
scaled_val = scaler.fit_transform(x_val)

# Re-attach the unscaled target as the last column (result is a NumPy array)
final_data = np.hstack((scaled_val, y_val))
print('Shape of dataset, post Normalisation: ', final_data.shape)



In [None]:
# PREPARATION FOR FRAMING AS SUPERVISED LEARNING

# Column names of the reframed dataset: every variable at each lag
# t-timesteps ... t-1, following the 'var<j>(t-<i>)' scheme that
# series_to_supervised generates.
names = [
    'var%d(t-%d)' % (j + 1, i)
    for i in range(timesteps, 0, -1)
    for j in range(n_vars)
]

# The value predicted at the current time step is the LAST variable. Its name
# is derived from n_vars (rather than hard-coded) so it always matches the
# 'var<n_vars>(t)' column that survives the drop computed below.
names.append('var%d(t)' % n_vars)
# names now holds timesteps*n_vars + 1 entries

final_df = pd.DataFrame(columns=names)
print('Initial shape of empty dataset, without filling/reframing: ', final_df.shape)

# Positions of the columns to drop from the series_to_supervised output:
# the current-time-step (t) columns of every variable except the last one.
# range() excludes end_index, so the final (target) column is kept.
start_index = timesteps * n_vars
end_index = (timesteps + 1) * n_vars - 1
cols_to_drop = list(range(start_index, end_index))
print(start_index, end_index)

In [None]:
#CALLING FUNCTION TO REFRAME COLUMNS

n_pairs = 3120       # number of lat-lon pairs
rows_per_pair = 168  # number of entries for each lat-lon pair

# Reframed frames are collected in a list and concatenated once at the end;
# growing a DataFrame with pd.concat inside the loop copies all accumulated
# rows on every iteration.
reframed_parts = []

for i in range(n_pairs):

    # Timestepping has to be done within a lat-lon pair, so take exactly the
    # rows of pair i: [i*rows_per_pair, (i+1)*rows_per_pair).
    # (A slice of i:(rows_per_pair+i) would only slide the window by one row
    # per iteration and mix neighbouring pairs.)
    pair_start = i * rows_per_pair
    pair_rows = final_data[pair_start:pair_start + rows_per_pair, :]

    reframed = series_to_supervised(pair_rows, timesteps, 1)

    # drop columns we don't want to predict - these are the current weather conditions
    reframed = reframed.drop(reframed.columns[cols_to_drop], axis=1)

    reframed_parts.append(reframed)

    # Check on progress
    if i % 300 == 0:
        print((i), 'Lat-Lon Pairs Complete')

# Build the final dataset from scratch, so re-running this cell does not
# append the same rows a second time.
final_df = pd.concat(reframed_parts, axis=0)

print('Final Shape of dataset, post reframing: ', final_df.shape)

In [None]:
# Preview the first rows of the reframed dataset.
final_df.head()

In [None]:
# Final check on the alignment of columns: print every column label of the
# reframed dataset, one per line (iterating a DataFrame yields its column labels).
for column_name in final_df:
    print(column_name)

In [None]:
# Persist the reframed dataset as a comma-separated file, without the index column.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
final_df.to_csv('/Users/sachita/Documents/Projects/Summer/Winter2018/firedata/lstm.csv', sep=',', index=False)

In [None]:
# Display the (rows, columns) shape of the reframed dataset as the cell output.
final_df.shape