In [None]:
import numpy as np 
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
from datetime import datetime
import os

In [None]:
# # Load the TensorBoard notebook extension
# %load_ext tensorboard

In [None]:
# drive mount
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%pwd

In [None]:
# Set this to the Drive folder that holds the dataset and the model
# checkpoints; the later file paths in this notebook are built from it.

folder_path = '/content/drive/MyDrive/SIH_2022/'    #path to your model folder


In [None]:
# accessing file 2019Combined.csv
# os.path.join keeps the path valid whether or not folder_path ends with '/'.
df = pd.read_csv(os.path.join(folder_path, '2019Combined.csv'))
df.head()

In [None]:
# changing the working directory from colab to model folder

%cd $folder_path

In [None]:
%pwd

In [None]:
df['Date'] = pd.to_datetime(df['Date'])

In [None]:
df.info()

In [None]:
df.plot(x='Date', figsize=(16,4))

In [None]:
df.plot(y=['R (Sunspot No.)  ','Kp index  '], x='Date', xlim = ['1 Jan 2019 00:00:00', '7 Jan 2019 08:00:00'], figsize=(16,4))

In [None]:
df.plot(y='R (Sunspot No.)  ',x='Date', xlim = ['1 Jan 2019 00:00:00', '7 Jan 2019 08:00:00'], figsize=(16,4))

In [None]:
#sunspot no, kp index vs tec graph

df.plot(x='Date', y='TEC', kind='scatter', figsize=(16,4))

In [None]:
# converting col Date into index and dropping from df

df.set_index('Date', drop=True, inplace=True)

In [None]:
df.head()

## **datetime resampling**

In [None]:
##monthly

df.resample(rule='M').max()['TEC'].plot(kind='bar', figsize=(16,4))

In [None]:
## daily

df.resample(rule='D').mean()['TEC'].plot(figsize=(16,4))

In [None]:
## daily data from 1 jan to 31 march

df.resample(rule='D').max()['TEC'].plot(xlim=['1 Jan 2019', '31 March 2019'], figsize=(16,4))

In [None]:
df.drop(columns=['YEAR','DAY','HOUR'], inplace=True)

In [None]:
### dropping the AE index for 2019 because it contains missing values (encoded as 9999)
df.drop(columns=['AE-index, nT'], inplace=True)

In [None]:
### removing lat and lon as well because the location is fixed at (12.5, 75)
df.drop(columns=['lat', 'lon'], inplace=True)

In [None]:
df.head()

In [None]:
df.shape

In [None]:
### separating train and test data

train_size = int(len(df)*.9)
train_size

In [None]:
# Positional split in the existing row order: the first `train_size` rows are
# the training set and every remaining row is the test set. The test slice
# starts exactly at `train_size`, so no row is dropped at the boundary.
train_df, test_df = df.iloc[:train_size], df.iloc[train_size:]
train_df.shape, test_df.shape

In [None]:
train_df.head()

In [None]:
test_df.head()

In [None]:
## scaling the train and test data

# # standard scaler object
# # mean = 0, variance = 1
# scaler = StandardScaler()
# train_scaler = scaler.fit_transform(train_df)


#minmax scaler
# Fitted on the training split only. `scaler` keeps the fitted per-column
# ranges; `train_scaler` holds the scaled training values (not a scaler object).
scaler = MinMaxScaler()
train_scaler = scaler.fit_transform(train_df)

In [None]:
train_scaler[0]

In [None]:
## applying transformations to the train data
train_df = pd.DataFrame(
    train_scaler,
    index = train_df.index,
    columns = train_df.columns
)

In [None]:
train_df.head() 

In [None]:
## applying test scaler

# Reuse the scaler that was fitted on the training split instead of fitting a
# second one on the test rows. A separately fitted scaler would map the same
# raw value to different scaled values in train and test, so the model would be
# evaluated on inputs from a different scale than it was trained on (and the
# test-set statistics would leak into preprocessing).
test_scaler = scaler

In [None]:
## applying transformations to the test data
test_df = pd.DataFrame(
    test_scaler.transform(test_df),
    index = test_df.index,
    columns = test_df.columns
)

In [None]:
test_df.head()

In [None]:
test_df.shape

In [None]:
## creating sequences which will help in training and testing
def createSequence(inputData: pd.DataFrame, targetColumn, sequenceLength):
  """Cut a frame into sliding windows, each paired with the next row's target.

  Args:
    inputData: frame whose rows are windowed in their existing order.
    targetColumn: column read from the row immediately after each window to
      form that window's label.
    sequenceLength: number of rows in every window.

  Returns:
    A list of (window, label) tuples, one per start offset in
    range(len(inputData) - sequenceLength). The list is empty when the frame
    has no more rows than sequenceLength.
  """
  windowCount = len(inputData) - sequenceLength
  return [
      (
          inputData[start:start + sequenceLength],
          inputData.iloc[start + sequenceLength][targetColumn],
      )
      for start in range(windowCount)
  ]


In [None]:
## creating train and test sequences

seqLen = 12   # 1 day (12 even hrs) seq

trainSeq = createSequence(train_df, 'TEC', seqLen)
testSeq = createSequence(test_df, 'TEC', seqLen)

In [None]:
len(trainSeq)

In [None]:
len(testSeq)

In [None]:
trainSeq[0][0]

In [None]:
trainSeq[0][1]

In [None]:
type(trainSeq[0][0])

In [None]:
# Each entry is a (window DataFrame, scalar label) pair, i.e. a ragged nested
# sequence. dtype=object asks NumPy for an (n_sequences, 2) array of Python
# objects; without it, recent NumPy versions refuse to build ragged arrays and
# raise ValueError.
train_seq_np = np.array(trainSeq, dtype=object)
train_seq_np.shape

In [None]:
train_seq_np[1][0]

In [None]:
train_seq_np[1][1]

In [None]:
# Take the input window (first item of every pair) and stack the windows into
# a single array.
train_X = np.array([pair[0] for pair in train_seq_np])

In [None]:
train_X.shape

In [None]:
train_X[0]

In [None]:
# Take the label (second item of every pair) and collect the labels into a
# single array.
train_Y = np.array([pair[1] for pair in train_seq_np])

In [None]:
train_Y.shape

In [None]:
train_Y = train_Y.reshape(train_Y.shape[0], 1)
train_Y.shape

In [None]:
train_Y[0:5]

# **creating model**

In [None]:
#### creating a model and training

# (units, dropout rate) for every stacked LSTM layer, widest first. Each LSTM
# is followed by a Dropout layer with the paired rate.
# NOTE(review): 34 stacked LSTMs of up to 5000 units is very large for a
# 12-step input window -- confirm this depth/width is intended.
lstm_stack = [
    (5000, 0.5), (4000, 0.5), (3000, 0.5), (2500, 0.5), (2000, 0.5), (1500, 0.5),
    (1200, 0.2), (1000, 0.2), (800, 0.2), (650, 0.2), (512, 0.2), (500, 0.2),
    (480, 0.2), (450, 0.2), (420, 0.2), (400, 0.2), (380, 0.2), (350, 0.2),
    (320, 0.2), (300, 0.2), (280, 0.2), (260, 0.2), (240, 0.2), (220, 0.2),
    (200, 0.2), (180, 0.2), (160, 0.2), (140, 0.2), (128, 0.2), (100, 0.2),
    (80, 0.2), (64, 0.2), (32, 0.2), (20, 0.2),
]

# Widths of the ReLU Dense layers placed between the LSTM stack and the output.
dense_units = [20, 10, 8, 4]

model = Sequential()
# Input is one window: (time steps, features), taken from the training array.
model.add(keras.Input(shape = (train_X.shape[1], train_X.shape[2])))

last_lstm = len(lstm_stack) - 1
for position, (units, drop_rate) in enumerate(lstm_stack):
  # Every LSTM except the last passes its full output sequence to the next
  # LSTM; the last one returns only its final output for the Dense head.
  model.add(LSTM(units, activation = 'tanh', return_sequences = position < last_lstm))
  model.add(Dropout(drop_rate))

for units in dense_units:
  model.add(Dense(units, activation='relu'))

# Linear output layer with one unit per target column.
model.add(Dense(train_Y.shape[1]))

# The target is a continuous value trained with MSE, so mean absolute error is
# the informative metric. 'accuracy' is not meaningful for regression; it is
# kept only because the training-curve cell further down reads
# history.history['accuracy'] and ['val_accuracy'].
model.compile(optimizer = keras.optimizers.Adam(learning_rate = 0.001),loss = 'mse',metrics=['mae', 'accuracy'])

model.summary()

# **training the model**

In [None]:
%pwd

In [None]:
##creating a model checkpoint

## make sure to check the model weights version before saving it

# Weights file location: <folder_path>/ModelCheckpoints/<file name below>.
checkpoint_filepath = os.path.join(folder_path, 'ModelCheckpoints/', 'best_model_weights_2019_8_val_loss.h5')

# Writes weights only (not the full model), and only on epochs where the
# monitored val_loss is the best seen so far. `mode` is left at its default;
# the commented-out 'max' would be the wrong direction for a loss.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    # mode='max',
    save_best_only=True)

In [None]:
## early stopping

# Stop once val_loss has failed to improve for 6 consecutive epochs, and roll
# the model back to the weights of its best epoch. Without the rollback, the
# model that is saved and evaluated afterwards would carry the weights from
# the last (non-improving) epoch rather than the best one.
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)

In [None]:
# log_dir = "/content/logs/fits" + datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# fitting the model
# validation_split=0.2 makes Keras hold out a fifth of (train_X, train_Y) for
# validation; that held-out part produces the val_loss monitored by both
# callbacks. Training runs for at most 25 epochs and may end earlier via
# earlyStopping. `history` records the per-epoch loss and metric values.
history = model.fit(train_X, train_Y,
                    epochs=25,
                    batch_size=8,
                    validation_split=0.2,
                    callbacks=[earlyStopping, model_checkpoint_callback],
                    verbose=1 )

# model.save_weights('epochWeights.h5')


In [None]:
## plotting the model

# (history key, legend label) for each training curve, drawn in this order on
# the same axes.
curves = [
    ('loss', 'Training loss'),
    ('val_loss', 'Validation loss'),
    ('accuracy', 'Training accuracy'),
    ('val_accuracy', 'Validation accuracy'),
]
for key, legend_label in curves:
  plt.plot(history.history[key], label=legend_label)

plt.legend()

In [None]:
## make sure to change the model version before saving it

# Full-model save under <folder_path>/ModelCheckpoints/. This stores the model
# object as it stands when this cell runs, separately from the weights-only
# file written by the checkpoint callback.
savedModelPath = os.path.join(folder_path, 'ModelCheckpoints/', 'tecModel2019_8.h5')

model.save(savedModelPath)

In [None]:
train_X[0]

In [None]:
pred = model.predict(train_X[:1])

In [None]:
pred

# **Testing model**

In [None]:
testSeq[0][0]

In [None]:
testSeq[0][1]

In [None]:
type(testSeq)

In [None]:
#converting test seq into numpy array

# Each entry is a (window DataFrame, scalar label) pair, i.e. a ragged nested
# sequence. dtype=object asks NumPy for an (n_sequences, 2) array of Python
# objects; without it, recent NumPy versions refuse to build ragged arrays and
# raise ValueError.
test_seq_np = np.array(testSeq, dtype=object)
test_seq_np.shape

In [None]:
test_seq_np[0][0]
# print(test_seq_np[0][1])

In [None]:
# Split every (window, label) pair into parallel input and target arrays.
test_X = np.array([pair[0] for pair in test_seq_np])
test_Y = np.array([pair[1] for pair in test_seq_np])

In [None]:
print(test_X.shape, test_Y.shape)

In [None]:
test_Y = test_Y.reshape(test_Y.shape[0],1)
print(test_X.shape, test_Y.shape)

In [None]:
test_Y[:5]

In [None]:
#### getting predictions for test set


test_pred = model.predict(test_X)

In [None]:
test_pred.shape

In [None]:
test_pred[:5]

In [None]:
# Count the test samples whose prediction equals the target exactly. The total
# is computed in one pass over the arrays; resetting a counter inside a
# per-sample loop would report at most the last sample's result.
# NOTE(review): exact equality between float regression outputs and targets is
# almost never true, so this count is expected to be ~0 -- an error metric such
# as MAE or RMSE is probably what is wanted here.
count = int(np.count_nonzero(test_Y == test_pred))
print(count)

In [None]:
train_pred = model.predict(train_X)

# Count the training samples whose prediction equals the target exactly. The
# total is computed in one pass over the arrays; resetting a counter inside a
# per-sample loop would report at most the last sample's result.
# NOTE(review): exact equality between float regression outputs and targets is
# almost never true, so this count is expected to be ~0 -- an error metric such
# as MAE or RMSE is probably what is wanted here.
count = int(np.count_nonzero(train_Y == train_pred))
print(count)

In [None]:
# Convert the scaled predictions back to original TEC units. The scaler works
# on every column of the frame, so the single predicted column is tiled across
# all columns, inverse-transformed, and then only the TEC column is kept (its
# position is looked up by name rather than assumed to be column 0).
tec_col = test_df.columns.get_loc('TEC')
prediction_copies = np.repeat(test_pred, test_df.shape[1], axis=-1)
y_pred_future = test_scaler.inverse_transform(prediction_copies)[:, tec_col]

In [None]:
y_pred_future

In [None]:
test_pred