<a href="https://colab.research.google.com/github/sam-umair/image_classify/blob/main/RNN%2CLSTM%26GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RNN, LSTM and GRU

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

Upload Dataset

In [3]:
# Load the taxi trip records; the relative path is resolved against the working directory.
df = pd.read_csv(filepath_or_buffer='ny_taxi_data.csv')

In [4]:
# Preview the first five rows to check how the columns were parsed.
df.head(n=5)

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag
0,id2875421,2,14-03-2016 17:24,14-03-2016 17:32,1,-73.982155,40.767937,-73.96463,40.765602,N
1,id2377394,1,12-06-2016 00:43,12-06-2016 00:54,1,-73.980415,40.738564,-73.999481,40.731152,N
2,id3858529,2,19-01-2016 11:35,19-01-2016 12:10,1,-73.979027,40.763939,-74.005333,40.710087,N
3,id3504673,2,06-04-2016 19:32,06-04-2016 19:39,1,-74.01004,40.719971,-74.012268,40.706718,N
4,id2181028,2,26-03-2016 13:30,26-03-2016 13:38,1,-73.973053,40.793209,-73.972923,40.78252,N


In [5]:
# Summarize column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16100 entries, 0 to 16099
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   id                  16100 non-null  object 
 1   vendor_id           16100 non-null  int64  
 2   pickup_datetime     16100 non-null  object 
 3   dropoff_datetime    16100 non-null  object 
 4   passenger_count     16100 non-null  int64  
 5   pickup_longitude    16100 non-null  float64
 6   pickup_latitude     16100 non-null  float64
 7   dropoff_longitude   16100 non-null  float64
 8   dropoff_latitude    16100 non-null  float64
 9   store_and_fwd_flag  16100 non-null  object 
dtypes: float64(4), int64(2), object(4)
memory usage: 1.2+ MB


In [9]:
# Pull the passenger counts out as a 1-D NumPy array; this is the series the models are trained on.
data = df["passenger_count"].to_numpy()
data

array([1, 1, 1, ..., 1, 1, 1])

## Data Preprocessing

In [11]:
# Count missing values per column.
df.isna().sum()

Unnamed: 0,0
id,0
vendor_id,0
pickup_datetime,0
dropoff_datetime,0
passenger_count,0
pickup_longitude,0
pickup_latitude,0
dropoff_longitude,0
dropoff_latitude,0
store_and_fwd_flag,0


In [10]:
# Scale the passenger counts with a min-max scaler (default output range is [0, 1]).
# The raw values are read from `df` instead of re-using `data`, so re-running this cell
# never re-fits the scaler on values that were already scaled; `scaler` therefore always
# maps between raw passenger counts and the scaled series.
# reshape(-1, 1): -1 lets NumPy infer the number of rows, giving the 2-D
# (n_samples, 1) array that the scaler expects.
# NOTE(review): the scaler is fit on the whole series, i.e. before any train/test split,
# so test-set min/max influence the scaling. Consider fitting on the training portion only.
scaler = MinMaxScaler()
data = scaler.fit_transform(df["passenger_count"].to_numpy().reshape(-1, 1))

In [21]:
sequence_length = 5      # number of consecutive values in each input window; adjust as needed

# Slide a window over the series one step at a time. Windows overlap:
#   i = 0 -> data[0:5], i = 1 -> data[1:6], i = 2 -> data[2:7], ...
# The target for each window is the value immediately after it:
#   i = 0 -> data[5],   i = 1 -> data[6],   i = 2 -> data[7],   ...
# This yields len(data) - sequence_length samples.
n_windows = len(data) - sequence_length

sequences = np.array([data[i:i + sequence_length] for i in range(n_windows)])
targets = np.array([data[i + sequence_length] for i in range(n_windows)])

print('Squences:', sequences)
print('Targets:', targets)



Squences: [[[0. ]
  [0. ]
  [0. ]
  [0. ]
  [0. ]]

 [[0. ]
  [0. ]
  [0. ]
  [0. ]
  [1. ]]

 [[0. ]
  [0. ]
  [0. ]
  [1. ]
  [0.6]]

 ...

 [[0.4]
  [0. ]
  [0. ]
  [0. ]
  [0. ]]

 [[0. ]
  [0. ]
  [0. ]
  [0. ]
  [0. ]]

 [[0. ]
  [0. ]
  [0. ]
  [0. ]
  [0. ]]]
--------
Targets: [[1. ]
 [0.6]
 [0. ]
 ...
 [0. ]
 [0. ]
 [0. ]]


In [14]:
# Show the dimensions of the window array and the target array, one per line.
print(sequences.shape, targets.shape, sep='\n')

(16095, 5, 1)
(16095, 1)


In [24]:
# Hold out 20% of the windows for evaluation; the fixed random_state keeps the split repeatable.
# NOTE(review): train_test_split shuffles by default, so neighbouring windows that share
# values can end up on both sides of the split. Confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    sequences,
    targets,
    test_size=0.2,
    random_state=42,
)

## Build and Train RNN model

In [26]:
# Initialize a sequential model
model_rnn = keras.Sequential(name="RNN")

# Declare the input shape explicitly: (time steps per window, 1 feature per step).
# Current Keras warns against passing `input_shape` to the first layer of a Sequential
# model and recommends an explicit Input object instead.
model_rnn.add(keras.Input(shape=(X_train.shape[1], 1)))

# Add a SimpleRNN layer with 100 units and the ReLU activation function
model_rnn.add(keras.layers.SimpleRNN(100, activation='relu'))

# Add a Dense layer with 1 unit, which outputs a single value per sample (regression)
model_rnn.add(keras.layers.Dense(1))

# Compile the model with the Adam optimizer and the mean squared error loss,
# the loss used here because the model predicts a continuous value
model_rnn.compile(optimizer='adam', loss='mean_squared_error')
