In [7]:
import argparse
from math import sqrt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [8]:
# Mount Google Drive if necessary
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
df = pd.read_csv('/content/drive/MyDrive/ColabNotebooks/AAI530/Final Project/traffic-volume-counts-1.csv')

In [10]:
df['SegmentID'].value_counts()

76510     70
107111    70
77328     70
77424     70
91529     70
          ..
1885       2
41760      2
41759      2
1886       2
202        1
Name: SegmentID, Length: 1956, dtype: int64

In [11]:
# 76510 has the most rows
# We can use this as the dataset for the daily traffic count

# Get all the rows that have 76510
df_new = df.loc[df['SegmentID']==76510]

print('Unique directions of SegmentID 76510')
print(df_new['Direction'].value_counts())

col_names = df_new.columns

Unique directions of SegmentID 76510
EB    35
WB    35
Name: Direction, dtype: int64


In [12]:
# Separate data into EB and WB
data_EB = []
data_WB = []
# Find average traffic volume per day
for i in range(len(df_new)):
  traffic_data = []
  for j in range(7, df_new.shape[1]):
    traffic_data.append(df_new[col_names[j]].iloc[i])
  traffic_data_mean = np.mean(traffic_data)
  if df_new['Direction'].iloc[i] == 'EB':
    data_EB.append(traffic_data_mean)
  else:
    data_WB.append(traffic_data_mean)

In [13]:
print(len(data_EB))
print(len(data_WB))

35
35


In [14]:
df_new.head()

Unnamed: 0,ID,SegmentID,Roadway Name,From,To,Direction,Date,12:00-1:00 AM,1:00-2:00AM,2:00-3:00AM,...,2:00-3:00PM,3:00-4:00PM,4:00-5:00PM,5:00-6:00PM,6:00-7:00PM,7:00-8:00PM,8:00-9:00PM,9:00-10:00PM,10:00-11:00PM,11:00-12:00AM
1931,1929,76510,31 AVENUE,87 STREET,88 STREET,EB,01/25/2012,56.0,32.0,26.0,...,200.0,275.0,241.0,304.0,236.0,185.0,129.0,105.0,94.0,66.0
1932,1930,76510,31 AVENUE,87 STREET,88 STREET,EB,01/26/2012,55.0,29.0,21.0,...,211.0,247.0,235.0,281.0,262.0,190.0,127.0,124.0,83.0,55.0
1933,1931,76510,31 AVENUE,87 STREET,88 STREET,EB,01/27/2012,45.0,31.0,20.0,...,206.0,277.0,258.0,294.0,290.0,193.0,169.0,136.0,113.0,85.0
1934,1932,76510,31 AVENUE,87 STREET,88 STREET,EB,01/28/2012,101.0,53.0,52.0,...,212.0,214.0,235.0,251.0,222.0,175.0,165.0,151.0,136.0,125.0
1935,1933,76510,31 AVENUE,87 STREET,88 STREET,EB,01/29/2012,118.0,96.0,84.0,...,173.0,201.0,184.0,163.0,195.0,177.0,144.0,117.0,95.0,67.0


In [15]:
# Create list of sequences for training
seq_length = 10
pred_horiz = 2

seq_arrays = []
seq_labs = []

# only for EB so far
for i in range(0,len(data_EB)-seq_length-pred_horiz):
  seq_arrays.append(data_EB[i:i+seq_length])
  seq_labs.append(data_EB[i+seq_length+pred_horiz-1])  # subtract 1 for zero-indexing

# convert to numpy arrays and floats for keras/tensorflow
seq_arrays = np.array(seq_arrays, dtype=object).astype(np.float32)
seq_labs = np.array(seq_labs, dtype=object).astype(np.float32)

In [24]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from contextlib import redirect_stdout

# model = Sequential([
#         layers.LSTM(50, activation='relu', input_shape=(seq_length, 1)),
#         layers.Dense(100, activation='relu'),
#         layers.Dense(1)
#     ])
# model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')
# print(model.summary())
# with open(results_dir+'/lstm_model_summary.txt', 'w') as f:
#   with redirect_stdout(f):
#     model.summary()

In [25]:
# history = model.fit(seq_arrays, seq_labs, epochs=100, batch_size=500, validation_split=0.05, verbose=2)

# print(history.history.keys())

In [26]:
def create_conv_lstm_model(n_steps, n_features, filter_num=64, kernel_size=3, pool_size=2, lstm_units=50, dense_units=100, dropout_rate=0.2, learning_rate=0.001, results_dir="./results"):
  """
  Creates and compiles a Convolutional LSTM model for sequence data processing, integrating
  Conv1D and LSTM layers with Dropout for regularization.

  Parameters:
  - n_steps: Integer, number of time steps in each input sample.
  - n_features: Integer, number of features in each input sample.
  - filter_num: Integer, number of filters in the Conv1D layer (default 64).
  - kernel_size: Integer, size of the convolution kernel (default 3).
  - pool_size: Integer, size of the pooling window (default 2).
  - lstm_units: Integer, number of units in the LSTM layer (default 50).
  - dense_units: Integer, number of units in the Dense layer (default 100).
  - dropout_rate: Float between 0 and 1, fraction of the input units to drop (default 0.2).
  - learning_rate: Float, learning rate for the Adam optimizer (default 0.001).

  Returns:
  - Compiled Keras model ready for training.
  """
  model = Sequential([
      layers.Input(shape=(n_steps, n_features)),
      layers.Conv1D(filters=filter_num, kernel_size=kernel_size, activation='relu'),
      layers.MaxPooling1D(pool_size=pool_size),
      layers.Flatten(),
      layers.Reshape((1, -1)),  # Reshape for LSTM layer
      layers.LSTM(units=lstm_units, activation='relu'),
      layers.Dropout(dropout_rate),
      layers.Dense(units=dense_units, activation='relu'),
      layers.Dropout(dropout_rate),
      layers.Dense(1)  # Output layer
  ])

  model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error', metrics=['mean_absolute_error'])

  #with open(results_dir+'/conv_lstm_model_summary.txt', 'w') as f:
      #with redirect_stdout(f):
  print(model.summary())

  return model

model_conv = create_conv_lstm_model(n_steps=seq_length, n_features=1)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_2 (Conv1D)           (None, 8, 64)             256       
                                                                 
 max_pooling1d_2 (MaxPoolin  (None, 4, 64)             0         
 g1D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 256)               0         
                                                                 
 reshape_2 (Reshape)         (None, 1, 256)            0         
                                                                 
 lstm_3 (LSTM)               (None, 50)                61400     
                                                                 
 dropout_4 (Dropout)         (None, 50)                0         
                                                      

In [27]:
history = model_conv.fit(seq_arrays, seq_labs, epochs=100, batch_size=500, validation_split=0.05, verbose=2)

print(history.history.keys())

Epoch 1/100
1/1 - 3s - loss: nan - mean_absolute_error: nan - val_loss: nan - val_mean_absolute_error: nan - 3s/epoch - 3s/step
Epoch 2/100
1/1 - 0s - loss: nan - mean_absolute_error: nan - val_loss: nan - val_mean_absolute_error: nan - 58ms/epoch - 58ms/step
Epoch 3/100
1/1 - 0s - loss: nan - mean_absolute_error: nan - val_loss: nan - val_mean_absolute_error: nan - 66ms/epoch - 66ms/step
Epoch 4/100
1/1 - 0s - loss: nan - mean_absolute_error: nan - val_loss: nan - val_mean_absolute_error: nan - 61ms/epoch - 61ms/step
Epoch 5/100
1/1 - 0s - loss: nan - mean_absolute_error: nan - val_loss: nan - val_mean_absolute_error: nan - 60ms/epoch - 60ms/step
Epoch 6/100
1/1 - 0s - loss: nan - mean_absolute_error: nan - val_loss: nan - val_mean_absolute_error: nan - 69ms/epoch - 69ms/step
Epoch 7/100
1/1 - 0s - loss: nan - mean_absolute_error: nan - val_loss: nan - val_mean_absolute_error: nan - 49ms/epoch - 49ms/step
Epoch 8/100
1/1 - 0s - loss: nan - mean_absolute_error: nan - val_loss: nan - va