In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Assuming all_arrays is a list of tuples (speed, direction, lat, lon, date).
# Every entry of a tuple is indexed with the same boolean mask below, so all
# five arrays of a tuple must share the shape of `speed`.
feature_columns = ['speed', 'dir_cos', 'dir_sin', 'lat', 'lon']

flattened_data_list = []  # one (n_valid, 5) float array per input tuple
date_list = []            # matching 1-D date arrays, kept in their own dtype
for speed, direction, lat, lon, date in all_arrays:
    # Keep only samples where both speed and direction are present; a NaN
    # direction would otherwise become NaN cos/sin features and turn the
    # training loss into NaN.
    valid_mask = ~(np.isnan(speed) | np.isnan(direction))

    # Encode the direction angle (degrees) as a point on the unit circle so
    # that 0 and 360 degrees map to the same feature values.
    direction_rad = np.deg2rad(direction[valid_mask])

    flattened_data_list.append(np.column_stack((
        speed[valid_mask],
        np.cos(direction_rad),
        np.sin(direction_rad),
        lat[valid_mask],
        lon[valid_mask],
    )))
    # Dates are collected separately: stacking them next to the float features
    # would force a single common dtype on the whole array (failing for
    # datetime64 and turning the numbers into strings for text dates).
    date_list.append(np.asarray(date[valid_mask]))

# Concatenate all per-tuple arrays into a single (n_samples, 5) feature array
all_flattened_data = np.concatenate(flattened_data_list, axis=0)

# Create a DataFrame for sorting and managing dates
df = pd.DataFrame(all_flattened_data, columns=feature_columns)
df['date'] = pd.to_datetime(np.concatenate(date_list))

# Sort by date. A stable sort keeps the original relative order of rows that
# share a timestamp, so the row order is reproducible from run to run.
df = df.sort_values('date', kind='mergesort').reset_index(drop=True)

# Normalize features except date.
# NOTE(review): the scaler is fitted on every row, i.e. before any
# train/validation split happens further down - confirm this is intended.
scaler = StandardScaler()
df[feature_columns] = scaler.fit_transform(df[feature_columns])

# Create sequences based on time order
def create_sequences(data, seq_length):
    """Build every contiguous window of ``seq_length`` rows from ``data``.

    Args:
        data: Array-like whose first axis is the (already ordered) sample axis.
        seq_length: Number of consecutive rows per window; must be >= 1.

    Returns:
        np.ndarray of shape
        ``(len(data) - seq_length + 1, seq_length, *data.shape[1:])``.
        If ``data`` has fewer than ``seq_length`` rows the result has zero
        windows but still carries the window and feature dimensions, so
        downstream shape-dependent code keeps working.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    data = np.asarray(data)
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    n_windows = data.shape[0] - seq_length + 1
    if n_windows <= 0:
        return np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)

    # Row i of window_index holds the row numbers i, i+1, ..., i+seq_length-1;
    # indexing with it gathers all windows in one vectorised copy.
    window_index = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    return data[window_index]

# Create sequences from normalized data (ignoring date for model input).
sequence_length = 3
feature_array = df[['speed', 'dir_cos', 'dir_sin', 'lat', 'lon']].to_numpy()
# The window length is passed positionally: create_sequences names this
# parameter `seq_length`, so the keyword `sequence_length=3` raised TypeError.
sequences = create_sequences(feature_array, sequence_length)

# Split the sequences into training and validation sets.
# Consecutive windows overlap in all but one row, so a shuffled split would
# place near-identical windows in both sets and make the validation loss
# optimistic. shuffle=False keeps the time order and holds out the last 20%.
train_data, val_data = train_test_split(sequences, test_size=0.2, shuffle=False)

# Convert the split data into float32 PyTorch tensors
train_tensor = torch.as_tensor(train_data, dtype=torch.float32)
val_tensor = torch.as_tensor(val_data, dtype=torch.float32)

# LSTM autoencoder (RecurrentAutoencoder): encodes each input sequence and decodes it back to the input features
class RecurrentAutoencoder(nn.Module):
    """Sequence-to-sequence LSTM autoencoder.

    The encoder LSTM maps every time step from ``input_dim`` features to
    ``hidden_dim`` features, the decoder LSTM processes that encoded sequence,
    and a linear layer projects each decoded step back to ``input_dim``.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the encoded representation per time step.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.encoder_lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.decoder_lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        # An LSTM emits h_t = o_t * tanh(c_t), which always lies in (-1, 1).
        # Using that directly as the reconstruction cannot reproduce targets
        # outside that interval, so a linear layer produces the final output.
        self.output_layer = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Reconstruct ``x``.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim); the LSTMs are
                built with ``batch_first=True``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        encoded_seq, _ = self.encoder_lstm(x)
        decoded_seq, _ = self.decoder_lstm(encoded_seq)
        return self.output_layer(decoded_seq)

# Seed PyTorch before the model is built so the random weight initialisation,
# and therefore the loss curves, are the same on every run.
torch.manual_seed(42)

# Initialize model, optimizer, and loss function
model = RecurrentAutoencoder(input_dim=5, hidden_dim=4)  # Adjust hidden_dim as needed
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()  # reconstruction error between output and input

num_epochs = 600
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    # --- training: one full-batch gradient step per epoch ---
    model.train()
    optimizer.zero_grad()

    output = model(train_tensor)
    train_loss = criterion(output, train_tensor)
    train_loss.backward()
    optimizer.step()

    train_losses.append(train_loss.item())

    # --- validation: no gradients are tracked ---
    model.eval()
    with torch.no_grad():
        val_output = model(val_tensor)
        val_loss = criterion(val_output, val_tensor)

    val_losses.append(val_loss.item())

    # Report progress every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

# Draw both loss histories (one value per epoch) on a single set of axes
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
for curve_label, history in (('Training Loss', train_losses), ('Validation Loss', val_losses)):
    ax.plot(history, label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.legend()
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Assuming all_arrays contains tuples of (speed, direction, lat, lon, date).
# Every entry of a tuple is indexed with the same boolean mask below, so all
# five arrays of a tuple must share the shape of `speed`.
feature_columns = ['speed', 'dir_cos', 'dir_sin', 'lat', 'lon']

flattened_data_list = []  # one (n_valid, 5) float array per input tuple
date_list = []            # matching 1-D date arrays, kept in their own dtype
for speed, direction, lat, lon, date in all_arrays:
    # Keep only samples where both speed and direction are present; a NaN
    # direction would otherwise end up as NaN features in the tensors.
    valid_mask = ~(np.isnan(speed) | np.isnan(direction))

    # Convert direction (degrees) to Cartesian coordinates on the unit circle
    direction_rad = np.deg2rad(direction[valid_mask])

    flattened_data_list.append(np.column_stack((
        speed[valid_mask],
        np.cos(direction_rad),
        np.sin(direction_rad),
        lat[valid_mask],
        lon[valid_mask],
    )))
    # Dates are collected separately: stacking them next to the float features
    # would force a single common dtype on the whole array.
    date_list.append(np.asarray(date[valid_mask]))

# Create a DataFrame from the flattened features and attach the dates
df = pd.DataFrame(np.vstack(flattened_data_list), columns=feature_columns)
df['date'] = pd.to_datetime(np.concatenate(date_list))

# Order the rows by date (stable, so ties keep their original order), then
# drop the date column because it is not a model input.
df = (
    df.sort_values('date', kind='mergesort')
      .drop(columns=['date'])
      .reset_index(drop=True)
)

# Normalize features
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df.to_numpy())

# Decide on a sequence length (e.g., 10 time steps)
sequence_length = 10

# Prepare the data for the LSTM with the create_sequences helper defined
# earlier in the notebook. The autoencoder reconstructs its own input, so no
# row has to be reserved as a target and the last full window is kept too.
X = create_sequences(scaled_data, sequence_length)

# Split into train and validation sets. Consecutive windows overlap, so the
# split keeps the time order (no shuffling) and holds out the last 20%;
# otherwise near-identical windows would appear in both sets.
train_data, val_data = train_test_split(X, test_size=0.2, shuffle=False)

# Now you can feed train_data and val_data into your LSTM Autoencoder
train_tensor = torch.as_tensor(train_data, dtype=torch.float32)
val_tensor = torch.as_tensor(val_data, dtype=torch.float32)

# Continue with your LSTM Autoencoder training...


In [None]:
# Create sequences based on existing order
sequence_length = 10

# Reuse the create_sequences helper defined earlier in the notebook instead of
# a hand-written loop. It also returns the final window (the one starting at
# len(scaled_data) - sequence_length), which `range(len(...) - sequence_length)`
# left out.
X = create_sequences(scaled_data, sequence_length)
