In [1]:
# Import TensorFlow and Keras libraries for building neural networks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import pandas as pd
import numpy as np

# Import train_test_split from sklearn to split datasets into training and testing sets
from sklearn.model_selection import train_test_split

# Import StandardScaler from sklearn to standardize features by removing the mean and scaling to unit variance
from sklearn.preprocessing import StandardScaler



In [2]:
# Read the CSV files
arq_combined = pd.read_csv('../model/data/modisTodos.csv')
# Display the first few rows of the DataFrame to inspect the data
arq_combined.head() 

Unnamed: 0,latitude,longitude,confidence
0,-33.7092,-53.3442,0.0
1,-33.6991,-53.3287,0.0
2,-33.3994,-52.9535,0.0
3,-33.3934,-52.9476,0.0
4,-33.3688,-52.917,0.0


In [3]:
# Create future columns (latitude_futuro, longitude_futuro, confidence_futuro)
# Shift the 'latitude' column up by one row to create the 'latitude_futuro' column
arq_combined['latitude_futuro'] = arq_combined['latitude'].shift(-1)
# Shift the 'longitude' column up by one row to create the 'longitude_futuro' column
arq_combined['longitude_futuro'] = arq_combined['longitude'].shift(-1)
# Shift the 'confidence' column up by one row to create the 'confidence_futuro' column
arq_combined['confidence_futuro'] = arq_combined['confidence'].shift(-1)

In [4]:
# Remove rows with NaN values from the DataFrame
arq_combined.dropna(inplace=True)

In [5]:
# Separate the input columns (features) and output columns (targets)
X = arq_combined[['latitude', 'longitude', 'confidence']].values
y = arq_combined[['latitude_futuro', 'longitude_futuro', 'confidence_futuro']].values

In [6]:
# Split the dataset into training and testing sets
# 80% of the data will be used for training and 20% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Normalize the data using StandardScaler
scaler = StandardScaler()
# Fit the scaler on the training data and transform it
X_train = scaler.fit_transform(X_train)
# Transform the test data using the same scaler (without fitting again)
X_test = scaler.transform(X_test)

In [8]:
# Define the architecture of the neural network
model = tf.keras.Sequential([
    # First hidden layer with 64 neurons and ReLU activation function
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    # Second hidden layer with 128 neurons and ReLU activation function
    layers.Dense(128, activation='relu'),
    # Output layer with 3 neurons (for predicting future latitude, longitude, and confidence)
    layers.Dense(3)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Compile the model
# Use the Adam optimizer, mean squared error (mse) as the loss function, and mean absolute error (mae) as a metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [10]:
# Train the model
# Fit the model on the training data for 50 epochs with a validation split of 20%
history = model.fit(X_train, y_train, epochs=50, validation_split=0.2)

Epoch 1/50
[1m20975/20975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460us/step - loss: 202.9103 - mae: 7.1489 - val_loss: 133.9125 - val_mae: 5.6437
Epoch 2/50
[1m20975/20975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 450us/step - loss: 133.6073 - mae: 5.6449 - val_loss: 133.7614 - val_mae: 5.5813
Epoch 3/50
[1m20975/20975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468us/step - loss: 133.6569 - mae: 5.6320 - val_loss: 133.5922 - val_mae: 5.6437
Epoch 4/50
[1m20975/20975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454us/step - loss: 132.8612 - mae: 5.6025 - val_loss: 134.1533 - val_mae: 5.6379
Epoch 5/50
[1m20975/20975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 450us/step - loss: 132.4735 - mae: 5.5899 - val_loss: 132.6962 - val_mae: 5.5471
Epoch 6/50
[1m20975/20975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 449us/step - loss: 132.4752 - mae: 5.5810 - val_loss: 133.8200 - val_mae: 5.5467
Epoch 7/50
[1m20975/2097

In [11]:
# Evaluate the model on the test set
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f'Mean Absolute Error on Test Data: {test_mae}')

[1m   1/6555[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:16[0m 12ms/step - loss: 107.7048 - mae: 4.7843

[1m6555/6555[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 277us/step - loss: 132.2999 - mae: 5.5343
Mean Absolute Error on Test Data: 5.5259785652160645


In [12]:
# Make predictions with new data
y_pred = model.predict(X_test)
# Select the first 100 predictions to display
final = y_pred[:100]

[1m6555/6555[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 234us/step


In [13]:
# Create a pandas DataFrame with the predicted data
final = pd.DataFrame(final, columns=['latitude_futuro', 'longitude_futuro', 'confidence_futuro'])

# Save the DataFrame to a CSV file without the index
final.to_csv('valores_futuros.csv', index=False)