In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
import numpy as np


In [None]:
# Load the pre-cleaned feature table. The path is relative, so it is resolved
# against the working directory the notebook kernel was started in.
df = pd.read_csv("../../feature1_clean.csv")
# Bare last expression: the column index is shown as the cell output.
df.columns

In [None]:
# Pair every sample with the navigation state of the NEXT sample.
# shift(periods=-1) moves values up by one row, so row i receives row i+1's value.
_next_step_columns = {
    "LONGITUDE": "long_up",
    "LATITUDE": "lat_up",
    "HEADING": "heading_up",
    "SOG": "SOG_up",
    "STW": "STW_up",
}
for _source_col, _shifted_col in _next_step_columns.items():
    df[_shifted_col] = df[_source_col].shift(periods=-1)

# A negative shift leaves NaN in the LAST row (it has no successor), so that is
# the row to discard. Dropping the first row instead would throw away a valid
# sample and keep the NaN row, which then propagates into DISP, OUT, X and y.
# iloc[:-1] is also safe on an empty frame; .copy() avoids chained-assignment
# warnings on the column writes below.
df = df.iloc[:-1].copy()

# Straight-line (Euclidean) distance between consecutive positions, expressed in
# the raw LONGITUDE/LATITUDE units; this is not a geodesic distance.
_delta_lon = df["LONGITUDE"] - df["long_up"]
_delta_lat = df["LATITUDE"] - df["lat_up"]
df["DISP"] = (_delta_lon ** 2 + _delta_lat ** 2) ** 0.5

# Mean fuel consumption of the two engines, normalised by the displacement.
# The sum is parenthesised on purpose: without the parentheses only engine 2 is
# divided and engine 1 is added un-normalised. The 1e-6 term guards against
# division by zero when the position does not change.
# NOTE(review): the factors 60 and 10**6 look like unit conversions - confirm.
df["OUT"] = (
    (df["ENGINE_1_FUEL_CONSUMPTION"] + df["ENGINE_2_FUEL_CONSUMPTION"])
    / 2
    / (df["DISP"] * 60 + 1e-6)
    / 10**6
)


In [None]:
# Columns that must not be fed to the model as inputs: timestamps, raw engine
# flow/consumption readings, wind readings, speed-log components and the
# current-step navigation state (which is predicted instead, see `y` below).
_excluded_input_columns = [
    "Dati", "Time",
    "ENGINE_1_FLOWRATE", "ENGINE_1_FLOWRATEA", "ENGINE_1_FLOWRATEB",
    "ENGINE_1_FUEL_CONSUMPTION",
    "ENGINE_2_FLOWRATE", "ENGINE_2_FLOWRATEA", "ENGINE_2_FLOWRATEB",
    "ENGINE_2_FUEL_CONSUMPTION",
    "WIND_ANGLE", "WIND_SPEED", "WIND_ANGLE_TRUE", "WIND_SPEED_TRUE",
    "datetime",
    "SOG_SPEEDLOG_LONG", "SOG_SPEEDLOG_TRANS",
    "LONGITUDE", "LATITUDE", "HEADING", "SOG", "STW",
    "date",
]
# NOTE(review): "OUT" and "DISP" (added to df in the previous cell) are not in
# this list, so the regression target "OUT" is also present among the inputs.
# Confirm whether that is intended; removing them changes the input width that
# a later cell hard-codes.
X = df.drop(columns=_excluded_input_columns)

# Targets: the fuel metric plus the current-step navigation state.
_target_columns = ["OUT", "LATITUDE", "LONGITUDE", "SOG", "STW", "HEADING"]
y = df[_target_columns]

In [None]:
# Display the names of the target columns as the cell output.
y.columns

In [None]:
# Display the names of the input feature columns as the cell output.
X.columns

In [None]:
# Append indicator (dummy) columns for the given categorical columns.
def one_hot(df, cols, drop_original=False):
    """Return ``df`` extended with one indicator column per category value.

    Args:
        df: DataFrame holding the categorical columns. It is not modified.
        cols: Iterable of column names to encode, processed in order.
        drop_original: Forwarded to ``pd.get_dummies`` as ``drop_first``.
            Despite its name it does NOT remove the source column: when true,
            the indicator of the first category level is omitted.

    Returns:
        A DataFrame with every column of ``df`` (the encoded source columns
        included) followed by the indicator columns, which are named
        ``<column>_<value>``. When ``cols`` is empty, ``df`` itself is returned.
    """
    encoded = df
    for column_name in cols:
        indicators = pd.get_dummies(
            encoded[column_name],
            prefix=column_name,
            drop_first=drop_original,
        )
        encoded = pd.concat([encoded, indicators], axis=1)
    return encoded

# Add indicator columns for the three categorical features. `drop_original=True`
# is forwarded as `drop_first`, so the first level of each feature gets no
# indicator; the source columns themselves stay in X.
# Note: X is reassigned from itself, so re-running this cell appends the
# indicator columns a second time.
X = one_hot(X, ["season", "direction", "weathercode"], drop_original=True)

In [None]:
# Standardise every input column (StandardScaler removes the mean and scales to
# unit variance). With scikit-learn's default output setting the result is a
# NumPy array, so the column names are gone from here on; later cells rely on
# array methods such as `.reshape`.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed in a later cell, so test-set statistics influence the training inputs.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [None]:
class Lstm(nn.Module):
    """LSTM followed by a linear layer; predicts from the last time step only.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of values predicted per sequence.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size=1, output_size=1, num_layers=1):
        super().__init__()

        # nn.LSTM is built with its default batch_first=False, so it expects
        # input laid out as (seq_len, batch, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        # Maps a hidden state to the requested number of outputs.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, _x):
        """Run the sequence through the LSTM and project the final hidden output.

        Args:
            _x: Tensor of shape (seq_len, batch, input_size), or
                (seq_len, input_size) for a single unbatched sequence.

        Returns:
            Tensor of shape (batch, output_size), or (output_size,) for
            unbatched input: the prediction for the last time step.
        """
        hidden_seq, _ = self.lstm(_x)  # (seq_len, batch, hidden_size)
        # Only the last step is returned, so only that step is projected; the
        # linear layer acts on each step independently, so the result matches
        # projecting the whole sequence and slicing afterwards.
        return self.fc(hidden_seq[-1])

In [None]:
# Sanity check: X and y must have the same number of rows.
X.shape, y.shape

In [None]:
# Length of the leading (time) axis of the tensors handed to the LSTM.
SEQ_LEN = 10

data_len = len(X)
# One time index per sample (0.0, 1.0, ..., data_len - 1), so that slices of
# `t` have exactly the same length as the matching slices of X and y.
t = np.linspace(0, data_len - 1, data_len)

train_data_ratio = 0.8  # Choose 80% of the data for training
# Floor to a multiple of SEQ_LEN so the reshape below is exact. Rounding to the
# nearest thousand can yield 0 for small datasets or a value above data_len.
train_data_len = int(data_len * train_data_ratio) // SEQ_LEN * SEQ_LEN
if train_data_len == 0:
    raise ValueError(
        "Not enough samples ({}) to build a training sequence of length {}".format(
            data_len, SEQ_LEN
        )
    )
# The test split is reshaped to (SEQ_LEN, -1, features) as well, so it is
# trimmed to a multiple of SEQ_LEN too (at most SEQ_LEN - 1 trailing samples
# are left out).
test_data_len = (data_len - train_data_len) // SEQ_LEN * SEQ_LEN
test_data_end = train_data_len + test_data_len

train_x = X[:train_data_len]
train_y = y[:train_data_len]
t_for_training = t[:train_data_len]

test_x = X[train_data_len:test_data_end]
test_y = y[train_data_len:test_data_end]
t_for_testing = t[train_data_len:test_data_end]

# ----------------- train -------------------
# Derived from the data instead of hard-coded, so the reshape cannot silently
# mis-split rows when the number of feature/target columns changes.
INPUT_FEATURES_NUM = X.shape[1]
OUTPUT_FEATURES_NUM = y.shape[1]
# NOTE(review): reshaping row-ordered samples to (SEQ_LEN, B, features) puts
# row s * B + b at position [s, b], so each sequence is made of samples that
# are B rows apart rather than SEQ_LEN consecutive rows - confirm this is the
# intended windowing.
train_x_tensor = train_x.reshape(SEQ_LEN, -1, INPUT_FEATURES_NUM)
train_y_tensor = train_y.to_numpy().reshape(SEQ_LEN, -1, OUTPUT_FEATURES_NUM)
# transfer data to pytorch tensor (float32, the dtype of the model parameters)
train_x_tensor = torch.from_numpy(train_x_tensor).float()
train_y_tensor = torch.from_numpy(train_y_tensor).float()


In [None]:
device = torch.device("cpu")
# Fix the RNG so that weight initialisation, and therefore the whole training
# run, is reproducible after a kernel restart.
torch.manual_seed(0)

lstm_model = Lstm(INPUT_FEATURES_NUM, 20, output_size=OUTPUT_FEATURES_NUM, num_layers=1)  # 20 hidden units
lstm_model = lstm_model.to(device)
print('LSTM model:', lstm_model)
print('model.parameters:', lstm_model.parameters)
print('train x tensor dimension:', train_x_tensor.size())

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-2)

# Best loss seen so far, kept as a plain float so no autograd graph is retained.
prev_loss = float("inf")
max_epochs = 2000

train_x_tensor = train_x_tensor.to(device)
# The model returns one prediction per sequence (for its last time step), so
# the matching target is the last step of every target sequence. Passing the
# full (seq_len, batch, out) target would make MSELoss broadcast the prediction
# against all time steps.
train_target = train_y_tensor[-1].to(device)

for epoch in range(max_epochs):
    output = lstm_model(train_x_tensor)
    loss = criterion(output, train_target)
    loss_value = loss.item()

    # Checkpoint BEFORE the optimizer step, so the saved weights are the ones
    # that produced `loss_value`.
    if loss_value < prev_loss:
        torch.save(lstm_model.state_dict(), 'lstm_model.pt')  # save model parameters to files
        prev_loss = loss_value

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if loss_value < 1e-4:
        print('Epoch [{}/{}], Loss: {:.5f}'.format(epoch + 1, max_epochs, loss_value))
        print("The loss value is reached")
        break
    elif (epoch + 1) % 100 == 0:
        print('Epoch: [{}/{}], Loss:{:.5f}'.format(epoch + 1, max_epochs, loss_value))

# ----------------- test -------------------
lstm_model = lstm_model.eval()  # switch to testing model

# prediction on training dataset (no gradients needed for inference)
with torch.no_grad():
    pred_y_for_train = lstm_model(train_x_tensor)
pred_y_for_train = pred_y_for_train.view(-1, OUTPUT_FEATURES_NUM).numpy()

# prediction on test dataset
# Use the same sequence length as the training tensor and keep only as many
# test samples as fill whole sequences; otherwise the reshape fails.
seq_len = train_x_tensor.size(0)
n_test_sequences = len(test_x) // seq_len
if n_test_sequences == 0:
    raise ValueError(
        "Test split has {} samples; at least {} are needed".format(len(test_x), seq_len)
    )
n_test_used = n_test_sequences * seq_len

test_x_tensor = test_x[:n_test_used].reshape(seq_len, -1, INPUT_FEATURES_NUM)
# convert to a float32 tensor; the model parameters are float32 and a float64
# input raises a dtype error
test_x_tensor = torch.from_numpy(test_x_tensor).float().to(device)

# test_y is a DataFrame, so it must go through NumPy before torch.from_numpy.
test_y_tensor = test_y.to_numpy()[:n_test_used].reshape(seq_len, -1, OUTPUT_FEATURES_NUM)
test_y_tensor = torch.from_numpy(test_y_tensor).float().to(device)

with torch.no_grad():
    pred_test_tensor = lstm_model(test_x_tensor)
    # Same convention as in training: compare with the last-step targets.
    loss = criterion(pred_test_tensor, test_y_tensor[-1])
pred_y_for_test = pred_test_tensor.view(-1, OUTPUT_FEATURES_NUM).numpy()

print("test loss：", loss.item())


In [None]:
# ----------------- plot -------------------
# Ground truth has one row per sample; the model yields one prediction per
# sequence, so every series gets x positions derived from its own length
# (matplotlib raises when x and y differ in length).
train_actual = train_y.to_numpy()
test_actual = test_y.to_numpy()
n_train = len(train_actual)
n_test = len(test_actual)
n_train_pred = len(pred_y_for_train)
n_test_pred = len(pred_y_for_test)

# The inputs are reshaped to (seq_len, B, features) and the model returns the
# last time step, so prediction b belongs to row (seq_len - 1) * B + b of its
# split, i.e. to the last B rows that were fed to the model.
seq_len = n_train // max(n_train_pred, 1)
train_idx = range(n_train)
test_idx = range(n_train, n_train + n_test)
train_pred_idx = range(n_train - n_train_pred, n_train)
test_pred_start = n_train + (seq_len - 1) * n_test_pred
test_pred_idx = range(test_pred_start, test_pred_start + n_test_pred)

# One panel per target: the targets have different scales and would be
# unreadable (and indistinguishable) on a single shared axis.
target_names = list(train_y.columns)
fig, axes = plt.subplots(
    len(target_names), 1, sharex=True, squeeze=False,
    figsize=(10, 2.5 * len(target_names)),
)
for col, name in enumerate(target_names):
    ax = axes[col, 0]
    ax.plot(train_idx, train_actual[:, col], 'b', label='y_trn')
    ax.plot(train_pred_idx, pred_y_for_train[:, col], 'y--', label='pre_trn')

    ax.plot(test_idx, test_actual[:, col], 'k', label='y_tst')
    ax.plot(test_pred_idx, pred_y_for_test[:, col], 'm--', label='pre_tst')
    ax.set_ylabel(name)

# The labels above are only visible once a legend is drawn.
axes[0, 0].legend(loc='upper right', ncol=4)
axes[-1, 0].set_xlabel('t')
fig.tight_layout()
plt.show()