In [1]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torchvision import transforms, utils
from torch.utils.data import TensorDataset, DataLoader
import time

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    train_test_split,
    cross_validate,
    RandomizedSearchCV
)
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsOneClassifier
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import loguniform, randint

%matplotlib inline

In [3]:
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # files produced by this project.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Processed inputs (later cells concatenate these two column-wise).
training_data = _read_pickle("data/processed/training_data.pickle")
training_arm = _read_pickle("data/processed/training_arm.pickle")

# Lookup indexed later with "dir_<label>" keys.
mean_trajectory = _read_pickle("data/processed/mean_trajectory.pickle")

# Previously fitted model; only its .predict() is used in this notebook.
lr = _read_pickle("trained_models/lr.pickle")

In [4]:
class NeuralDecoder(torch.nn.Module):
    """Fully connected network: seven ReLU hidden layers and a linear output.

    The layers live in ``self.main`` (a ``Sequential``) in the order
    Linear, ReLU, Linear, ReLU, ..., Linear, so the parameter names in the
    ``state_dict`` are ``main.0.*``, ``main.2.*``, ..., ``main.14.*``.

    The class deliberately refers to ``torch.nn`` instead of the bare name
    ``nn``: this notebook later rebinds ``nn`` to a model instance, and with
    the bare name re-running this cell failed with an ``AttributeError``.

    Parameters
    ----------
    input_size : int
        Number of features in each input row.
    output_size : int
        Number of values produced for each input row.
    """

    # Widths of the hidden layers, in order from input to output.
    HIDDEN_SIZES = (500, 1_000, 5_000, 10_000, 15_000, 10_000, 5_000)

    def __init__(self, input_size, output_size):
        super().__init__()
        sizes = (input_size, *self.HIDDEN_SIZES, output_size)
        layers = []
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            layers += [torch.nn.Linear(n_in, n_out), torch.nn.ReLU()]
        # The output layer is linear: drop the activation appended after it.
        del layers[-1]
        self.main = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.main(x)


# `device` is only used as `map_location` when reading the checkpoint; the
# model is not moved to it in the visible cells.
device = (
    torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
)

# NOTE(review): this rebinds `nn`, hiding the `torch.nn` module imported at the
# top of the notebook. A later cell calls `nn(...)` as the model, so the name is
# kept here; renaming it needs a matching change in that cell.
nn = NeuralDecoder(input_size=297, output_size=3_000)
nn.load_state_dict(
    torch.load("trained_models/trained_nn.pt", map_location=device)
)

<All keys matched successfully>

In [5]:
# Join the two loaded arrays side by side (same rows, columns appended).
# Later cells read columns [:297] as model inputs and [297:] as targets.
data = np.concatenate([training_data, training_arm], axis=-1)
data.shape

(40000, 3298)

In [6]:
# Stage 1: one prediction per row from the `lr` model loaded from
# trained_models/lr.pickle, using only the first 297 columns of `data`.
output_model1 = lr.predict(data[:, :297])
output_model1.shape

(40000,)

In [7]:
# Stage 2: replace each stage-1 prediction by the stored trajectory found
# under the key "dir_<label>" in `mean_trajectory`.
direction_keys = (f"dir_{int(label)}" for label in output_model1)
output_model2 = np.array([mean_trajectory[key] for key in direction_keys])
output_model2.shape

(40000, 3000)

In [8]:
# Stage 3: run the loaded network (bound to the name `nn` above) on the first
# 297 columns of every row. Inference only, so autograd is switched off: without
# it every intermediate activation for all rows is kept alive for a backward
# pass that never happens. The returned values are unchanged.
with torch.no_grad():
    output_model3 = nn(torch.Tensor(data[:, :297])).numpy()
output_model3.shape

(40000, 3000)

In [17]:
# Make the stage-1 predictions a single column so they can be stacked next to
# the two 2-D outputs. `-1` lets NumPy infer the row count instead of assuming
# exactly 40_000 rows; re-running the cell leaves the shape unchanged.
output_model1 = np.reshape(output_model1, (-1, 1))
print(output_model1.shape)
print(output_model2.shape)
print(output_model3.shape)

# Feature matrix for the final model: [stage 1 | stage 2 | stage 3] per row.
output_models = np.concatenate(
    (output_model1, output_model2, output_model3), axis=1
)

(40000, 1)
(40000, 3000)
(40000, 3000)


In [22]:
# out_file = "data/processed/output_models.pickle"
# with open(out_file, "wb") as f:
#     pickle.dump(output_models, f)

In [18]:
# Hold out 30% of the rows. Features are the stacked stage outputs; targets are
# every column of `data` from index 297 onward.
# NOTE(review): the targets have 3001 columns while the stage-2/3 outputs have
# 3000 - confirm whether the first target column is really a trajectory value.
X_train, X_test, y_train, y_test = train_test_split(
    output_models,
    data[:, 297:],
    random_state=2022,
    test_size=0.3,
)

print(X_train.shape, y_train.shape, sep="\n")

(28000, 6001)
(28000, 3001)


In [19]:
# Final-stage regressor on the stacked stage outputs. The seed matches the one
# used for the train/test split so repeated runs build the same forest.
final_model = RandomForestRegressor(n_jobs=-1, random_state=2022)

# NOTE(review): the "randomforestregressor__" prefix below is the naming used
# for steps of a make_pipeline(...) estimator. `final_model` is a bare
# RandomForestRegressor, so these keys presumably need to be plain
# "n_estimators" / "min_samples_split" - confirm before re-enabling.
# param_grid = {
#     "randomforestregressor__n_estimators": randint(low=10, high=500),
#     "randomforestregressor__min_samples_split": randint(low=2, high=100)
# }

# random_search = RandomizedSearchCV(
#     final_model,
#     param_distributions=param_grid,
#     n_jobs=-1,
#     n_iter=50,
#     cv=10,
#     random_state=123,
#     return_train_score=True
# )

In [20]:
# Fit the final regressor on the training split. The recorded output of this
# cell is a KeyboardInterrupt, i.e. the saved run was stopped before the fit
# finished; the cells below need a completed fit.
final_model.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
# Root-mean-square error over every element of the held-out targets.
prediction = final_model.predict(X_test)
rmse = np.sqrt(((prediction - y_test) ** 2).mean())
print(rmse)

In [None]:
def plot_good_bad_examples(model, X, y, good_max_rmse=5, bad_min_rmse=30,
                           max_examples=30, n_points=1000):
    """Plot true vs. predicted curves for low-error and high-error rows.

    Predictions are computed once for all rows of ``X``. A row is "good" when
    its RMSE against ``y`` is below ``good_max_rmse`` and "bad" when it is
    above ``bad_min_rmse``. The first ``max_examples`` rows of each kind are
    drawn, true values in red and predictions in blue.

    For each drawn row, columns ``[:n_points]`` are plotted against columns
    ``[n_points:2 * n_points]`` (presumably x and y coordinates - confirm
    against the target layout).

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
    X : 2-D array of features, one row per sample
    y : 2-D array of targets aligned with ``X``
    good_max_rmse, bad_min_rmse : float
        RMSE thresholds for the two panels.
    max_examples : int
        Maximum number of rows drawn per panel.
    n_points : int
        Number of columns per plotted coordinate.

    Returns
    -------
    (fig, ax_good, ax_bad)
    """
    predictions = model.predict(X)
    # One RMSE value per row.
    sample_rmse = np.sqrt(np.mean((predictions - y) ** 2, axis=1))
    good_rows = np.flatnonzero(sample_rmse < good_max_rmse)[:max_examples]
    bad_rows = np.flatnonzero(sample_rmse > bad_min_rmse)[:max_examples]

    fig, (ax_good, ax_bad) = plt.subplots(1, 2)
    panels = (
        (ax_good, good_rows, "Good predictions"),
        (ax_bad, bad_rows, "Bad predictions"),
    )
    for ax, rows, title in panels:
        for row in rows:
            ax.plot(y[row, :n_points],
                    y[row, n_points:2 * n_points], color="r")
            ax.plot(predictions[row, :n_points],
                    predictions[row, n_points:2 * n_points], color="b")
        ax.set_title(title)
        ax.set_xlim([-150, 150])
        ax.set_ylim([-100, 100])
    return fig, ax_good, ax_bad


# The previous version nested a `while` inside the `for`, so the row index
# never advanced, and called predict() on a 1-D row. Both are avoided by
# predicting the whole test set at once and selecting rows by their error.
fig, ax_good, ax_bad = plot_good_bad_examples(final_model, X_test, y_test)
plt.show()

In [None]:
# random_search.fit(X_train, y_train)
# results = pd.DataFrame(random_search.cv_results_)

In [None]:
# opt_final_model = RandomForestRegressor(
#     n_estimators=random_search.best_params_[
#         "randomforestregressor__n_estimators"],
#     min_samples_split=random_search.best_params_[
#         "randomforestregressor__min_samples_split"],
#     n_jobs=-1
# )

# opt_final_model.fit(X_train, y_train)

In [None]:
# prediction = opt_final_model.predict(X_test)
# rmse = np.sqrt(np.mean((y_test - prediction)**2))
# print(rmse)

In [None]:
# good_examples = 0
# bad_examples = 0

# ax_good = plt.subplot(121)
# ax_bad = plt.subplot(122)

# for idx in range(X_test.shape[0]):
#     while good_examples < 30 and bad_examples < 30:
#         sample_X = X_test[idx, :]
#         sample_y = y_test[idx, :]
#         prediction = opt_final_model.predict(sample_X)
#         rmse = np.sqrt(np.mean((prediction - sample_y)**2))
#         if rmse < 5:
#             good_examples += 1
#             ax_good.plot(sample_y[:1000], sample_y[1000:2000], color="r")
#             ax_good.plot(prediction[:1000], prediction[1000:2000], color="b")
#         if rmse > 30:
#             bad_examples += 1
#             ax_bad.plot(sample_y[:1000], sample_y[1000:2000], color="r")
#             ax_bad.plot(prediction[:1000], prediction[1000:2000], color="b")

# ax_good.title.set_text("Good predictions")
# ax_bad.title.set_text("Bad predictions")
# ax_good.set_xlim([-150, 150])
# ax_good.set_ylim([-100, 100])
# ax_bad.set_xlim([-150, 150])
# ax_bad.set_ylim([-100, 100])
# plt.show()