# In [20]:
#import torch and other necessary modules from torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
#import pandas and other necessary modules from pandas
import pandas as pd

#import numpy and other necessary modules from numpy
import numpy as np

#import sklearn and other necessary models from sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


# Load the raw price history.
# Columns High, Low and Open are the input features; column Close is the target value.
df = pd.read_csv('./coin_Bitcoin.csv')  # For Gradescope: './coin_Bitcoin.csv'
x = df[["High", "Low", "Open"]]  # kept unscaled; x.shape[1] is the feature count
y = df[["Close"]]


# Split into train and evaluation (8 : 2) using train_test_split from sklearn.
# The split happens BEFORE standardization so that the scalers are fitted on
# training rows only; fitting them on the full data set would leak the mean and
# variance of the held-out rows into training.
train_x, test_x, train_y, test_y = train_test_split(x, y, train_size=0.8, test_size=0.2)


# Use StandardScaler from sklearn to standardize: fit on the training split,
# then apply the same (already fitted) transform to the evaluation split.
scaler_x = StandardScaler()
scaler_y = StandardScaler()
train_x = scaler_x.fit_transform(train_x)
test_x = scaler_x.transform(test_x)
train_y = scaler_y.fit_transform(train_y)
test_y = scaler_y.transform(test_y)


# Now make x and y tensors. The inputs are shaped
# (total_examples, sequence_length, feature_size) with sequence_length fixed
# to 1 for simplicity; reshape's -1 lets numpy infer the number of examples.
# Everything is cast to float32 because the scaled numpy arrays are float64,
# which does not match the dtype of the model's parameters.
n_features = train_x.shape[1]
train_x = torch.tensor(train_x.reshape(-1, 1, n_features)).float()
train_y = torch.tensor(train_y.reshape(-1, 1)).float()
test_x = torch.tensor(test_x.reshape(-1, 1, n_features)).float()
test_y = torch.tensor(test_y.reshape(-1, 1)).float()
seq_len = train_x[0].shape[0]  # it is actually just 1 as explained above


# different from CNN which uses ImageFolder method, we don't have such method for RNN, so we need to write dataset class ourselves, reference tutorial is in main documentation
class BitCoinDataSet(Dataset):
    """Map-style dataset pairing each input example with its target value.

    Args:
        train_x: Indexable collection of input examples.
        train_y: Indexable collection of target values, aligned with
            ``train_x`` by position.
    """

    def __init__(self, train_x, train_y):
        # super(Dataset, self) would start the MRO lookup *after* Dataset and
        # therefore skip Dataset's own initialiser; the zero-argument form
        # initialises the base class as intended.
        super().__init__()
        self.train_x = train_x
        self.train_y = train_y

    def __len__(self):
        """Return the number of examples (the length of ``train_x``)."""
        return len(self.train_x)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.train_x[idx], self.train_y[idx]


# Hyperparameters, followed by the data loaders for the training and evaluation sets.
hidden_size = 32  # size of the LSTM hidden state handed to the model
num_layers = 2  # number of stacked LSTM layers
learning_rate = 0.001
batch_size = 32  # default is 32; powers of two are a common choice, not a requirement
epoch_size = 10  # default is 10; number of passes over the training set

train_dataset = BitCoinDataSet(train_x, train_y)
test_dataset = BitCoinDataSet(test_x, test_y)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation gains nothing from shuffling: each prediction is compared with its
# own target, so a fixed order gives the same score and reproducible output order.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# model design goes here
class RNN(nn.Module):
    """LSTM regressor: stacked LSTM -> linear (64 units) -> ReLU -> linear (1 unit).

    Args:
        input_feature_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.

    See https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html for nn.LSTM.
    """

    def __init__(self, input_feature_size, hidden_size, num_layers):
        super().__init__()
        # batch_first=True: inputs and outputs are (batch, sequence, feature).
        self.lstm = nn.LSTM(
            input_size=input_feature_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        # Regression head applied to the LSTM output of the last time step.
        self.lin1 = nn.Linear(in_features=hidden_size, out_features=64)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):
        """Return one prediction per example, shaped (batch, 1)."""
        # The final hidden/cell states returned by the LSTM are not needed.
        sequence_out, _ = self.lstm(x)
        # Keep only the last time step of every sequence: (batch, hidden_size).
        last_step = sequence_out[:, -1, :]
        return self.lin2(self.relu(self.lin1(last_step)))


# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the model (one input feature per column of x) and move it to the device.
rnn = RNN(
    input_feature_size=x.shape[1],
    hidden_size=hidden_size,
    num_layers=num_layers,
)
rnn = rnn.to(device)

# Mean squared error, since this is a regression task.
criteria = nn.MSELoss()

# Adam optimizer over all model parameters.
# NOTE(review): the step size is hard-coded here; confirm whether it is meant
# to differ from the `learning_rate` hyperparameter declared with the other settings.
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=0.0001)



# Start training.
log_every = 100  # report the average batch loss once per this many batches

rnn.train()
for epoch in range(epoch_size):

    # Sum of batch losses since the last report. This must be a separate name
    # from the per-batch `loss` tensor: sharing one name overwrites the
    # accumulator on every batch and reports a meaningless value.
    running_loss = 0.0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # forward -> compute loss -> backward propagation -> optimize
        outputs = rnn(inputs)
        loss = criteria(outputs, targets)
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python float, so the accumulator does not
        # keep the autograd graph alive.
        running_loss += loss.item()
        # Nothing is printed in an epoch that has fewer than `log_every` batches.
        if batch_idx % log_every == log_every - 1:
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / log_every:.3f}')
            running_loss = 0.0

print('Finished Training')



# Evaluation: collect the model output and the matching target for every
# example of the evaluation loader (both still in standardized units).
prediction = []
ground_truth = []
rnn.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        ground_truth.extend(targets.flatten().tolist())
        batch_out = rnn(inputs.to(device))
        prediction.extend(batch_out.detach().cpu().flatten().tolist())


# The y values were standardized earlier, so undo that scaling before scoring;
# inverse_transform expects a 2-D column, hence the reshape.
prediction = scaler_y.inverse_transform(np.array(prediction).reshape(-1, 1))
ground_truth = scaler_y.inverse_transform(np.array(ground_truth).reshape(-1, 1))

# Coefficient of determination (r2_score from sklearn).
r2score = r2_score(ground_truth, prediction)
print(r2score)

# Output of the cell above:
# Finished Training
# 0.9640683354582797
