In [None]:
from torch.utils.data import Dataset, random_split
from pandas import read_csv
import seaborn as sns
import matplotlib.pyplot as plt

# dataset definition preparation 
class CSVDataset(Dataset):
    """Tabular dataset backed by a CSV file.

    The complete CSV is kept in ``self.df`` for inspection. For training, only
    the numeric columns are used: every numeric column except the last becomes
    a feature in ``self.X`` and the last numeric column becomes the target
    ``self.y``. Both arrays are float32, which matches the default dtype of
    ``torch.nn.Linear`` weights.
    """

    def __init__(self, path):
        """Load ``path`` with ``pandas.read_csv`` and build ``X`` / ``y``.

        Args:
            path: location of the CSV file.

        Raises:
            ValueError: if the file contains no numeric column to use as target.
        """
        # load the csv file as a dataframe
        self.df = read_csv(path)

        # Non-numeric columns (strings, dates read as text, ...) cannot be
        # batched into tensors, so they stay in self.df only.
        # NOTE(review): this assumes the target is the last *numeric* column.
        numeric = self.df.select_dtypes(include='number')
        if numeric.shape[1] == 0:
            raise ValueError(f"{path!r} has no numeric columns to train on")
        values = numeric.to_numpy(dtype='float32')

        # store the inputs and outputs
        self.X = values[:, :-1]
        # ensure target has the right shape: (n_rows, 1)
        self.y = values[:, -1].reshape((-1, 1))

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``[features, target]`` for the row at ``idx``."""
        return [self.X[idx], self.y[idx]]

    def get_splits(self, n_test=0.33):
        """Randomly split the dataset into train and test subsets.

        Args:
            n_test: fraction of rows assigned to the test subset.

        Returns:
            The result of ``random_split``: the train subset followed by the
            test subset.
        """
        # determine sizes
        test_size = round(n_test * len(self.X))
        train_size = len(self.X) - test_size
        # calculate the split
        return random_split(self, [train_size, test_size])

    def print_info(self):
        """Print schema, summary statistics, first rows and null counts."""
        # DataFrame.info() prints by itself and returns None; wrapping it in
        # print() would emit an extra "None" line.
        self.df.info()
        print(self.df.describe())
        print(self.df.head())
        print(self.df.isnull().sum())

    def visualize(self, column='open'):
        """Show a seaborn count plot of ``column`` from ``self.df``.

        Args:
            column: name of the dataframe column to plot on the y axis.
        """
        plt.figure(figsize=(10, 6))
        sns.countplot(y=column, data=self.df)
        plt.title('Spans from 2010 to the end 2016')
        plt.show()

    def check(self):
        """Print the number of missing values in each column of ``self.df``."""
        print(self.df.isnull().sum())

In [None]:
# Step 1: Load the dataset
# Reads the CSV (path is relative to the working directory) into a CSVDataset.
data_set = CSVDataset('data/prices.csv')

In [None]:
# Step 2: Summarize the dataset
# Prints the dataframe's info, describe(), head() and per-column null counts.
data_set.print_info()

In [None]:
# Step 3: Visualize the dataset
# Shows a seaborn count plot of the 'open' column.
data_set.visualize()

In [None]:
# Step 4: Check for missing values
# Prints the number of nulls in each column (same table print_info() ends with).
data_set.check()

In [None]:
import torch.nn as nn
import torch.nn.functional as func


# Step 5: Define the DNN architecture
class Net(nn.Module):
    """Fully connected regression network.

    Two ReLU-activated hidden layers followed by a linear layer that emits a
    single value per sample.

    Args:
        n_inputs: number of input features per sample. Defaults to 10, the
            width this notebook originally hard-coded.
        n_hidden: width of both hidden layers. Defaults to 64.
    """

    def __init__(self, n_inputs=10, n_hidden=64):
        super().__init__()
        # Define the layers of the network here
        self.fc1 = nn.Linear(n_inputs, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc3 = nn.Linear(n_hidden, 1)  # output is 1 for a regression task

    def forward(self, x):
        """Run the forward pass and return one output value per input row."""
        x = func.relu(self.fc1(x))
        x = func.relu(self.fc2(x))
        # no activation function for the last layer: raw regression output
        x = self.fc3(x)
        return x

In [None]:
from torch.utils.data import DataLoader
from torch.optim import SGD
from torch.nn import MSELoss
from tqdm import tqdm

# Step 6: Create an instance of the network
# Net() is built with its default layer sizes.
model = Net()

# Step 7: Define the training loop
def train_model(train_data, training_model, epochs=100, lr=0.01, momentum=0.9):
    """Train ``training_model`` on ``train_data`` with SGD and MSE loss.

    Args:
        train_data: data loader yielding ``(inputs, targets)`` batches; its
            ``dataset`` attribute is used to report progress.
        training_model: the module whose parameters are optimized.
        epochs: number of passes over ``train_data``.
        lr: SGD learning rate.
        momentum: SGD momentum.

    Returns:
        None. The model is updated in place and the loss of every batch is
        printed.
    """
    size = len(train_data.dataset)
    # define the optimization
    criterion = MSELoss()
    # Optimize the model that was passed in rather than a notebook-level
    # global, so the function works for any model instance.
    optimizer = SGD(training_model.parameters(), lr=lr, momentum=momentum)
    # enumerate epochs
    for epoch in tqdm(range(epochs), desc='Training Epochs'):
        print(f"Epoch {epoch + 1}\n-------------------------------")
        # enumerate mini batches
        for batch, (inputs, targets) in enumerate(train_data):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = training_model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()

            # report the batch loss and how many samples preceded this batch
            loss_value, current = loss.item(), batch * len(inputs)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")



In [None]:
# Step 8: Prepare the data
def prepare_data(dataset, batch_size=1024):
    """Split ``dataset`` and wrap both parts in data loaders.

    Args:
        dataset: object exposing ``get_splits()`` that returns a
            ``(train, test)`` pair.
        batch_size: number of samples per batch for both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    # calculate split
    train, test = dataset.get_splits()
    # prepare data loaders
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

In [None]:
# Step 9: Train the model
# Build the train/test loaders and print how many rows each one holds.
train_dl, test_dl = prepare_data(data_set)
print(len(train_dl.dataset), len(test_dl.dataset))

# Run the training loop on the training loader; test_dl is not used in this cell.
train_model(train_dl, model)
