## PyTorch Linear Regression

Credit: https://towardsdatascience.com/linear-regression-with-pytorch-eb6dedead817

Input data will be in the following format for each restaurant business:
- food positive sentiment
- food negative sentiment
- service positive sentiment
- service negative sentiment
- location positive sentiment
- location negative sentiment
- cleanliness positive sentiment
- cleanliness negative sentiment
- price positive sentiment
- price negative sentiment

We want to predict each restaurant's star rating, a value between 1 and 5.

In [8]:
import numpy as np
import pandas as pd
import torch
import json
from torch.utils.data import random_split, Dataset, DataLoader
import matplotlib.pyplot as plt
from torch.autograd import Variable

In [4]:
# Load the per-restaurant topic/sentiment scores. The encoding is given
# explicitly so the file is decoded the same way on every platform instead of
# depending on the locale's default encoding.
with open('restaurant-topic-sentiment.json', encoding='utf-8') as file:
    json_data = json.load(file)

# Flatten each restaurant's two-level mapping into a single flat record whose
# column names have the form '<outer key>_<inner key>'; the top-level key is
# kept under 'id'.
rows = [
    {
        'id': key,
        **{
            f'{sentiment}_{category}': score
            for sentiment, categories in value.items()
            for category, score in categories.items()
        },
    }
    for key, value in json_data.items()
]

df = pd.DataFrame(rows)

# 'id' only identifies the restaurant; the remaining columns are converted to
# a NumPy array.
data = df.drop(columns=['id']).to_numpy()

# 80% of the rows go to training, the remainder to testing.
train_size = int(0.8 * len(data))
test_size = len(data) - train_size

# A fixed generator seed makes the random split reproducible across runs.
train_data, test_data = random_split(
    data,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

# A custom dataset class
class CustomDataset(Dataset):
    """Dataset over a 2-D array whose last column is treated as the label."""

    def __init__(self, data):
        # Keep a reference to the array exactly as given; nothing is copied
        # or converted here.
        self.data = data

    def __len__(self):
        """Return the number of rows in the wrapped array."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx``.

        The label is read from the last column (assumed to be the 'rating');
        the features are all the columns before it.
        """
        features, label = self.data[idx, :-1], self.data[idx, -1]
        return features, label

# Materialise each split as a NumPy array and wrap it in a CustomDataset.
train_dataset, test_dataset = (
    CustomDataset(np.array(train_data)),
    CustomDataset(np.array(test_data)),
)

# Batches of 64 rows; only the training loader shuffles its rows.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Take just the first batch from each loader as the train and test samples.
(x_train, y_train), (x_test, y_test) = (
    next(iter(train_loader)),
    next(iter(test_loader)),
)

In [3]:
! pip install torch

Collecting torch
  Downloading torch-2.1.1-cp311-none-macosx_11_0_arm64.whl.metadata (25 kB)
Downloading torch-2.1.1-cp311-none-macosx_11_0_arm64.whl (59.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.6/59.6 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: torch
Successfully installed torch-2.1.1


In [5]:
class linearRegression(torch.nn.Module):
    """Linear model made of a single ``torch.nn.Linear`` layer."""

    def __init__(self, inputSize, outputSize):
        super().__init__()
        # The only learnable component: maps inputSize features to
        # outputSize outputs.
        self.linear = torch.nn.Linear(in_features=inputSize, out_features=outputSize)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        return self.linear(x)

In [6]:
# Model dimensions and optimisation hyper-parameters.
inputDim, outputDim = 10, 1
learningRate, epochs = 0.2, 400

model = linearRegression(inputSize=inputDim, outputSize=outputDim)
# Move the model to the GPU when one is available.
if torch.cuda.is_available():
    model = model.cuda()

In [7]:
# Mean-squared-error objective, minimised with plain SGD over the model's
# parameters.
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(params=model.parameters(), lr=learningRate)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Prepare the training batch once: it is the same for every epoch, so there is
# no reason to rebuild it inside the loop.
#
# x_train / y_train are taken from a DataLoader batch, so they are already
# tensors and torch.from_numpy() would reject them; torch.as_tensor() accepts
# tensors and NumPy arrays alike. They are cast to the dtype and device of the
# model's parameters so the forward pass does not fail on a dtype or device
# mismatch.
reference_param = next(model.parameters())
inputs = torch.as_tensor(
    x_train, dtype=reference_param.dtype, device=reference_param.device
)
labels = torch.as_tensor(
    y_train, dtype=reference_param.dtype, device=reference_param.device
)
# Give the targets one row per sample with an explicit trailing dimension so
# they line up with the model output; otherwise the loss would broadcast a
# 1-D target against the 2-D prediction.
labels = labels.reshape(inputs.shape[0], -1)

for epoch in range(epochs):
    # Clear the gradient buffers so gradients from the previous epoch do not
    # accumulate into this one.
    optimizer.zero_grad()

    # Forward pass: model predictions for the training inputs.
    outputs = model(inputs)

    # Loss between the predictions and the true labels.
    loss = criterion(outputs, labels)

    # Backward pass: gradients of the loss w.r.t. the parameters.
    loss.backward()

    # Update the parameters.
    optimizer.step()

    print(f'epoch {epoch}, loss {loss.item()}')

In [1]:
# Predict the target variable for the test batch.
with torch.no_grad():
    # x_test comes from a DataLoader batch and is already a tensor, so it is
    # converted with torch.as_tensor() (torch.from_numpy() only accepts NumPy
    # arrays) and cast to the dtype/device of the model's parameters.
    reference_param = next(model.parameters())
    test_inputs = torch.as_tensor(
        x_test, dtype=reference_param.dtype, device=reference_param.device
    )
    predicted = model(test_inputs).cpu().numpy()
    print(predicted)

# NumPy copies of the test batch for plotting.
features = torch.as_tensor(x_test).cpu().numpy()
targets = torch.as_tensor(y_test).cpu().numpy()

# plt.subplots() creates its own figure, so no figure is cleared beforehand.
fig, axes = plt.subplots(5, 2, figsize=(12, 36))
axes = axes.flatten()

# Plot true values and model predictions against each attribute. Iterating
# over features.T yields one column (one attribute across all samples) per
# subplot, so x and y always have one entry per sample.
for ax, feature_column in zip(axes, features.T):
    ax.plot(feature_column, targets, 'go', label='True data', alpha=0.5)
    ax.plot(feature_column, predicted, '--', label='Predictions', alpha=0.5)
    # Each subplot gets its own legend.
    ax.legend(loc='best')
plt.show()

NameError: name 'torch' is not defined

## Classification Model