In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import pickle
import datetime
import os

ModuleNotFoundError: No module named 'torch'

In [None]:
# Load the raw table from the project-relative data directory.
df = pd.read_csv('./data/Crop_recommendation.csv')
# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# Every column except the last is a model input; the last column is the target.
features = df.iloc[:, :-1].to_numpy()
labels = df.iloc[:, -1].to_numpy()

In [None]:
# NOTE: this cell rebinds `features` and `labels` (NumPy arrays -> tensors,
# raw -> normalized). Re-run the cell that extracts them from `df` before
# re-running this one, otherwise the data would be normalized twice.

# Encode the raw labels as integer class ids
encoder = LabelEncoder()
labels = encoder.fit_transform(labels)
num_classes = len(np.unique(labels))

# Convert the features and labels to PyTorch tensors
features = torch.tensor(features, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.long)

# Normalize the features to have zero mean and unit variance
mean = features.mean(dim=0)
std = features.std(dim=0)
# A constant column has std == 0; dividing by it would fill that column with
# NaN. Use 1 for such columns so they normalize to all zeros instead.
std = torch.where(std == 0, torch.ones_like(std), std)
features = (features - mean) / std

In [None]:
# Display the first feature row after normalization as a quick sanity check.
features[0]

In [None]:
# Save the mean and standard deviation as separate arrays
# Create the target directory first: nothing earlier in the notebook creates
# it, so a fresh checkout would otherwise fail with FileNotFoundError.
os.makedirs("./model/normalization", exist_ok=True)
# Convert explicitly to NumPy so the archive does not depend on the implicit
# tensor -> array conversion inside np.savez.
np.savez(
    "./model/normalization/normalization.npz",
    mean=np.asarray(mean),
    std=np.asarray(std),
)

In [None]:
# Persist the fitted label encoder with pickle.
# Create the target directory first: nothing earlier in the notebook creates
# it, so a fresh checkout would otherwise fail with FileNotFoundError.
os.makedirs("./model/pkl_files", exist_ok=True)
with open("./model/pkl_files/encoder.pkl", "wb") as file:
    pickle.dump(encoder, file)

In [None]:
# Define a custom PyTorch dataset to wrap the features and labels
class CustomDataset(Dataset):
    """Pairs an indexable collection of features with a parallel one of labels.

    Args:
        features: Indexable container holding one entry per sample.
        labels: Indexable container holding the label for each sample, in the
            same order as ``features``.
    """

    def __init__(self, features, labels):
        self.features, self.labels = features, labels

    def __len__(self):
        """Number of samples, taken from the length of ``features``."""
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

In [None]:
# Fixed seed so that re-running the notebook yields the same train/validation
# partition; without it each run splits differently and validation metrics
# are not comparable across runs.
SPLIT_SEED = 42

dataset = CustomDataset(features, labels)
# 80 % of the samples go to training, the remainder to validation.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
class Net_64_128_64(nn.Module):
    """Fully connected classifier with hidden layers of 64, 128 and 64 units.

    Each hidden layer is followed by a SELU activation; the output layer is
    followed by a softmax, so ``forward`` returns class probabilities.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size,num_classes):
        super(Net_64_128_64, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_classes)


    def forward(self, x):
        """Return class probabilities for ``x``.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries; may be
                a single sample or carry leading batch dimensions.

        Returns:
            Tensor with the same leading dimensions as ``x`` and
            ``num_classes`` entries along the last dimension, summing to 1
            along that dimension.

        Note:
            The output is already softmax-normalized. Losses that apply their
            own softmax (such as ``nn.CrossEntropyLoss``) should be given the
            log of this output rather than the probabilities themselves.
        """
        x = F.selu(self.fc1(x))
        x = F.selu(self.fc2(x))
        x = F.selu(self.fc3(x))
        x = self.fc4(x)
        # Normalize over the class axis explicitly. Omitting `dim` is
        # deprecated, emits a warning, and picks axis 0 for 3-D input.
        return F.softmax(x, dim=-1)


In [None]:
# Define the network hyperparameters
# Derive both sizes from the prepared data instead of hard-coding them, so the
# network always matches the feature matrix and the fitted label encoder.
input_size = features.shape[1]
num_classes = len(encoder.classes_)

# Initialize the network
net = Net_64_128_64(input_size, num_classes)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

# Training length and per-epoch history containers
EPOCH = 100
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

In [None]:
# Floor applied to probabilities before taking the log, so a value that
# underflowed to exactly 0 cannot produce -inf in the loss.
PROB_EPS = 1e-12

for epoch in range(EPOCH):
    net.train()
    running_loss = 0.0
    # Iterating the split directly yields one (sample, target) pair at a time,
    # i.e. one optimizer step per sample. The loop variables are deliberately
    # not called `labels`, which would overwrite the notebook-level tensor.
    for sample, target in train_dataset:
        optimizer.zero_grad()
        probs = net(sample)
        # `net` already returns softmax probabilities, while CrossEntropyLoss
        # applies log_softmax to its input. Passing log-probabilities makes
        # that internal log_softmax a no-op (they already normalize to 1), so
        # the loss is the true cross-entropy rather than one computed on a
        # doubly-softmaxed output.
        loss = criterion(torch.log(probs.clamp_min(PROB_EPS)), target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    train_loss = running_loss / len(train_dataset)
    train_losses.append(train_loss)

    net.eval()
    with torch.no_grad():
        val_loss = 0.0
        for sample, target in val_dataset:
            probs = net(sample)
            loss = criterion(torch.log(probs.clamp_min(PROB_EPS)), target)
            val_loss += loss.item()
        val_loss /= len(val_dataset)
        val_losses.append(val_loss)
    # Report every tenth epoch
    if epoch % 10 == 9:
        print(f'Epoch {epoch+1}/{EPOCH}: train loss: {train_loss:.4f} val loss: {val_loss:.4f}')

print('Finished training')


In [None]:
# Plot the per-epoch mean loss on both splits. The explicit Axes interface is
# used so that title and axis labels are attached to this figure only.
fig, ax = plt.subplots()
ax.plot(train_losses, label='train loss')
ax.plot(val_losses, label='validation loss')
ax.set(title='Training vs. validation loss', xlabel='epoch', ylabel='mean loss')
ax.legend()
plt.show()

In [None]:
# Build a timestamped checkpoint name: the current date-time with its space,
# colon and dot separators all turned into dashes, plus the '.hdf5' suffix.
_separators_to_dash = str.maketrans({' ': '-', ':': '-', '.': '-'})
model_name = str(datetime.datetime.now()).translate(_separators_to_dash) + '.hdf5'
file_name = f'./model/{model_name}'

# Make sure the output directory is there before anything is written to it.
model_dir_missing = not os.path.exists('./model/')
if model_dir_missing:
    os.mkdir('./model/')
    print("creating model dir")

In [None]:
# Write the trained network's parameters (its state_dict) to `file_name`.
# NOTE: `file_name` ends in '.hdf5', but torch.save writes PyTorch's own
# serialization format, not HDF5; read it back with torch.load.
torch.save(net.state_dict(), file_name)

In [None]:
# Rebuild the architecture, then restore the stored baseline parameters into it.
model = Net_64_128_64(input_size, num_classes)
baseline_state = torch.load('./model/baseline/baseline.hdf5')
model.load_state_dict(baseline_state)

In [None]:
# Calculate the top-1 accuracy on the training split.
# The generator keeps its loop variables local, so the notebook-level `labels`
# tensor is not overwritten, and int(...) keeps `correct` a plain Python int
# instead of silently turning it into a tensor.
with torch.no_grad():
    correct = sum(
        int(model(sample).argmax() == target)
        for sample, target in train_dataset
    )
total = len(train_dataset)

accuracy = 100 * correct / total
print(f'Accuracy of the network on the train: {accuracy:.2f}%')


In [None]:
# Calculate the top-1 accuracy on the validation split.
# The generator keeps its loop variables local, so the notebook-level `labels`
# tensor is not overwritten, and int(...) keeps `correct` a plain Python int
# instead of silently turning it into a tensor.
with torch.no_grad():
    correct = sum(
        int(model(sample).argmax() == target)
        for sample, target in val_dataset
    )
total = len(val_dataset)

accuracy = 100 * correct / total
print(f'Accuracy of the network on the validation: {accuracy:.2f}%')