### Rice Type Classification


#### Getting our dataset from Kaggle


In [None]:
# Install the opendatasets helper (IPython shell escape; --quiet reduces pip's output).
!pip install opendatasets --quiet
import opendatasets as od
# Fetch the rice-type-classification dataset from Kaggle.
# NOTE(review): the CSV is later read from /content/rice-type-classification/ —
# presumably the download lands there when run from /content; confirm.
od.download("https://www.kaggle.com/datasets/mssmartypants/rice-type-classification")

#### Importing libraries and choosing the device

In [None]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

#### Data Preprocessing and Normalization

Normalization:
We scale every column so that its maximum absolute value becomes 1:
each value in a column is divided by the largest absolute value found in that column.

In [None]:
# Load the CSV, discard rows containing missing values, and remove the 'id' column.
raw_df = pd.read_csv("/content/rice-type-classification/riceClassification.csv")
data_df = raw_df.dropna().drop(columns=['id'])

# Keep an unscaled copy: its per-column maxima are needed again when
# scaling a hand-written sample at prediction time.
original_df = data_df.copy()

# Max-abs scaling: divide every column by its largest absolute value
# (the Series of maxima is aligned with the frame's columns).
data_df = data_df / data_df.abs().max()

#### Train and Test splitting



In [None]:
# Features are every column except the last; the last column is the target.
feature_frame = data_df.iloc[:, :-1]
target_series = data_df.iloc[:, -1]
X = np.array(feature_frame)
Y = np.array(target_series)

# First cut: 70 % for training, 30 % held out.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, Y, test_size=0.3)
# Second cut: halve the hold-out into test and validation parts (15 % each).
X_test, X_val, y_test, y_val = train_test_split(X_holdout, y_holdout, test_size=0.5)


#### Creating Dataset Class

PyTorch works with tensors, so we need to convert our data into tensors.

A custom `Dataset` class does this conversion and returns one (features, label) pair per index.


In [None]:
class dataset(Dataset):
  """Tensor-backed dataset that pairs each feature row with its label.

  Both inputs are converted to float32 tensors and moved to the global
  ``device`` once, when the object is constructed.
  """

  def __init__(self, X, Y):
    """Store features ``X`` and labels ``Y`` as float32 tensors on ``device``."""
    feature_tensor = torch.tensor(X, dtype=torch.float32)
    label_tensor = torch.tensor(Y, dtype=torch.float32)
    self.X = feature_tensor.to(device)
    self.Y = label_tensor.to(device)

  def __len__(self):
    """Return the number of stored samples (length of the feature tensor)."""
    sample_count = len(self.X)
    return sample_count

  def __getitem__(self, idx):
    """Return the ``(features, label)`` pair stored at position ``idx``."""
    sample = self.X[idx]
    target = self.Y[idx]
    return sample, target

In [None]:
# Wrap each split in the tensor-backed dataset class.
training_data = dataset(X=X_train, Y=y_train)
validation_data = dataset(X=X_val, Y=y_val)
testing_data = dataset(X=X_test, Y=y_test)

In [None]:
# One batch size shared by all three loaders.
BATCH_SIZE = 32

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation only aggregates loss/accuracy over the whole split, so sample
# order is irrelevant; not shuffling avoids needless work and keeps the
# evaluation passes from consuming the random stream used for training.
val_dataloader = DataLoader(validation_data, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(testing_data, batch_size=BATCH_SIZE, shuffle=False)

#### Creating Module

In [None]:
class MyModel(nn.Module):
  """Small binary classifier: Linear -> Linear -> Sigmoid.

  Args:
    input_dim: Number of input features. When omitted, it falls back to the
      column count of the notebook-level feature matrix ``X``, so the
      argument-free call ``MyModel()`` keeps working.
    hidden_dim: Width of the intermediate layer (default 10).

  The forward pass returns sigmoid outputs whose last dimension has size 1.
  """

  def __init__(self, input_dim=None, hidden_dim=10):
    super().__init__()
    if input_dim is None:
      # Backward-compatible default: infer the width from the global data.
      input_dim = X.shape[1]
    self.input_layer = nn.Linear(input_dim, hidden_dim)
    self.linear = nn.Linear(hidden_dim, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Map a batch of feature rows to sigmoid outputs in [0, 1]."""
    # NOTE: there is no activation between the two linear layers, so together
    # they act as a single linear map followed by the sigmoid.
    hidden = self.input_layer(x)
    logits = self.linear(hidden)
    return self.sigmoid(logits)
# Build the network, then move its parameters to the selected device.
model = MyModel()
model = model.to(device)

#### We can check the summary of our Model

In [None]:
# Print the summary for an input with one value per feature column.
n_features = X.shape[1]
summary(model, (n_features,))

#### Creating loss function and optimizer

In [None]:
# Binary cross-entropy computed on probabilities (the model ends in a sigmoid).
criterion = nn.BCELoss()

# Adam over all model parameters.
LEARNING_RATE = 0.01
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)


#### Training Loop

In [None]:
# Per-epoch history, consumed later by the plotting cell.
total_loss_train_plot = []
total_loss_validation_plot = []
total_acc_train_plot = []
total_acc_validation_plot = []

epochs = 10
for epoch in range(epochs):
  # Running totals for this epoch: correct-prediction counts and summed loss.
  total_acc_train = 0
  total_loss_train = 0
  total_acc_val = 0
  total_loss_val = 0

  # ---- training pass ----
  model.train()
  for x, y in train_dataloader:
    prediction = model(x).squeeze(1)
    batch_loss = criterion(prediction, y)
    # The criterion yields a per-batch mean; weight it by the batch size so
    # the epoch figure is a true per-sample mean even when the last batch is
    # smaller than the others.
    total_loss_train += batch_loss.item() * y.size(0)
    total_acc_train += (prediction.round() == y).sum().item()
    # Clear gradients before backward so stale values (e.g. from an
    # interrupted earlier run of this cell) can never leak into the step.
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

  # ---- validation pass (no gradient tracking) ----
  model.eval()
  with torch.no_grad():
    for x, y in val_dataloader:
      prediction = model(x).squeeze(1)
      batch_loss = criterion(prediction, y)
      total_loss_val += batch_loss.item() * y.size(0)
      total_acc_val += (prediction.round() == y).sum().item()

  # Normalise by the number of samples in each split (previously an arbitrary
  # constant of 1000), so train and validation losses share the same scale.
  train_loss = round(total_loss_train / len(training_data), 4)
  val_loss = round(total_loss_val / len(validation_data), 4)
  train_acc = round(total_acc_train / len(training_data) * 100, 4)
  val_acc = round(total_acc_val / len(validation_data) * 100, 4)

  total_loss_train_plot.append(train_loss)
  total_loss_validation_plot.append(val_loss)
  total_acc_train_plot.append(train_acc)
  total_acc_validation_plot.append(val_acc)

  print(f'''Epoch: {epoch+1} Train Loss {train_loss} Train Acc:{train_acc}
        Validation Loss: {val_loss}''')
  print("="*25)

#### Testing Loop

In [None]:
# Evaluate the trained model on the held-out test split.
model.eval()
with torch.no_grad():
  total_acc_test = 0
  total_loss_test = 0
  for x, y in test_dataloader:
    prediction = model(x).squeeze(1)
    batch_loss_test = criterion(prediction, y)
    # .item() keeps the running total a plain Python float, as in the
    # training loop, instead of accumulating a tensor.
    total_loss_test += batch_loss_test.item()
    # Count predictions whose rounded probability equals the label.
    total_acc_test += (prediction.round() == y).sum().item()
  print(f"Accuracy: {round(total_acc_test/len(testing_data)*100,4)}")

#### Plotting our results

In [None]:
# Two side-by-side panels: loss history on the left, accuracy on the right.
fig, axs = plt.subplots(1, 2, figsize=(15,5))
loss_ax, acc_ax = axs

# Left panel: per-epoch training and validation loss.
loss_ax.plot(total_loss_train_plot, label="Train Loss")
loss_ax.plot(total_loss_validation_plot, label="Validation Loss")
loss_ax.set(
  title="Training and Validation Loss over Epochs",
  xlabel='Epochs',
  ylabel='Loss',
  ylim=(0, 1),
)
loss_ax.legend()

# Right panel: per-epoch training and validation accuracy (percent).
acc_ax.plot(total_acc_train_plot, label="Train Accuracy")
acc_ax.plot(total_acc_validation_plot, label="Validation Accuracy")
acc_ax.set(
  title="Training and Validation Accuracy over Epochs",
  xlabel='Epochs',
  ylabel='Accuracy',
  ylim=(0, 100),
)
acc_ax.legend()

plt.show()

#### How does our model predict? Below is a sample input and how to get its prediction. The input values are arbitrary.


In [None]:
# Arbitrary raw measurements for one sample, keyed by dataset column name.
# The insertion order is the order in which values are fed to the model.
# NOTE(review): assumed to match the feature column order used for training — confirm.
sample_raw = {
  'Area': 2353,
  'MajorAxisLength': 81,
  'MinorAxisLength': 43,
  'Eccentricity': 43,
  'ConvexArea': 35,
  'EquivDiameter': 45,
  'Extent': 34,
  'Perimeter': 567,
  'Roundness': 56,
  'AspectRation': 2,
}

# Scale each value by the largest absolute value of its column in the
# unscaled data, mirroring the normalization applied to the training data.
sample_scaled = [
  raw_value / original_df[column].abs().max()
  for column, raw_value in sample_raw.items()
]

sample_tensor = torch.tensor(sample_scaled, dtype=torch.float32).to(device)
my_prediction = model(sample_tensor)
# Round the predicted probability to the nearest class label (0 or 1).
round(my_prediction.item())