In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder



In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x79e3a69f16f0>

In [3]:
device="cuda" if torch.cuda.is_available else "cpu"
print(device)

cuda


In [4]:
data=pd.read_excel("/content/Dry_Bean_Dataset.xlsx")
data.head()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.27275,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


In [5]:
data.isnull().sum()

Unnamed: 0,0
Area,0
Perimeter,0
MajorAxisLength,0
MinorAxisLength,0
AspectRation,0
Eccentricity,0
ConvexArea,0
EquivDiameter,0
Extent,0
Solidity,0


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13611 entries, 0 to 13610
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Area             13611 non-null  int64  
 1   Perimeter        13611 non-null  float64
 2   MajorAxisLength  13611 non-null  float64
 3   MinorAxisLength  13611 non-null  float64
 4   AspectRation     13611 non-null  float64
 5   Eccentricity     13611 non-null  float64
 6   ConvexArea       13611 non-null  int64  
 7   EquivDiameter    13611 non-null  float64
 8   Extent           13611 non-null  float64
 9   Solidity         13611 non-null  float64
 10  roundness        13611 non-null  float64
 11  Compactness      13611 non-null  float64
 12  ShapeFactor1     13611 non-null  float64
 13  ShapeFactor2     13611 non-null  float64
 14  ShapeFactor3     13611 non-null  float64
 15  ShapeFactor4     13611 non-null  float64
 16  Class            13611 non-null  object 
dtypes: float64(1

In [7]:
label_encoder = LabelEncoder()
data['Class_Encoded'] = label_encoder.fit_transform(data['Class'])


In [8]:
data = data.drop('Class', axis=1)


In [9]:
data['Class_Encoded'].value_counts()

Unnamed: 0_level_0,count
Class_Encoded,Unnamed: 1_level_1
3,3546
6,2636
5,2027
4,1928
2,1630
0,1322
1,522


In [10]:
X=data.iloc[:,:-1]
y=data.iloc[:,-1]
X.head()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.27275,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166


In [11]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)


In [12]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [13]:
#custom class for dataset
class BeanDataset(Dataset):
  def __init__(self,features,targets):
    self.features=torch.tensor(features,dtype=torch.float32)
    self.targets=torch.tensor(targets.values,dtype=torch.long)


  def __len__(self):
    return len(self.features)

  def __getitem__(self,idx):
    return self.features[idx],self.targets[idx]

In [14]:
#creating train and test dataset objects
train_dataset=BeanDataset(X_train,y_train)
test_dataset=BeanDataset(X_test,y_test)

In [15]:
#creating train and test dataloader
train_loader=DataLoader(train_dataset,batch_size=32,shuffle=True,pin_memory=True)
test_loader=DataLoader(test_dataset,batch_size=32,shuffle=False,pin_memory=True)

In [16]:
len(train_loader)

341

In [17]:
len(test_loader)

86

In [18]:
#custom nn model
class MyNN(nn.Module):
  def __init__(self,num_features):
    super(MyNN,self).__init__()
    self.model = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 7)
)


  def forward(self,x):
    return self.model(x)

In [19]:
learning_rate=0.001
epochs=25

In [20]:
#instatiate the model
model=MyNN(X_train.shape[1])
model=model.to(device)
#loss
criterion=nn.CrossEntropyLoss()
#optimizer
optimizer=optim.Adam(model.parameters(),lr=learning_rate)

In [21]:
#training loop
for epoch in range(epochs):
  total_epoch_loss=0

  for batch_features,batch_labels in train_loader:
    batch_features=batch_features.to(device)
    batch_labels=batch_labels.to(device)

    #outputs
    outputs=model(batch_features)

    #loss
    loss=criterion(outputs,batch_labels)

    #optimizer
    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    total_epoch_loss+=loss.item()

  print(f"Epoch {epoch+1}/{epochs}, Loss: {total_epoch_loss/len(train_loader)}")


Epoch 1/25, Loss: 0.4401281188867571
Epoch 2/25, Loss: 0.24987491128469141
Epoch 3/25, Loss: 0.234144203435859
Epoch 4/25, Loss: 0.2271690682625491
Epoch 5/25, Loss: 0.22356528302660086
Epoch 6/25, Loss: 0.21581546317709507
Epoch 7/25, Loss: 0.2212379579309407
Epoch 8/25, Loss: 0.2098843899946059
Epoch 9/25, Loss: 0.20873589678244156
Epoch 10/25, Loss: 0.20427963772346197
Epoch 11/25, Loss: 0.2057621343077715
Epoch 12/25, Loss: 0.20184288042818696
Epoch 13/25, Loss: 0.1998597097329118
Epoch 14/25, Loss: 0.19996124842474539
Epoch 15/25, Loss: 0.19757460932796303
Epoch 16/25, Loss: 0.19682073685725676
Epoch 17/25, Loss: 0.19459755525069414
Epoch 18/25, Loss: 0.19937886003830915
Epoch 19/25, Loss: 0.19290079711731578
Epoch 20/25, Loss: 0.19461902436906817
Epoch 21/25, Loss: 0.19217639284814733
Epoch 22/25, Loss: 0.19074176672485568
Epoch 23/25, Loss: 0.1876742562205364
Epoch 24/25, Loss: 0.19211391386879154
Epoch 25/25, Loss: 0.18747554419021453


In [22]:
#evaluation
model.eval()

MyNN(
  (model): Sequential(
    (0): Linear(in_features=16, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=256, out_features=128, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): ReLU()
    (8): Linear(in_features=64, out_features=7, bias=True)
  )
)

In [23]:
#evaluation on test data

correct=0
total=0
with torch.no_grad():
  for batch_features,batch_labels in test_loader:
    batch_features=batch_features.to(device)
    batch_labels=batch_labels.to(device)

    outputs=model(batch_features)
    _,predicted=torch.max(outputs,1)

    total+=batch_labels.size(0)
    correct+=(predicted==batch_labels).sum().item()

  print(f"Accuracy on test data: {100*correct/total}%")



Accuracy on test data: 93.20602276900478%


In [24]:
correct=0
total=0
with torch.no_grad():
  for batch_features,batch_labels in train_loader:
    batch_features=batch_features.to(device)
    batch_labels=batch_labels.to(device)

    outputs=model(batch_features)
    _,predicted=torch.max(outputs,1)

    total+=batch_labels.size(0)
    correct+=(predicted==batch_labels).sum().item()

  print(f"Accuracy on train data: {100*correct/total}%")


Accuracy on train data: 93.38721528288023%
