# Data Generation
#3 classes 

In [13]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Create synthetic classification data
np.random.seed(42)

# Generate 3-class classification problem
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=3,
    n_clusters_per_class=1,
    random_state=42
)

# Create class names
class_names = ['Class_A', 'Class_B', 'Class_C']
y_labels = [class_names[i] for i in y]

# Create DataFrame
df = pd.DataFrame(X, columns=['feature1', 'feature2', 'feature3'])
df['target'] = y_labels

# Save to CSV
df.to_csv('data1.csv', index=False)

print("Classification dataset created!")
print(f"Dataset shape: {df.shape}")
print(f"Class distribution:\n{df['target'].value_counts()}")
print("\nFirst 5 rows:")
print(df.head())

Classification dataset created!
Dataset shape: (1000, 4)
Class distribution:
target
Class_A    334
Class_C    333
Class_B    333
Name: count, dtype: int64

First 5 rows:
   feature1  feature2  feature3   target
0 -0.590320 -2.393971 -1.787638  Class_C
1 -1.913492 -1.392190 -0.832541  Class_C
2 -1.388480  1.447356  1.358486  Class_B
3 -1.293794  1.012438  0.080761  Class_A
4 -0.547365 -0.174167 -1.298140  Class_A


#Import Library

In [14]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#data load

In [15]:
df = pd.read_csv('data1.csv')
x = df.iloc[:, :3].values
y = df.iloc[:, 3].values.reshape(-1, 1)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

  y = column_or_1d(y, warn=True)


#Test train split and Scaling

In [16]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

scaler_X = StandardScaler()
scaler_y = StandardScaler()
x_train = scaler_X.fit_transform(x_train)
x_test = scaler_X.transform(x_test)
# y isn't needed

#Convert to tensor

In [17]:
x_train = torch.FloatTensor(x_train).to(device)
x_test = torch.FloatTensor(x_test).to(device)
y_train = torch.FloatTensor(y_train).to(device)
y_test = torch.FloatTensor(y_test).to(device)

#Custom Dataset

In [18]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):

    def __init__(self,features,labels):
        # lets convert features and labels to tensors at the time of initialization
        self.features = torch.tensor(features, dtype = torch.float32)
        #Features expectes to be float and label to be long

        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.features)

    def __getitem__(self,idx):

        return self.features[idx] , self.labels[idx]

In [19]:
training_data = CustomDataset(x_train, y_train)
testing_data = CustomDataset(x_test, y_test)

  self.features = torch.tensor(features, dtype = torch.float32)
  self.labels = torch.tensor(labels, dtype=torch.long)


In [20]:
train_loader = DataLoader(training_data, batch_size = 32 , shuffle=True , pin_memory= True)

test_loader = DataLoader(testing_data, batch_size = 32 , shuffle=False, pin_memory= True)

#Define Model

In [21]:
# Define NN class

class MyNN(nn.Module):

    def __init__(self,input_dim,num_classes):

        super().__init__()

        self.model = nn.Sequential(
            nn.Linear(input_dim,128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128,64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64,num_classes)
            # nn.Softmax()
            # ->adding because pytorch will add it  automatically in cross entropy loss
        )

    def forward(self,x):

        return self.model(x)

In [22]:
# epochs , learning rate
epochs =100
learning_rate = 0.1

In [23]:
#Model object, Loss  and optimizer

model = MyNN(x_train.shape[1],3)
model = model.to(device)
# Loss function

loss_function = nn.CrossEntropyLoss()

#Optimizer

optimizer = optim.SGD(model.parameters(), lr = learning_rate)

In [24]:
for epoch in range (epochs):

    total_loss = 0

    for batch_features ,batch_labels in train_loader:

        #Forward pass
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        outputs = model(batch_features)

        #loss calculate
        # make grad zero before calculating loss

        optimizer.zero_grad()
        loss = loss_function(outputs , batch_labels)

        #Backward pass
        loss.backward()

        #optimizer step
        optimizer.step()

        #total loss
        total_loss = total_loss + loss.item()

    # avg loss is loss per batch in each epoch
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch : {epoch+1} , Average Loss : {avg_loss}")



Epoch : 1 , Average Loss : 0.7632016706466674
Epoch : 2 , Average Loss : 0.4777733635902405
Epoch : 3 , Average Loss : 0.3939438784122467
Epoch : 4 , Average Loss : 0.3683742934465408
Epoch : 5 , Average Loss : 0.3580405843257904
Epoch : 6 , Average Loss : 0.3450698322057724
Epoch : 7 , Average Loss : 0.3368533533811569
Epoch : 8 , Average Loss : 0.30685728788375854
Epoch : 9 , Average Loss : 0.3197144964337349
Epoch : 10 , Average Loss : 0.3112308233976364
Epoch : 11 , Average Loss : 0.2887367409467697
Epoch : 12 , Average Loss : 0.28368624329566955
Epoch : 13 , Average Loss : 0.28584530502557753
Epoch : 14 , Average Loss : 0.27205793917179105
Epoch : 15 , Average Loss : 0.2786222031712532
Epoch : 16 , Average Loss : 0.2730552199482918
Epoch : 17 , Average Loss : 0.2685362747311592
Epoch : 18 , Average Loss : 0.26501808047294617
Epoch : 19 , Average Loss : 0.25409778118133547
Epoch : 20 , Average Loss : 0.2553979021310806
Epoch : 21 , Average Loss : 0.24716923981904984
Epoch : 22 , Av

In [25]:
model.eval()

MyNN(
  (model): Sequential(
    (0): Linear(in_features=3, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [26]:
# evaluation code from scratch
total = 0
correct = 0

with torch.no_grad():

  for batch_features, batch_labels in test_loader:

    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    _, predicted = torch.max(outputs, 1)

    total = total + batch_labels.shape[0]

    correct = correct + (predicted == batch_labels).sum().item()

print(correct/total)

0.95


In [27]:
# evaluation code from scratch on training data
total = 0
correct = 0

with torch.no_grad():

  for batch_features, batch_labels in train_loader:

    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    _, predicted = torch.max(outputs, 1)

    total = total + batch_labels.shape[0]

    correct = correct + (predicted == batch_labels).sum().item()
  print(correct/total)

0.95375
