In [1]:
import torch
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import torch.nn as nn

In [2]:
# Load the raw train / test tables from the working directory.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Training table: column 0 holds the targets, every later column is a feature.
Y_train = df_train.iloc[:, 0].to_numpy()
X_train = df_train.iloc[:, 1:].to_numpy()

# Test table: the first 784 columns are the features and the column at
# position 784 is kept as Y_test.
# NOTE(review): Y_test is not used by any cell visible in this notebook.
X_test = df_test.iloc[:, :784].to_numpy()
Y_test = df_test.iloc[:, 784].to_numpy()

# Wrap the arrays as tensors: float32 features, int64 targets.
X_train_ten = torch.from_numpy(X_train).to(dtype=torch.float32)
Y_train_ten = torch.from_numpy(Y_train).to(dtype=torch.int64)

X_test_ten = torch.from_numpy(X_test).to(dtype=torch.float32)

In [3]:
# Sanity check: show the shape of the training feature tensor.
print(X_train_ten.shape)

torch.Size([42000, 784])


In [4]:
#4 fold splitting
# Cut the training tensors into `n_folds` contiguous blocks, taken in file
# order (no shuffling). Each block serves once as the validation set while
# the remaining rows form the training set. The last fold absorbs the
# remainder when N is not divisible by `n_folds`.
n_folds = 4
N = X_train.shape[0]
fold_size = N // n_folds

# One (X_tr, Y_tr, X_val, Y_val) tuple per fold; every entry is a torch.Tensor.
splits_data = []

for fold in range(n_folds):
    start = fold * fold_size
    end = (start + fold_size) if fold < n_folds - 1 else N

    X_val = X_train_ten[start:end]
    Y_val = Y_train_ten[start:end]

    # torch.cat keeps the training part a tensor. The NumPy stacking helpers
    # would return ndarrays, which cannot be wrapped in a TensorDataset and
    # would leave the tuple with mixed container types.
    X_tr = torch.cat((X_train_ten[:start], X_train_ten[end:]), dim=0)
    Y_tr = torch.cat((Y_train_ten[:start], Y_train_ten[end:]), dim=0)

    splits_data.append((X_tr, Y_tr, X_val, Y_val))

In [5]:
# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Network dimensions
input_size = 784
hidden_size = 500
num_classes = np.unique(Y_train).size  # one output per distinct target value

# Optimisation settings
epochs = 100
batch_size = 100
lr = 0.01

# NOTE: the per-fold tuples in `splits_data` are not consumed here; the
# loaders below cover the full training set and the (unlabelled) test features.
train_dataset = TensorDataset(X_train_ten, Y_train_ten)
test_dataset = TensorDataset(X_test_ten)

# Training batches are reshuffled every epoch; test batches keep file order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Pull the first test batch; a single-tensor dataset yields a one-element batch.
examples = next(iter(test_loader))
example_data = examples[0]

In [6]:
#Neural Network
class NeuralNet(nn.Module):
    """Fully connected network with a single hidden layer.

    Input flows through ``l1`` (linear), ``relu`` and ``l2`` (linear). The
    result of ``l2`` is returned as-is; no softmax is applied here.
    """

    def __init__(self,input_size, hidden_size, ouput_size):
        """Create the two linear layers and the activation.

        Args:
            input_size: number of features per input sample.
            hidden_size: width of the hidden layer.
            ouput_size: number of values produced per sample (the spelling
                is kept so existing keyword callers keep working).
        """
        super().__init__()
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=ouput_size)

    def forward(self, x):
        """Return the raw output-layer scores for the batch ``x``."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)
    
# Build the network and place its parameters on the selected device.
model = NeuralNet(input_size=input_size, hidden_size=hidden_size, ouput_size=num_classes)
model = model.to(device)

# Cross-entropy loss on the network's raw scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [7]:
# Train for `epochs` passes over train_loader, with one optimizer update per
# mini-batch. A progress line is printed on every 100th batch.
n_total_steps = len(train_loader)

for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The batch has to live on the same device as the model.
        images, labels = images.to(device), labels.to(device)

        # Forward pass: scores from the model, then the loss against the labels.
        logits = model(images)
        loss = criterion(logits, labels)

        # Backward pass, parameter update, then clear gradients for the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Skip reporting unless this is a multiple-of-100 batch.
        if (i + 1) % 100 != 0:
            continue
        print(f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{n_total_steps}], Loss [{loss}]')
            

Epoch [1/100], Step [100/420], Loss [0.40081965923309326]
Epoch [1/100], Step [200/420], Loss [0.4748636484146118]
Epoch [1/100], Step [300/420], Loss [0.39880111813545227]
Epoch [1/100], Step [400/420], Loss [0.42630454897880554]
Epoch [2/100], Step [100/420], Loss [0.3179425895214081]
Epoch [2/100], Step [200/420], Loss [0.45024922490119934]
Epoch [2/100], Step [300/420], Loss [0.34330645203590393]
Epoch [2/100], Step [400/420], Loss [0.40921828150749207]
Epoch [3/100], Step [100/420], Loss [0.25752362608909607]
Epoch [3/100], Step [200/420], Loss [0.4787239730358124]
Epoch [3/100], Step [300/420], Loss [0.31863924860954285]
Epoch [3/100], Step [400/420], Loss [0.3813324272632599]
Epoch [4/100], Step [100/420], Loss [0.2601436674594879]
Epoch [4/100], Step [200/420], Loss [0.44279876351356506]
Epoch [4/100], Step [300/420], Loss [0.3147488534450531]
Epoch [4/100], Step [400/420], Loss [0.19391697645187378]
Epoch [5/100], Step [100/420], Loss [0.2984326481819153]
Epoch [5/100], Step [

In [10]:
# Switch the model to evaluation mode before predicting.
model.eval()

# Move the test features to the model's device (this rebinds X_test_ten).
X_test_ten = X_test_ten.to(device)

# One forward pass over the entire test set, with gradient tracking disabled.
with torch.no_grad():
    outputs = model(X_test_ten)

# The predicted class is the index of the largest score in each row.
predicted_labels = outputs.argmax(dim=1).cpu().numpy()

# Submission table: a 0-based running ID next to the predicted label.
submission_columns = {
    'ID': range(len(predicted_labels)),
    'Label': predicted_labels,
}
submission_df = pd.DataFrame(submission_columns)

# Write the table to disk without the DataFrame index column.
submission_df.to_csv("predictions.csv", index=False)