# Vision Transformer (ViT) Image Classifier

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from transformers import ViTFeatureExtractor, ViTForImageClassification
import torch  # Import PyTorch for tensor operations
import dask.dataframe as dd
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from transformers import get_scheduler
import time


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [6]:
# --- Load the raw training table ---------------------------------------------
# The CSV is read through dask and immediately materialised with .compute(),
# so everything below operates on an ordinary in-memory DataFrame.
df_train = dd.read_csv('model/train_data.csv', assume_missing=True).compute()

# Column holding the class label; every other column is treated as pixel data.
label_column = '784'

# Number of target classes used for the one-hot encoding.
num_classes = 17

# Set the number of samples to read from each class (adjust this as needed)
samples_per_class = 100  # Modify this number based on your memory constraints

# --- Draw a capped, reproducible sample from every class ----------------------
# Classes are visited in order of first appearance and sampled with a fixed
# seed, so the resulting row order is deterministic.
sampled_data = []
for label in df_train[label_column].unique():
    # Filter once per class and reuse the result for both the size cap and the draw.
    class_rows = df_train[df_train[label_column] == label]
    class_data = class_rows.sample(n=min(samples_per_class, len(class_rows)), random_state=42)
    sampled_data.append(class_data)

# Concatenate all the sampled data into a single DataFrame
sampled_df = pd.concat(sampled_data)

# --- Separate labels and features ---------------------------------------------
# Double brackets keep the labels as an (N, 1) column array.
labels = sampled_df[[label_column]].values
# Rebind instead of mutating in place so the pre-drop frame is never half-modified.
sampled_df = sampled_df.drop(columns=[label_column])

# One-hot encode the labels for classification
categorical_data = to_categorical(labels, num_classes=num_classes)

# --- Build the image array ----------------------------------------------------
# Reshape every row to a 28x28 single-channel image in one vectorised step,
# then copy that channel three times along the last axis to obtain the
# 3-channel layout, giving an array of shape (N, 28, 28, 3).
gray_images = sampled_df.values.reshape(len(sampled_df), 28, 28, 1)
l = np.concatenate((gray_images, gray_images, gray_images), axis=-1)

# Split the dataset into training and testing sets
train_X, test_X, train_y, test_y = train_test_split(l, categorical_data, test_size=0.2, random_state=42)

# --- Preprocess images for the Vision Transformer -----------------------------
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')


def _extract_pixel_values(images):
    """Preprocess each image with ``feature_extractor`` and join the results.

    Args:
        images: Iterable of image arrays (here, slices of the (N, 28, 28, 3) array).

    Returns:
        A single tensor made by concatenating the per-image "pixel_values"
        tensors along their first dimension.
    """
    per_image = [feature_extractor(image, return_tensors="pt")["pixel_values"] for image in images]
    return torch.cat(per_image)


# Model-ready tensors for both splits
train_features = _extract_pixel_values(train_X)
test_features = _extract_pixel_values(test_X)



In [7]:
# Collapse each one-hot row back to its integer class index (position of the
# largest entry) and wrap the result in a tensor, one class id per sample.
train_labels = torch.tensor(np.argmax(train_y, axis=1))
test_labels = torch.tensor(np.argmax(test_y, axis=1))


In [8]:
# Mini-batch size shared by the training and testing loaders.
batch_size = 15

# Dedicated, seeded random generator for the training shuffle so the batch
# order is reproducible whenever this cell is re-run (matching the fixed
# random_state used elsewhere in the notebook).
shuffle_generator = torch.Generator()
shuffle_generator.manual_seed(42)

# Training loader: pairs each feature tensor with its label and reshuffles
# the samples every epoch using the seeded generator above.
train_data = TensorDataset(train_features, train_labels)
train_loader = DataLoader(
    train_data,
    shuffle=True,
    batch_size=batch_size,
    generator=shuffle_generator,
)

# Prepare DataLoader for testing
test_data = TensorDataset(test_features, test_labels)
test_loader = DataLoader(test_data, batch_size=batch_size)  # No shuffling for test data


In [9]:
# Load the pre-trained ViT checkpoint and attach a classification head with
# one output per class. The library reports the classifier weights as newly
# initialised, so the model has to be fine-tuned before it is useful.
pretrained_checkpoint = 'google/vit-base-patch16-224-in21k'
num_labels = 17  # We have 17 classes (digits + symbols)
model = ViTForImageClassification.from_pretrained(pretrained_checkpoint, num_labels=num_labels)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Training configuration: epoch count, loss, optimizer and learning-rate schedule.
num_epochs = 5
criterion = CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=5e-5)  # Use AdamW instead of Adam

# The scheduler is stepped once per batch, so its horizon is the total number
# of batches processed over all epochs; no warm-up steps are used.
total_training_steps = num_epochs * len(train_loader)
scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)


In [11]:
# Fine-tune the model for `num_epochs` passes over `train_loader`, printing the
# mean batch loss and the wall-clock duration of every epoch.
model.train()  # switch the model to training mode

# Feed batches to whichever device the model's parameters live on; when the
# model is on the CPU the .to(device) calls below are no-ops.
device = next(model.parameters()).device

for epoch in range(num_epochs):
    total_loss = 0
    # perf_counter is monotonic, so the measured duration is unaffected by
    # system clock adjustments during a long epoch.
    start_time = time.perf_counter()

    # Loop variables are named batch_* so they do not overwrite the
    # dataset-level `labels` array created in the data-preparation cell.
    for batch_inputs, batch_labels in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_inputs).logits
        loss = criterion(logits, batch_labels)
        loss.backward()
        optimizer.step()
        scheduler.step()  # the schedule advances once per batch
        optimizer.zero_grad()  # clear gradients before the next batch
        total_loss += loss.item()

    end_time = time.perf_counter()
    epoch_time = end_time - start_time  # duration of this epoch in seconds

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}, Time: {epoch_time:.2f} seconds")

Epoch 1/5, Loss: 2.1576, Time: 818.36 seconds
Epoch 2/5, Loss: 1.1452, Time: 812.30 seconds
Epoch 3/5, Loss: 0.7534, Time: 810.67 seconds
Epoch 4/5, Loss: 0.5864, Time: 836.26 seconds
Epoch 5/5, Loss: 0.5101, Time: 810.65 seconds


In [12]:
# Persist the fine-tuned model, and store the feature extractor's
# preprocessing configuration alongside it so the saved directory contains
# both pieces needed to run the model on new images.
output_dir = 'model/vit_model'
model.save_pretrained(output_dir)
feature_extractor.save_pretrained(output_dir)