# Fruit Classification with CNN (Apples vs Pears)

This notebook trains a CNN to classify fruit images as either apples or pears using PyTorch. It checks for local data first and handles extraction.

## 1. Import Required Libraries

In [None]:
import os
import gdown
import zipfile
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the signed edge list into a DataFrame
import pandas as pd

# The file is tab-separated; lines starting with '#' are skipped as comments
# and no row is treated as a header, so the column labels are attached here.
df = pd.read_csv(
    'soc-sign-epinions.txt',
    sep='\t',
    comment='#',
    header=None,
).set_axis(['FromNodeId', 'ToNodeId', 'Sign'], axis=1)

# Preview the first few rows (optional)
print(df.head())


   FromNodeId  ToNodeId  Sign
0           0         1    -1
1           1    128552    -1
2           2         3     1
3           4         5    -1
4           4       155    -1


In [3]:
! pip install torch torchvision torchaudio
! pip install torch-geometric


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
# Show the column labels of the loaded edge-list frame
df.columns

Index(['FromNodeId', 'ToNodeId', 'Sign'], dtype='object')

In [6]:
import torch
from torch_geometric.data import Data

# Source and target node ids, transposed into a 2 x num_edges long tensor
edge_index = torch.tensor(
    df[['FromNodeId', 'ToNodeId']].to_numpy().T,
    dtype=torch.long,
)

# The Sign column becomes a float edge attribute, one value per edge
edge_attr = torch.tensor(df['Sign'].to_numpy(), dtype=torch.float)

# Node ids are used directly as row indices, so the node count is the
# largest id seen in either row of edge_index, plus one.
num_nodes = edge_index.max().item() + 1

# Dummy node features: one-hot rows of an identity matrix.
# NOTE(review): torch.eye allocates a dense num_nodes x num_nodes matrix,
# which grows quadratically with the node count - confirm this fits in memory.
x = torch.eye(num_nodes)

# Bundle features, connectivity and edge signs into one PyTorch Geometric graph
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)

print(data)

NameError: name 'df' is not defined

In [2]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch
class SimpleGCN(torch.nn.Module):
    """Two stacked GCNConv layers with a ReLU in between.

    The forward pass returns log-softmax scores taken over dimension 1.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two convolution layers.

        Args:
            input_dim: Input size passed to the first GCNConv layer.
            hidden_dim: Output size of the first layer and input size of the second.
            output_dim: Output size of the second GCNConv layer.
        """
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Run both convolutions on ``data.x`` using ``data.edge_index``.

        Args:
            data: Object exposing ``x`` and ``edge_index`` attributes.

        Returns:
            The second layer's output after ``log_softmax`` over ``dim=1``.
        """
        hidden = F.relu(self.conv1(data.x, data.edge_index))
        scores = self.conv2(hidden, data.edge_index)
        return F.log_softmax(scores, dim=1)


In [5]:
# Build an untrained model sized to the graph's node features and run a single
# forward pass to check that the pieces fit together.
model = SimpleGCN(
    input_dim=data.num_node_features,
    hidden_dim=16,
    output_dim=2,  # change dims if needed
)
out = model(data)
print(out)

NameError: name 'data' is not defined

In [None]:
import torch
import torch.nn.functional as F

# Labels and mask need one entry per node of the graph: the training loop
# indexes the model output with the mask, and a boolean mask whose length
# differs from the number of output rows raises an IndexError.
node_ids = torch.arange(data.num_nodes)

# Dummy labels (you would replace this with real node classes): alternate
# 0, 1, 0, 1, ... across nodes; arange yields int64, so y is a long tensor.
y = node_ids % 2

# Mask to identify which nodes to train on: the first half of the nodes
train_mask = node_ids < data.num_nodes // 2


In [None]:
# Attach the label tensor and the training mask to the graph object so the
# training loop can read them back as data.y and data.train_mask.
data.y = y
data.train_mask = train_mask


In [None]:
# Fresh model and Adam optimizer for the training run
model = SimpleGCN(
    input_dim=data.num_node_features,
    hidden_dim=16,
    output_dim=2,
)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Full-graph training: every epoch is one forward/backward pass over `data`
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # Only the nodes selected by the training mask contribute to the loss
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

    # Report the loss on every 20th epoch
    if not epoch % 20:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')


In [None]:
# Switch to evaluation mode and take the highest-scoring class for each node.
model.eval()
# Inference only: disabling autograd avoids building a graph that is never
# used. argmax yields the class indices directly, so nothing is assigned to
# `_`, which IPython reserves for the previous cell's output.
with torch.no_grad():
    pred = model(data).argmax(dim=1)
print("Predictions:", pred)
