In [172]:
import kagglehub
from numpy import integer

# Download latest version
path = kagglehub.dataset_download("muratkokludataset/date-fruit-datasets")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\liude\.cache\kagglehub\datasets\muratkokludataset\date-fruit-datasets\versions\1


In [173]:
import os
print(os.listdir(path+'/Date_Fruit_Datasets'))
data_path = path + '/Date_Fruit_Datasets/Date_Fruit_Datasets.xlsx' 

['Date_Fruit_Datasets.arff', 'Date_Fruit_Datasets.xlsx', 'Date_Fruit_Datasets_Citation_Request.txt']


In [174]:
!pip install openpyxl

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple


In [175]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.preprocessing import LabelEncoder

if torch.cuda.is_available():
    device = torch.device('cuda:0')
    print('GPU available')
else:
    device = torch.device('cpu')
    print('CPU available')

GPU available


In [176]:
features = pd.read_excel(data_path)

classes = features.pop('Class')

In [177]:
feature_tensors = [torch.tensor(row, dtype=torch.float32) for row in features.values]
feature_tensors = torch.stack(feature_tensors)

mean = torch.mean(feature_tensors, dim=0)
std = torch.std(feature_tensors, dim=0)

feature_tensors = (feature_tensors - mean) / std
feature_tensors.shape

torch.Size([898, 34])

In [186]:
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(classes)
tensor_labels = torch.tensor(labels, dtype=torch.long)
tensor_labels.shape

torch.Size([898])

In [187]:
class FruitDataset(Dataset):
    def __init__(self, feature, labels):
        self.features = feature
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [188]:
dataset = FruitDataset(feature_tensors, tensor_labels)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])


train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [189]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(34, 128)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(64, 7)
        # 在训练神经网络时，通常不需要在模型中直接包含Softmax层，尤其是在使用损失函数如categorical_crossentropy时。
        # 这是因为这些损失函数内部已经实现了Softmax操作。

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x


model = Model().to(device)

In [190]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)

In [191]:
num_epochs = 100
patience = 10
best_val_loss = float('inf')
patience_counter = 0

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch_features, batch_labels in train_loader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
        optimizer.zero_grad()
        batch_output = model(batch_features)
        loss = criterion(batch_output, batch_labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}")
    
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch_features, batch_labels in val_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            batch_output = model(batch_features)
            loss = criterion(batch_output, batch_labels)
            total_val_loss += loss.item()
    val_loss = total_val_loss / len(val_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}")
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), "best_model.pth")
    else:
        patience_counter += 1
        
    if patience_counter >= patience:
        print("Early stopping")
        break
        

Epoch 1/100, Training Loss: 0.8792
Epoch 1/100, Validation Loss: 0.3672
Epoch 2/100, Training Loss: 0.5590
Epoch 2/100, Validation Loss: 0.3132
Epoch 3/100, Training Loss: 0.4347
Epoch 3/100, Validation Loss: 0.2485
Epoch 4/100, Training Loss: 0.3929
Epoch 4/100, Validation Loss: 0.2365
Epoch 5/100, Training Loss: 0.2982
Epoch 5/100, Validation Loss: 0.2256
Epoch 6/100, Training Loss: 0.3013
Epoch 6/100, Validation Loss: 0.2249
Epoch 7/100, Training Loss: 0.2811
Epoch 7/100, Validation Loss: 0.2086
Epoch 8/100, Training Loss: 0.2870
Epoch 8/100, Validation Loss: 0.2510
Epoch 9/100, Training Loss: 0.2543
Epoch 9/100, Validation Loss: 0.2153
Epoch 10/100, Training Loss: 0.2638
Epoch 10/100, Validation Loss: 0.2829
Epoch 11/100, Training Loss: 0.2686
Epoch 11/100, Validation Loss: 0.2053
Epoch 12/100, Training Loss: 0.2179
Epoch 12/100, Validation Loss: 0.1829
Epoch 13/100, Training Loss: 0.2062
Epoch 13/100, Validation Loss: 0.1998
Epoch 14/100, Training Loss: 0.1854
Epoch 14/100, Valida

In [202]:
correct = 0
for idx, feature in enumerate(feature_tensors):
    tensor = feature.unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        output = model(tensor)
        probabilities = F.softmax(output, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1)
    if predicted_class == tensor_labels[idx]:
        correct += 1
print(f"Accuracy: {100 * (correct/len(tensor_labels))}%")


Accuracy: 96.99331848552339%


In [195]:
# 保存Model
torch.save(model.state_dict(), "models/date_fruit_best_model.pth")


In [201]:
model = Model().to(device)  # Model 是你定义的模型类的名称
model.load_state_dict(torch.load("models/date_fruit_best_model.pth"))
model.eval()  # 设置为评估模式，这对于使用BatchNorm和Dropout层是必要的


Model(
  (fc1): Linear(in_features=34, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)