In [42]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
import os
import glob


## 사용자 정의 데이터셋  

In [43]:
class CropDataset(Dataset):
    def __init__(self, json_dir, img_dir, transform=None):
        print(f"Creating dataset from {json_dir} and {img_dir}")
        self.json_dir = json_dir
        self.img_dir = img_dir
        self.transform = transform
        self.json_files = glob.glob(os.path.join(json_dir, "*.json"))
        self.data = []
        print(f"Found {len(self.json_files)} json files")
        for json_file in self.json_files:
            with open(json_file, 'r', encoding='utf-8-sig') as f:
                item = json.load(f)
                img_id = item['ID']
                img_path = os.path.join(self.img_dir, f"{img_id}.jpg")
                if os.path.exists(img_path):
                    self.data.append(item)
                else:
                    print(f"Warning: No matching image for JSON file {json_file}")
        print(f"Loaded {len(self.data)} valid samples out of {len(self.json_files)} JSON files")
        # for json_file in self.json_files:
        #     with open(json_file, 'r', encoding='utf-8-sig') as f:
        #         self.data.append(json.load(f))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        img_id = item['ID']
        
        img_path = os.path.join(self.img_dir, f"{img_id}.jpg")

        # print(img_path)
        
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"No image file found for ID: {img_id}")
        # else:
            # print(f"Searching for image: {img_path}")            
            # print(f"Current working directory: {os.getcwd()}")
            # print(f"Contents of image directory:")
            # print(os.listdir(self.img_dir))            
        
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)

        features = torch.tensor([
            item['GSD'],
            item['LONG'],
            item['LAT'],
            item['GROWTH_1'],
            item['GROWTH_2']
        ], dtype=torch.float32)

        yield_ = torch.tensor(item['YIELD'], dtype=torch.float32)

        return image, features, yield_


## 모델

In [44]:
class CropYieldModel(nn.Module):
    def __init__(self):
        super(CropYieldModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 28 * 28 + 5, 128)
        self.fc2 = nn.Linear(128, 1)
        self.relu = nn.ReLU()

    def forward(self, img, features):
        x = self.pool(self.relu(self.conv1(img)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(-1, 64 * 28 * 28)
        x = torch.cat((x, features), dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## 데이터 전처리 및 로딩

In [45]:
# 데이터 전처리 및 로딩
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset = CropDataset('json_directory/', 'images_directory/', transform=transform)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


Creating dataset from json_directory/ and images_directory/
Found 50 json files
Loaded 50 valid samples out of 50 JSON files


## 모델, 손실 함수, 옵티마이저 초기화 

In [46]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CropYieldModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## 학습루프

In [47]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    for i, (images, features, labels) in enumerate(dataloader):
        images = images.to(device)
        features = features.to(device)
        labels = labels.to(device)

        outputs = model(images, features)
        loss = criterion(outputs, labels.unsqueeze(1))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/100], Loss: 142816.6094
Epoch [2/100], Loss: 129073.8047
Epoch [3/100], Loss: 51095.5586
Epoch [4/100], Loss: 30476.1973
Epoch [5/100], Loss: 39896.3320
Epoch [6/100], Loss: 23533.9824
Epoch [7/100], Loss: 5468.3447
Epoch [8/100], Loss: 41826.0000
Epoch [9/100], Loss: 18222.2441
Epoch [10/100], Loss: 12312.0449
Epoch [11/100], Loss: 20045.3340
Epoch [12/100], Loss: 13747.0586
Epoch [13/100], Loss: 7740.0879
Epoch [14/100], Loss: 22451.9434
Epoch [15/100], Loss: 10946.6855
Epoch [16/100], Loss: 11028.8994
Epoch [17/100], Loss: 14664.2744
Epoch [18/100], Loss: 14650.8145
Epoch [19/100], Loss: 7841.8213
Epoch [20/100], Loss: 4693.2178
Epoch [21/100], Loss: 1515.0146
Epoch [22/100], Loss: 9797.6055
Epoch [23/100], Loss: 5721.3516
Epoch [24/100], Loss: 6243.1777
Epoch [25/100], Loss: 11017.9346
Epoch [26/100], Loss: 2067.0542
Epoch [27/100], Loss: 4877.7949
Epoch [28/100], Loss: 2930.8066
Epoch [29/100], Loss: 3953.2661
Epoch [30/100], Loss: 4675.0957
Epoch [31/100], Loss: 2914.849

In [48]:
torch.save(model.state_dict(), 'crop_yield_model.pth')

In [49]:
model.eval()
with torch.no_grad():
    new_image_id = "0120200824MS02N000070"  # 예시 ID
    
    # 새 JSON 파일 읽기
    with open(f"json_directory/{new_image_id}.json", 'r', encoding='utf-8-sig') as f:
        new_data = json.load(f)
    
    # 새 이미지 파일 찾기 (jpg만)
    new_image_jpg = f"images_directory/{new_image_id}.jpg"
    
    if os.path.exists(new_image_jpg):
        new_image_path = new_image_jpg
    else:
        raise FileNotFoundError(f"No image file found for ID: {new_image_id}")
    
    new_image = transform(Image.open(new_image_path).convert('RGB')).unsqueeze(0).to(device)
    new_features = torch.tensor([[
        new_data['GSD'],
        new_data['LONG'],
        new_data['LAT'],
        new_data['GROWTH_1'],
        new_data['GROWTH_2']
    ]], dtype=torch.float32).to(device)
    
    prediction = model(new_image, new_features)
    print(f'Predicted Yield: {prediction.item():.2f}')

Predicted Yield: 245.98
