In [27]:
from scipy.spatial.distance import cdist
import pandas as pd
import torch
import torch_geometric
from torch_geometric.data import Data, DataLoader
from sklearn.model_selection import train_test_split



In [28]:


# CSVファイルを読み込み
data = pd.read_csv('/home/takakin/test/data/data_2018to2019.csv')

# 「農地以外」を除外し、インデックスをリセット
data = data[data['Crop'] != '農地以外'].reset_index(drop=True)

# 作付品目+発病指数をエンコーディング
CropDis_dummies = pd.get_dummies(data['Crop+Dis'], prefix='作付')
data = pd.concat([data, CropDis_dummies], axis=1)
data

Unnamed: 0,Crop,Dis,Crop+Dis,long,lati,next_Dis,作付_コンニャク0,作付_コンニャク1,作付_コンニャク2,作付_コンニャク3,作付_不耕作,作付_緑肥,作付_耕作放棄,作付_野菜等
0,コンニャク,0,コンニャク0,138.839876,36.285263,1,True,False,False,False,False,False,False,False
1,コンニャク,1,コンニャク1,138.838936,36.286036,1,False,True,False,False,False,False,False,False
2,野菜等,0,野菜等,138.837028,36.284767,0,False,False,False,False,False,False,False,True
3,野菜等,0,野菜等,138.836226,36.285697,0,False,False,False,False,False,False,False,True
4,不耕作,0,不耕作,138.792865,36.298967,0,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
982,コンニャク,2,コンニャク2,138.816101,36.294572,1,False,False,True,False,False,False,False,False
983,コンニャク,1,コンニャク1,138.817619,36.293356,0,False,True,False,False,False,False,False,False
984,コンニャク,1,コンニャク1,138.816773,36.293613,2,False,True,False,False,False,False,False,False
985,不耕作,0,不耕作,138.816238,36.293779,2,False,False,False,False,True,False,False,False


In [29]:
# 緯度経度を配列に変換
location = data[['lati', 'long']].values

# 全ての圃場間の距離を計算
distances = cdist(location, location, metric='euclidean')

# エッジの最大距離を設定 
max_distance = 0.001

# グラフデータセットを初期化
graph_dataset = []

In [30]:
# 各圃場のグラフを作成
for i, row1 in data.iterrows():
    node_features = []
    node_feature = row1[CropDis_dummies.columns].values
    node_features.append(node_feature)

    # 翌年の発病程度 (next_Dis) を二値ラベルに変換
    next_disease_level = row1['next_Dis']
    label = 0 if next_disease_level in [0, 1] else 1

    edges = []
    edge_features = []
    for j, row2 in data.iterrows():
        if i != j:
            distance = distances[i, j]
            if distance <= max_distance:
                # 一定距離以内の圃場のノードとエッジを追加
                # ノード追加
                neighbor_feature = row2[CropDis_dummies.columns].values
                node_features.append(neighbor_feature)

                # エッジ追加
                edges.append([i, j])  
                edge_feature = (max_distance - distance) / max_distance
                edge_features.append(edge_feature)

    # データをPyTorch Tensorに変換
    node_features = torch.tensor(node_features, dtype=torch.float)
    label = torch.tensor([label], dtype=torch.float)
    edges = torch.tensor(edges, dtype=torch.long).t().contiguous()  # (2, E) の形状に変換
    edge_features = torch.tensor(edge_features, dtype=torch.float).view(-1, 1)  # (E, 1) の形状に変換

    # Data オブジェクトを作成
    graph_data = Data(x=node_features, edge_index=edges, edge_attr=edge_features, y=label)
    graph_dataset.append(graph_data)

# 作成したグラフの数を確認
num_graphs = len(graph_dataset)
print(f"作成したグラフの数: {num_graphs}")
graph_dataset


作成したグラフの数: 987


[Data(x=[7, 8], edge_index=[2, 6], edge_attr=[6, 1], y=[1]),
 Data(x=[8, 8], edge_index=[2, 7], edge_attr=[7, 1], y=[1]),
 Data(x=[6, 8], edge_index=[2, 5], edge_attr=[5, 1], y=[1]),
 Data(x=[10, 8], edge_index=[2, 9], edge_attr=[9, 1], y=[1]),
 Data(x=[17, 8], edge_index=[2, 16], edge_attr=[16, 1], y=[1]),
 Data(x=[17, 8], edge_index=[2, 16], edge_attr=[16, 1], y=[1]),
 Data(x=[17, 8], edge_index=[2, 16], edge_attr=[16, 1], y=[1]),
 Data(x=[17, 8], edge_index=[2, 16], edge_attr=[16, 1], y=[1]),
 Data(x=[15, 8], edge_index=[2, 14], edge_attr=[14, 1], y=[1]),
 Data(x=[7, 8], edge_index=[2, 6], edge_attr=[6, 1], y=[1]),
 Data(x=[10, 8], edge_index=[2, 9], edge_attr=[9, 1], y=[1]),
 Data(x=[21, 8], edge_index=[2, 20], edge_attr=[20, 1], y=[1]),
 Data(x=[19, 8], edge_index=[2, 18], edge_attr=[18, 1], y=[1]),
 Data(x=[17, 8], edge_index=[2, 16], edge_attr=[16, 1], y=[1]),
 Data(x=[18, 8], edge_index=[2, 17], edge_attr=[17, 1], y=[1]),
 Data(x=[22, 8], edge_index=[2, 21], edge_attr=[21, 1], 

In [31]:
num_node_features = graph_dataset[0].num_node_features
num_classes = 2  


In [32]:

# 学習用データと評価用データに分割
train_data, test_data = train_test_split(graph_dataset, test_size=0.2, random_state=58)

print(f"学習用グラフ数: {len(train_data)}, テスト用グラフ数: {len(test_data)}")

学習用グラフ数: 789, テスト用グラフ数: 198


In [33]:
# DataLoaderの設定
batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# バッチを確認する関数
for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()

Step 1:
Number of graphs in the current batch: 64
DataBatch(x=[594, 8], edge_index=[2, 530], edge_attr=[530, 1], y=[64], batch=[594], ptr=[65])

Step 2:
Number of graphs in the current batch: 64
DataBatch(x=[587, 8], edge_index=[2, 523], edge_attr=[523, 1], y=[64], batch=[587], ptr=[65])

Step 3:
Number of graphs in the current batch: 64
DataBatch(x=[611, 8], edge_index=[2, 547], edge_attr=[547, 1], y=[64], batch=[611], ptr=[65])

Step 4:
Number of graphs in the current batch: 64
DataBatch(x=[629, 8], edge_index=[2, 565], edge_attr=[565, 1], y=[64], batch=[629], ptr=[65])

Step 5:
Number of graphs in the current batch: 64
DataBatch(x=[604, 8], edge_index=[2, 540], edge_attr=[540, 1], y=[64], batch=[604], ptr=[65])

Step 6:
Number of graphs in the current batch: 64
DataBatch(x=[639, 8], edge_index=[2, 575], edge_attr=[575, 1], y=[64], batch=[639], ptr=[65])

Step 7:
Number of graphs in the current batch: 64
DataBatch(x=[638, 8], edge_index=[2, 574], edge_attr=[574, 1], y=[64], batch=[63



In [34]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x

model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(8, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [35]:
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
         out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
         loss = criterion(out, data.y)  # Compute the loss.
         loss.backward()  # Derive gradients.
         optimizer.step()  # Update parameters based on gradients.
         optimizer.zero_grad()  # Clear gradients.

def test(loader):
     model.eval()

     correct = 0
     for data in loader:  # Iterate in batches over the training/test dataset.
         out = model(data.x, data.edge_index, data.batch)
         pred = out.argmax(dim=1)  # Use the class with highest probability.
         correct += int((pred == data.y).sum())  # Check against ground-truth labels.
     return correct / len(loader.dataset)  # Derive ratio of correct predictions.


for epoch in range(1, 171):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

RuntimeError: index 643 is out of bounds for dimension 0 with size 636