In [11]:
#判別システム全体
#前処理フェーズ，閾値の学習フェーズ，判別フェーズ

#前処理フェーズ：YOLO
#入力：画像フォルダ
#出力：前処理後の画像

#閾値の学習フェーズ
#入力：教師データ付きのデータセット
#出力：閾値

#判別フェーズ
#入力：画像フォルダ
#出力：判別結果

In [2]:
import os 
import cv2
import glob
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
import scipy.stats as stats
import seaborn as sns
import pandas as pd
from sklearn.metrics import roc_curve
import re
import math
from PIL import Image
from icecream import ic
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report,precision_score, recall_score, f1_score
import joblib
import time

In [13]:
start = time.time()

In [14]:
data = "0203_energee_after"

In [15]:
#前処理フェーズ
#yoloでやるときにちゃんと修正

input_folders = [
    f"/home/data/{data}/org/A",
    f"/home/data/{data}/org/B",
    f"/home/data/{data}/org/C",
]

mask_output_folders = [
    f"/home/data/{data}/maskBB/A",
    f"/home/data/{data}/maskBB/B",
    f"/home/data/{data}/maskBB/C",
]
    
crop_output_folders = [
    f"/home/data/{data}/cropBB/A",
    f"/home/data/{data}/cropBB/B",
    f"/home/data/{data}/cropBB/C",
]

maskedimg_output_folders = [
    f"/home/data/{data}/maskedBB/A",
    f"/home/data/{data}/maskedBB/B",
    f"/home/data/{data}/maskedBB/C",
]

# 出力フォルダを作成
for folder in mask_output_folders:
    os.makedirs(folder, exist_ok=True)
for folder in crop_output_folders:
    os.makedirs(folder, exist_ok=True)
for folder in maskedimg_output_folders:
    os.makedirs(folder, exist_ok=True)

for input_folder, mask_output_folder,crop_output_folder,maskedimg_output_folder in zip(input_folders, mask_output_folders,crop_output_folders,maskedimg_output_folders):
    for file in os.listdir(input_folder):
        # 画像ファイルのみを処理
        if file.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.JPEG')):
            name = os.path.basename(file).replace(".JPEG", "")
            file_path = os.path.join(input_folder, file)

            # 画像を読み込む
            image = cv2.imread(file_path)

            # BGRからHSVに変換
            hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            # 青色の範囲を定義
            lower_blue = np.array([20, 100, 100])
            upper_blue = np.array([255, 255, 255])

            # ブルーバックのマスクを作成
            mask = cv2.inRange(hsv, lower_blue, upper_blue)

            # マスクの反転（対象物を白、背景を黒にする）
            mask_inv = cv2.bitwise_not(mask)

            # 境界を検出
            contours, _ = cv2.findContours(mask_inv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            # 最大の輪郭を取得
            largest_contour = max(contours, key=cv2.contourArea)

            # 対象物を切り取るための境界ボックスを取得
            x, y, w, h = cv2.boundingRect(largest_contour)
            cropped_image = image[y:y+h, x:x+w]
            cropped_mask = mask_inv[y:y+h, x:x+w]
            masked_cropped_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
            
            mask_output_file_path = os.path.join(mask_output_folder, file)
            crop_output_file_path = os.path.join(crop_output_folder, file)
            maskedimg_output_file_path = os.path.join(maskedimg_output_folder, file)
        
            # 切り取った画像を保存
            cv2.imwrite(mask_output_file_path, cropped_mask)
            cv2.imwrite(crop_output_file_path, cropped_image)
            cv2.imwrite(maskedimg_output_file_path, masked_cropped_image)

In [16]:
#閾値の学習フェーズ
outputfile = f"/home/data/{data}/keijo_mse.csv"
inputfolder_lists = [
    f"/home/data/{data}/maskBB/A",
    f"/home/data/{data}/maskBB/B",
    f"/home/data/{data}/maskBB/C",
]

#形状
outputfile = f"/home/data/{data}/keijo_mse.csv"
one_dimensional_data_dict = {}
evaluation_results = {}

for folder in inputfolder_lists:
    folder_name = os.path.basename(folder)
    image_paths = glob.glob(os.path.join(folder, '*.JPEG'))
    
    # 画像ファイル数をカウント
    num_images = len(image_paths)
    print(f"Folder {folder_name} contains {num_images} images.")

    for img_path in image_paths:
        # 画像の読み込み
        mask = cv2.imread(img_path)

        # グレースケール画像に変換
        gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

        # 二値化
        _, th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # 輪郭を検出し、最大の輪郭を取得
        contours, _ = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            continue
        max_contour = max(contours, key=cv2.contourArea)

        # 最小外接円を取得
        (x, y), radius = cv2.minEnclosingCircle(max_contour)
        radius = int(radius)

        # 重心を計算
        M = cv2.moments(max_contour)
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            cX, cY = 0, 0
        center = (cX, cY)

        # 極座標変換
        h, w = gray.shape
        flags = cv2.INTER_CUBIC + cv2.WARP_FILL_OUTLIERS + cv2.WARP_POLAR_LINEAR
        linear_polar = cv2.warpPolar(gray, (w, h), center, radius, flags)

        # 行ごとの黒ピクセル数をカウント
        black_pixel_count = np.sum(linear_polar == 0, axis=1)
        file_name = os.path.basename(img_path)
        one_dimensional_data_dict[file_name] = black_pixel_count

        # 真円の場合の理想データ（黒ピクセル数が0）
        y_pseudo = np.zeros_like(black_pixel_count)

        # 評価指標の計算
        mae = mean_absolute_error(y_pseudo, black_pixel_count)
        mse = mean_squared_error(y_pseudo, black_pixel_count)
        rmse = np.sqrt(mse)  # RMSEを計算

        evaluation_results[file_name] = {
            'MSE': mse, 
            'folder': folder_name
        }

# MAE、MSE、RMSEの抽出
file_names = list(evaluation_results.keys())
# mae_values = [metrics['MAE'] for metrics in evaluation_results.values()]
mse_values = [metrics['MSE'] for metrics in evaluation_results.values()]
# rmse_values = [metrics['RMSE'] for metrics in evaluation_results.values()]
folders = [metrics['folder'] for metrics in evaluation_results.values()]
# DataFrame に変換
df_mse = pd.DataFrame(evaluation_results).T  # .T で転置して見やすくする
df_mse.reset_index(inplace=True)
df_mse.rename(columns={'index': 'filename'}, inplace=True)
# df_mse.rename(columns={'Folder': 'folder'}, inplace=True)
df_mse['filename'] = df_mse['filename'].astype(str)  # filenameを文字列型に
df_mse['MSE'] = df_mse['MSE'].astype(float)  # MSEを浮動小数型に
df_mse['folder'] = df_mse['folder'].astype(str)  # folderを文字列型に

# df_mse = pd.DataFrame({
#     'File': file_names,
#     'MSE': mse_values,
#     'folder': folders
# })
#サイズ

output_file = f"/home/data/{data}/size_pixelcounts.csv"
input_folders = [
    f"/home/data/{data}/maskBB/A",
    f"/home/data/{data}/maskBB/B",
    f"/home/data/{data}/maskBB/C",
]
# フォルダごとのラベル設定
labels = ['Folder A', 'Folder B', 'Folder C']

# 結果を保存するリスト
data_list = []

# データの収集
for input_folder, label in zip(input_folders, labels):
    for file in os.listdir(input_folder):
        if file.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.JPEG')):
            file_path = os.path.join(input_folder, file)
            image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)

            # 二値化
            _, image = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY)

            # 白ピクセルのカウント
            white_pixel_count = np.sum(image == 255)

            # ファイル名（拡張子を除去）
            # filename = os.path.splitext(file)[0]
            filename = file

            # データをリストに追加
            data_list.append([filename, white_pixel_count, label])

# データフレームを作成
df_size = pd.DataFrame(data_list, columns=["filename", "size_count", "folder"])
df_size['folder'] = df_size['folder'].str.replace('Folder ', '')
df_size['filename'] = df_size['filename'].astype(str)  # filenameを文字列型に
df_size['size_count'] = df_size['size_count'].astype(float)  # size_countを浮動小数型に
df_size['folder'] = df_size['folder'].astype(str)  # folderを文字列型に

#襞領域
def fmxy(absfxy, mxy):
    return np.where(absfxy > mxy, 1, 0)

def Min(a, b):
    return np.minimum(a, b)

def G12(theta1, theta2):
    condition1 = (theta2 - np.pi < theta1) & (theta1 < theta2) & (theta2 >= 0)
    result1 = np.abs(theta1 - theta2)
    condition2 = (-np.pi < theta1) & (theta1 < (theta2 - np.pi)) & (theta2 >= 0)
    result2 = theta2 - 2 * np.pi - theta1
    condition3 = (-np.pi < theta1) & (theta1 < (theta2 + np.pi)) & (theta2 < 0)
    result3 = np.abs(theta1 - theta2)
    condition4 = (theta2 + np.pi < theta1) & (theta1 < np.pi) & (theta2 < 0)
    result4 = theta1 - theta2 - 2 * np.pi
    result = np.where(condition1, result1, 
             np.where(condition2, result2, 
             np.where(condition3, result3, 
             np.where(condition4, result4, 0))))
    return result

def SquareSum(I, x, y, h, w, n):
    x1, y1 = x - n, y - n
    x2, y2 = x + n, y + n
    x1, x2 = max(x1, 0), min(x2, w - 2)
    y1, y2 = max(y1, 0), min(y2, h - 2)
    total = I[y2, x2] - I[y1, x2] - I[y2, x1] + I[y1, x1]
    return total

def sdis(Iruv, Imyv, x, y, h, w, n):
    Tr = SquareSum(Iruv, x, y, h, w, n)
    Tm = SquareSum(Imyv, x, y, h, w, n)
    return Tr / Tm

# パラメータ
n = 15
data_list = []

img_folders = [
    f"/home/data/{data}/maskedBB/A",
    f"/home/data/{data}/maskedBB/B",
    f"/home/data/{data}/maskedBB/C",
]
mask_folders = [
    f"/home/data/{data}/maskBB/A",
    f"/home/data/{data}/maskBB/B",
    f"/home/data/{data}/maskBB/C",
]

for img_folder, mask_folder in zip(img_folders, mask_folders):
    folder_name = os.path.basename(img_folder)
    img_files = os.listdir(img_folder)
    mask_files = os.listdir(mask_folder)
    
    for img_file, mask_file in zip(img_files, mask_files):
        img_path = os.path.join(img_folder, img_file)
        mask_path = os.path.join(mask_folder, mask_file)
        
        # 重心 (1)
        img = cv2.imread(img_path)
        mask_img = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        masked_img = cv2.imread(img_path)
        h, w = img.shape[:2]
        
        # 重心を計算
        x_sum, y_sum, count = 0, 0, 0
        for i in range(h):
            for j in range(w):
                if mask_img[i][j] == 255:
                    x_sum += j
                    y_sum += i
                    count += 1
        xc, yc = (x_sum / count, y_sum / count) if count > 0 else (0, 0)
        
        # fθ(x,y)(勾配の方向), |f(x,y)| (2)(3)
        image = cv2.cvtColor(masked_img, cv2.COLOR_BGR2GRAY)
        fdy, fdx = np.gradient(image)
        f0xy = np.arctan2(fdy, fdx)
        absfxy = np.uint8(np.sqrt(fdx**2 + fdy**2))
        
        # C0(x,y)：中心からのベクトルの角度
        height, width = image.shape
        C0xy = np.zeros((height, width))
        for y in range(height):
            for x in range(width):
                dx, dy = x - xc, y - yc
                C0xy[y, x] = np.arctan(dy / dx) if dx != 0 else 0
        
        # f(xy)の勾配ベクトルが中心から(x,y)へのベクトルへ垂直か評価する関数fdisxy(4)(5)
        fdisxy = Min(G12(C0xy + np.pi/2, f0xy)**2, G12(C0xy - np.pi/2, f0xy)**2)
        
        # mxy = |fxy|に対する2n+1×2n+1のメディアンフィルタリングの結果
        kernel_size = 2 * n + 1
        mxy = np.uint8(cv2.medianBlur(absfxy, kernel_size))
        
        # rdis (8)
        rdisxy = fmxy(absfxy, mxy) * fdisxy
        
        # Iruv, Imyv (9)(10)
        Iruv = cv2.integral(rdisxy)
        Imyv = cv2.integral(fmxy(absfxy, mxy).astype(np.uint8))
        
        # sdis計算
        sdisval = np.zeros((image.shape[0], image.shape[1]))
        for y in range(0, image.shape[0], 1):
            for x in range(0, image.shape[1], 1):
                sdisval[y, x] = sdis(Iruv, Imyv, x, y, h, w, n)
        sdisval = np.nan_to_num(sdisval, nan=0.0, posinf=0.0, neginf=0.0)
        
        # 閾値処理
        T = 0.2
        hxy = np.where(sdisval < T, 1, 0)
        hxy2 = cv2.bitwise_and(hxy, hxy, mask=mask_img)
        
        # シイタケ領域のPixel数を計算
        count_mask = np.sum(mask_img == 255)
        count_hida = np.sum(hxy2 == 1)
        R = count_hida / count_mask if count_mask > 0 else 0
        data_list.append((img_file, R,folder_name))
        
output_csv = f"/home/data/{data}/R_values.csv"
df_r = pd.DataFrame(data_list, columns=["filename", "R","folder"])
df_r['filename'] = df_r['filename'].astype(str)  # filenameを文字列型に
df_r['R'] = df_r['R'].astype(float)  # Rを浮動小数型に
df_r['folder'] = df_r['folder'].astype(str)  # folderを文字列型に

# `folder` カラムを削除
df_mse = df_mse.drop(columns=['folder'])
df_size = df_size.drop(columns=['folder'])


# `filename` をキーとして結合
df_merge = df_mse.merge(df_size, on="filename", how="inner").merge(df_r, on="filename", how="inner")

# 'folder' を 'Label' に変更
df_merge['Label'] = df_merge['folder'].apply(lambda x: 0 if x == 'A' else (1 if x == 'B' else 2))

# 'folder' カラムを削除
df_merge = df_merge.drop(columns=['folder'])
# 結果を表示


Folder A contains 48 images.
Folder B contains 30 images.
Folder C contains 38 images.


  return Tr / Tm
  return Tr / Tm


In [3]:
#SVM

# === 1. データの読み込み ===
merged_data_csv = "/home/data/0203_energee_after/merged_data.csv"
df = pd.read_csv(merged_data_csv)
# === 2. 特徴量とターゲット変数の定義 ===
X = df[["MSE", "size_count", "R"]]  # 特徴量
y = df["Label"]  # 目的変数

# === 3. 訓練データとテストデータに分割 ===
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
# 訓練データとテストデータのクラスごとのカウント
train_counts = y_train.value_counts().sort_index()
test_counts = y_test.value_counts().sort_index()

# 結果を表示
print("Train Data Class Distribution:")
print(train_counts)
print("\nTest Data Class Distribution:")
print(test_counts)


# === 4. 特徴量の標準化 ===
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# === 5. SVMの学習 (RBFカーネル) ===
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', decision_function_shape='ovr')
svm_model.fit(X_train, y_train)


# 学習済みモデルを保存
model_path = "svm_model.pkl"
joblib.dump(svm_model, model_path)

# 標準化のスケーラーも保存
scaler_path = "scaler.pkl"
joblib.dump(scaler, scaler_path)


# === 6. 予測と評価 ===
y_pred = svm_model.predict(X_test)

# 精度の表示
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1: {f1:.4f}")

# クラス分類の詳細なレポート
print("Classification Report:\n", classification_report(y_test, y_pred))

# === 7. 混同行列の可視化 ===
cm = confusion_matrix(y_test, y_pred)
# plt.figure(figsize=(6, 5))
# sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=[0, 1, 2], yticklabels=[0, 1, 2])
# plt.xlabel("Predicted Label")
# plt.ylabel("True Label")
# plt.title("Confusion Matrix")
# plt.show()


Train Data Class Distribution:
Label
0    33
1    21
2    27
Name: count, dtype: int64

Test Data Class Distribution:
Label
0    15
1     9
2    11
Name: count, dtype: int64
Accuracy: 0.8571
Precision: 0.8444
Recall: 0.8512
F1: 0.8450
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.87      0.93        15
           1       0.70      0.78      0.74         9
           2       0.83      0.91      0.87        11

    accuracy                           0.86        35
   macro avg       0.84      0.85      0.84        35
weighted avg       0.87      0.86      0.86        35



In [18]:
# === モデルとスケーラーの読み込み ===
model_path = "svm_model.pkl"
scaler_path = "scaler.pkl"

svm_model = joblib.load(model_path)
scaler = joblib.load(scaler_path)

# === 新しいデータの読み込み ===
new_data_csv = "/home/data/0203_energee_after/merged_data.csv"  # 新しいデータ
df_new = pd.read_csv(new_data_csv)

# === 特徴量の抽出と標準化 ===
X_new = df_new[["MSE", "size_count", "R"]]  # 学習時と同じ特徴量を使用
X_new = scaler.transform(X_new)  # 標準化

# === 予測 ===
y_pred_new = svm_model.predict(X_new)

# 結果をDataFrameに追加
df_new["Predicted_Label"] = y_pred_new

# 予測結果の確認
print(df_new[["MSE", "size_count", "R", "Predicted_Label"]])

# CSVとして保存（オプション）
df_new.to_csv("/home/data/0203_energee_after/predicted_results.csv", index=False)


             MSE  size_count         R  Predicted_Label
0    1620.546875    156181.0  0.029932                0
1     846.630342    165731.0  0.022063                0
2    1826.250000    217898.0  0.033801                0
3    2021.876611    221644.0  0.027558                0
4     760.463415    137450.0  0.017892                0
..           ...         ...       ...              ...
111   934.046624    274552.0  0.465006                2
112  1498.111324    247262.0  0.399253                2
113  1800.068182    243032.0  0.370821                2
114  1307.536062    202447.0  0.566033                2
115  1224.985586    238736.0  0.439945                2

[116 rows x 4 columns]


In [19]:
end = time.time()
print(f"Total time: {end - start:.2f} sec")

Total time: 85.72 sec
