In [157]:
import glob
import os
import numpy as np
from sklearn import linear_model, metrics
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from GoogletNet import GoogLeNet

In [158]:
# 이미지 불러오기 및 전처리
def load_and_preprocess_images(img_paths, target_size=(224,224)):
    img_array_list = []
    for img_path in img_paths:
        img = np.array(Image.open(tf.io.gfile.GFile(img_path,'rb')).convert('RGB').resize(target_size, Image.BILINEAR), dtype=np.float32)
        img = img / 255.0
        img = np.expand_dims(img, axis=0)
        img_array_list.append(img)
    return np.vstack(img_array_list)

def create_cav_training_set(concepts, bottleneck, acts):
    x = []
    labels = []
    labels2text = {}
    min_data_points = np.min([acts[concept][bottleneck].shape[0] for concept in acts.keys()])

    for I, concept in enumerate(concepts):
        x.extend(acts[concept][bottleneck][:min_data_points].reshape(min_data_points, -1))
        labels.extend([I] * min_data_points)
        labels2text[I] = concept
    
    x = np.array(x)
    labels = np.array(labels)

    return x, labels, labels2text

def train_lm(lm, x, y, labels2text):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.33, stratify=y)
    
    lm.fit(x_train, y_train)
    y_pred = lm.predict(x_test)
    
    num_classes = max(y) + 1
    acc = {}
    num_correct = 0
    for class_id in range(num_classes):
      # get indices of all test data that has this class.
      idx = (y_test == class_id)
      acc[labels2text[class_id]] = metrics.accuracy_score(
          y_pred[idx], y_test[idx])
      # overall correctness is weighted by the number of examples in this class.
      num_correct += (sum(idx) * acc[labels2text[class_id]])
    acc['overall'] = float(num_correct) / float(len(y_test))
    tf.compat.v1.logging.info('acc per class %s' % (str(acc)))
    return acc

In [152]:
model = GoogLeNet()
layer_names = ['mixed4c', 'mixed4d', 'mixed4e']
concept_names = ['striped']
random_concept_names = ['random1', 'random2', 'random3']
concept_pairs = []

for c in concept_names:
    for rc in random_concept_names:
        concept_pairs.append([c, rc])

layer_outputs = [model.get_layer(name).output for name in layer_names]
activation_model = Model(inputs=model.input, outputs=layer_outputs)

In [111]:
input_imgs = {}
concepts = {}
randoms = {}

concept_path = glob.glob('test_data/striped/*')
random_paths = [
    glob.glob('test_data/random1/*'),
    glob.glob('test_data/random2/*'),
    glob.glob('test_data/random3/*')
]

concept_imgs = load_and_preprocess_images(concept_path)
random_imgs = [load_and_preprocess_images(paths) for paths in random_paths]

concepts['striped'] = concept_imgs
input_imgs['concepts'] = concepts

for i, random_img in enumerate(random_imgs):
   randoms[f'random{i+1}'] = random_img

input_imgs['randoms'] = randoms

In [115]:
for k in input_imgs.keys():
    for k2 in input_imgs[k].keys():
        activations = activation_model.predict(input_imgs[k][k2])
        for i, act in enumerate(activations):
            np.save(f'./acts/{k2}_{layer_names[i]}', act, allow_pickle=False)



In [130]:
# 딕셔너리 초기화
acts = {}

# 파일 불러와서 딕셔너리에 저장
for file_path in glob.glob('./acts/*'):
    # 파일 이름에서 concept와 layer 추출
    file_name = os.path.basename(file_path)
    parts = file_name.split('_')
    concept = parts[0]
    layer = parts[1].replace('.npy', '')
    
    # 데이터 불러오기
    data = np.load(file_path)
    
    # 딕셔너리에 저장
    if concept not in acts:
        acts[concept] = {}
    acts[concept][layer] = data

# 결과 확인
for concept, layers in acts.items():
    for layer, data in layers.items():
        print(f"Concept: {concept}, Layer: {layer}, Data shape: {data.shape}")

Concept: random2, Layer: mixed4c, Data shape: (42, 14, 14, 512)
Concept: random2, Layer: mixed4d, Data shape: (42, 14, 14, 528)
Concept: random2, Layer: mixed4e, Data shape: (42, 14, 14, 832)
Concept: striped, Layer: mixed4c, Data shape: (50, 14, 14, 512)
Concept: striped, Layer: mixed4e, Data shape: (50, 14, 14, 832)
Concept: striped, Layer: mixed4d, Data shape: (50, 14, 14, 528)
Concept: random1, Layer: mixed4e, Data shape: (44, 14, 14, 832)
Concept: random1, Layer: mixed4d, Data shape: (44, 14, 14, 528)
Concept: random1, Layer: mixed4c, Data shape: (44, 14, 14, 512)
Concept: random3, Layer: mixed4e, Data shape: (40, 14, 14, 832)
Concept: random3, Layer: mixed4d, Data shape: (40, 14, 14, 528)
Concept: random3, Layer: mixed4c, Data shape: (40, 14, 14, 512)


In [171]:
x, labels, labels2text = create_cav_training_set(concept_pairs[1], 'mixed4c', acts)

In [172]:
lm = linear_model.SGDClassifier(alpha=0.1, max_iter=1000, tol=1e-3)

In [173]:
acc = train_lm(lm, x, labels, labels2text)

INFO:tensorflow:acc per class {'striped': 0.8461538461538461, 'random2': 0.7142857142857143, 'overall': 0.7777777777777778}


In [174]:
cavs = [-1*lm.coef_[0], lm.coef_[0]]

### 남은거
타겟 클래스와 개념의 cav과 계산 후 tcav 스코어 계산