# Hands-on Session for Customized Image Mining 2

Credit to: [Youngeui Kim](https://cis.appstate.edu/directory/youngeui-kim-phd), [Yuxiao (Rain) Luo](https://yuxiaoluo.github.io)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/YuxiaoLuo/AI_Intro/blob/main/week11_Customized_ImageMining_2.ipynb)

## Step 1: Create your folders and save image data respectively

- create train data directories
- create validation data directories

See the graph below:
```
/train/
    /Positive/
        positive1.jpeg
        positive2.jpeg
    /Negative/
        negative1.jpeg
        negative2.jpeg
/val/
    /Positive/
        positive1.jpeg
        positive2.jpeg
    /Negative/
        negative1.jpeg
        negative2.jpeg
```

- Download the files needed for model training: https://github.com/YuxiaoLuo/AI_Intro/tree/main/data/customized_image_mining

If you are using Google Colab, mount your google drive to Python.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

## Step 2: Data loading and Data Generating diversifying

- If you are using Google Colab.

In [None]:
# Define data paths
train_dir = '/content/drive/MyDrive/train'
val_dir = '/content/drive/MyDrive/val'

- If you run it locally, change your folder path accordingly.

In [None]:
# Define data paths
train_dir = '/data/customized_image_mining/train'
val_dir = '/data/customized_image_mining/val'

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input

# Data generators
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # Use VGG16 preprocessing
    rotation_range=30,                        # Augmentation: Random rotations
    width_shift_range=0.2,                    # Augmentation: Horizontal shifts
    height_shift_range=0.2,                   # Augmentation: Vertical shifts
    shear_range=0.2,                          # Augmentation: Shearing
    zoom_range=0.2,                           # Augmentation: Zooming
    horizontal_flip=True,                     # Augmentation: Flipping
    fill_mode='nearest'                       # Fill missing pixels after transformations
)

val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Load data from directories
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),                   # Resize images to 224x224 (VGG16 input size)
    batch_size=32,                            # Number of images per batch
    class_mode='categorical'                  # Labels are categorical
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

# Check the classes
print(train_generator.class_indices)  # {'Negative': 0, 'Positive': 1}

## Step 3: Customize the VGG16 Model


In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load VGG16 base model
base_model = VGG16(weights='imagenet', include_top=False)

# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling
x = Dense(1024, activation='relu')(x)  # Fully connected layer
predictions = Dense(2, activation='softmax')(x)  # Output layer with 2 classes

# Create the full model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## Step 4: Train the Custom Layers & Save the customized model

In [None]:
# Data generators for training and validation
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
train_generator = train_datagen.flow_from_directory(
    '/content/drive/MyDrive/Colab Notebooks/train',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_generator = val_datagen.flow_from_directory(
    '/content/drive/MyDrive/Colab Notebooks/val',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

# Train the model
model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10
)

# Save the model
model.save('/content/drive/MyDrive/Colab Notebooks/custom_vgg16_sentiment_model.h5')
Step 5-1: Applying the Customized Model to Your Data
--- Once trained, we can apply the model as follows:
from tensorflow.keras.models import load_model
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the customized model
model = load_model('/content/drive/MyDrive/Colab Notebooks/custom_vgg16_sentiment_model.h5')

# Function to preprocess and predict
def predict_sentiment(img_path):
    img = load_img(img_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = preprocess_input(np.expand_dims(img_array, axis=0))
    prediction = model.predict(img_array)
    sentiment_labels = ['Negative', 'Positive']
    return sentiment_labels[np.argmax(prediction)], np.max(prediction)


# Predict on a new (individual) image
sentiment, confidence = predict_sentiment('/content/drive/MyDrive/Colab Notebooks/image.jpg')
print(f"Sentiment: {sentiment}, Confidence: {confidence}")

## Step 5-2 (1) : Applying the Customized Model to Your Bulk Data

In [None]:
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# Load the customized model
model = load_model('/content/drive/MyDrive/Colab Notebooks/custom_vgg16_sentiment_model.h5')


# Function to predict sentiment
def predict_sentiment(img_path):
    img = load_img(img_path, target_size=(224, 224))  # Resize image to 224x224
    img_array = img_to_array(img)                     # Convert to array
    img_array = preprocess_input(np.expand_dims(img_array, axis=0))  # Preprocess for VGG16
    prediction = model.predict(img_array)             # Predict with the model
    sentiment_labels = ['Negative', 'Positive']       # Define labels
    return sentiment_labels[np.argmax(prediction)], np.max(prediction)  # Return sentiment and confidence


# checking the data
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ReviewData_updated.csv', encoding='ISO-8859-1')
filtered_data = df.dropna(subset=['Image'])
filtered_data

## Step 5-2 (2) : Applying the Customized Model to Your Bulk Data

In [1]:
# load my data to predict
if __name__ == "__main__":
    # Load image metadata
    df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ReviewData_updated.csv', encoding='ISO-8859-1')
    filtered_data = df.dropna(subset=['Image'])

    # Initialize variables for results
    sentiment_counts = {'Negative': 0, 'Positive': 0}
    predicted_sentiments = []
    image_names = []
    confidences = []

    # Process each image
    for index, row in filtered_data.iterrows():
        image_name = str(int(row['Image']))
        img_path = f'/content/drive/MyDrive/Colab Notebooks/bulk/{image_name}.jpeg'

        try:
            # Predict sentiment for the image
            sentiment, confidence = predict_sentiment(img_path)

            # Store results
            predicted_sentiments.append(sentiment)
            image_names.append(image_name)
            confidences.append(confidence)

            # Update sentiment counts
            if sentiment in sentiment_counts:
                sentiment_counts[sentiment] += 1

        except Exception as e:
            print(f"Error processing image {image_name}: {e}")

    # Print sentiment counts
    print("Sentiment Counts:")
    for sentiment, count in sentiment_counts.items():
        print(f"{sentiment}: {count}")

    # Create a results DataFrame
    results_df = pd.DataFrame({
        'image_name': image_names,
        'predicted_sentiment': predicted_sentiments,
        'confidence': confidences
    })


    # Save results to CSV
    results_df.to_csv('/content/drive/MyDrive/Colab Notebooks/ReviewData_updated_With_Image.csv', index=False)

NameError: name 'pd' is not defined