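# Asset Class Inference

This notebook downloads example images for a few industrial asset types through the Bing Image Search API, trains a ResNet18 image classifier on them with fastai, and exports the trained model to `asset_classification.pkl`.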

## Imports 

In [1]:
# Make sure we've got the latest version of fastai:
!pip install -Uqq fastai
!pip install azure-cognitiveservices-search-imagesearch

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandas-profiling 3.2.0 requires joblib~=1.1.0, but you have joblib 1.4.2 which is incompatible.


Collecting azure-cognitiveservices-search-imagesearch
  Downloading azure_cognitiveservices_search_imagesearch-2.0.1-py2.py3-none-any.whl.metadata (5.7 kB)
Collecting msrest>=0.6.21 (from azure-cognitiveservices-search-imagesearch)
  Downloading msrest-0.7.1-py3-none-any.whl.metadata (21 kB)
Collecting azure-common~=1.1 (from azure-cognitiveservices-search-imagesearch)
  Downloading azure_common-1.1.28-py2.py3-none-any.whl.metadata (5.0 kB)
Collecting azure-mgmt-core<2.0.0,>=1.2.0 (from azure-cognitiveservices-search-imagesearch)
  Downloading azure_mgmt_core-1.5.0-py3-none-any.whl.metadata (4.3 kB)
Collecting azure-core>=1.31.0 (from azure-mgmt-core<2.0.0,>=1.2.0->azure-cognitiveservices-search-imagesearch)
  Downloading azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Collecting isodate>=0.6.0 (from msrest>=0.6.21->azure-cognitiveservices-search-imagesearch)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Collecting requests-oauthlib>=0.5.0 (from msrest>=0.6.21->azur


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from fastai.vision.all import *
import os
import shutil
import fastai
from pathlib import Path
import requests
import json

### Define Constants

In [3]:
# Bing Search subscription key. It is a secret, so it is read from the environment
# instead of being stored in the notebook.
# NOTE(review): a key that was ever committed in plain text should be treated as
# compromised and rotated.
key = os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", "")
if not key:
    print("BING_SEARCH_V7_SUBSCRIPTION_KEY is not set; Bing image search requests will be rejected.")

### Define Functions

In [None]:
def download_images(urls, save_folder='downloaded_images', timeout=10):
    """Download every image URL in `urls` into `save_folder`.

    Files are named ``image_<index>.<extension>``, where ``<index>`` is the
    position of the URL in `urls` and ``<extension>`` is derived from the
    response's Content-Type header. Responses that are not images (or carry
    no Content-Type at all) are skipped. Request failures are reported with
    ``print`` and do not stop the remaining downloads.

    Args:
        urls: Iterable of URL strings to fetch.
        save_folder: Directory (str or path-like) to write into; created,
            including parents, if it does not exist.
        timeout: Seconds to wait for the server before giving up on a URL,
            so one unresponsive host cannot stall the whole run.

    Returns:
        None.
    """
    # Create a directory to save images if it doesn't exist
    os.makedirs(save_folder, exist_ok=True)

    for i, url in enumerate(urls):
        try:
            # The context manager releases the streamed connection even when
            # the response is skipped or an error occurs part-way through.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()  # Ensure we handle bad responses

                # Keep only the media type: drop parameters such as
                # "; charset=..." and tolerate a missing header.
                content_type = response.headers.get('Content-Type', '')
                media_type = content_type.split(';')[0].strip().lower()
                if not media_type.startswith('image/'):
                    print(f"Skipping non-image URL: {url}")
                    continue

                # "image/svg+xml" -> "svg", "image/jpeg" -> "jpeg"
                extension = media_type.split('/')[-1].split('+')[0]

                # Create the file path for saving the image
                image_path = os.path.join(save_folder, f'image_{i}.{extension}')

                # Save the image in chunks
                with open(image_path, 'wb') as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)

            print(f"Image {i+1} downloaded successfully: {image_path}")

        except requests.exceptions.RequestException as e:
            print(f"Error downloading image {i+1} from {url}: {e}")

In [None]:
def search_images_bing_rest(key, term, count=50, offset=0, timeout=10):
    """Search for images using the Bing Image Search API with a direct REST call.

    Args:
        key: Subscription key sent in the ``Ocp-Apim-Subscription-Key`` header.
        term: Search query string.
        count: Number of results to request.
        offset: Number of results to skip before the first returned one.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A list with the ``contentUrl`` of every returned image. An empty list
        is returned (and a message printed) when the request fails, the API
        answers with a non-200 status, the body is not valid JSON, or no
        images are found.
    """
    endpoint = "https://api.bing.microsoft.com/v7.0/images/search"
    headers = {"Ocp-Apim-Subscription-Key": key}
    params = {
        "q": term,
        "count": count,
        "offset": offset,
        "minHeight": 128,
        "minWidth": 128,
    }

    # Network failures follow the same print-and-return-[] convention as HTTP
    # errors, so one failed search does not abort the caller's loop.
    try:
        response = requests.get(endpoint, headers=headers, params=params, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error occurred: {e}")
        return []

    if response.status_code != 200:
        print(f"Error occurred: {response.status_code} - {response.text}")
        return []

    try:
        results = response.json()
    except ValueError as e:
        # A 200 response whose body is not JSON
        print(f"Error occurred: invalid JSON in response - {e}")
        return []

    images = results.get("value") if isinstance(results, dict) else None
    if not images:
        print("No images found.")
        return []

    # Skip entries without a direct image URL instead of failing on them
    return [img["contentUrl"] for img in images if "contentUrl" in img]


### Get Images

In [None]:
# Asset categories to collect images for
asset_types = ("Centrifugal Pump", "Centrifugal Fan", "Robot")

# Root folder for the image dataset
path = Path("assets")

In [None]:
# Start from a clean slate: drop any previous download directory, then recreate it
if path.exists():
    shutil.rmtree(path)
path.mkdir()

for o in asset_types:
    # Each asset type gets its own sub-folder under `path`
    dest = path / o
    dest.mkdir(parents=True, exist_ok=True)

    # Query Bing for "<asset type> in Factory", requesting 150 results
    results = search_images_bing_rest(key, f'{o} in Factory', count=150)

    # Store the downloaded files in this asset type's sub-folder
    download_images(results, save_folder=dest)

In [None]:
# Collect the image files under `path` using fastai's get_image_files
fns = get_image_files(path)

# Display the collected file list as the cell output
fns

In [4]:
# Find downloaded files that cannot be opened as images
failed = verify_images(fns)

# Delete the unreadable files so they cannot break data loading or training later
failed.map(Path.unlink)

# Display the files that were removed
failed

NameError: name 'fns' is not defined

In [None]:
# Blueprint describing how files on disk become labelled training samples
assets = DataBlock(
    # Inputs are images, targets are categories
    blocks=(ImageBlock, CategoryBlock),
    # Items are discovered with get_image_files
    get_items=get_image_files,
    # The label of an item is the name of its parent folder
    get_y=parent_label,
    # Hold out 20% of the items for validation; the seed keeps the split fixed
    splitter=RandomSplitter(seed=42, valid_pct=0.2),
    # Resize every item to 128 pixels
    item_tfms=Resize(128),
)

In [None]:
# Build the data loaders from the DataBlock, using `path` as the data source
dls = assets.dataloaders(path)

In [None]:
# Visual sanity check of the validation data:
# show up to 4 items from a batch, laid out in a single row
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Re-configure the DataBlock with a different item transform
assets = assets.new(
    # Random resized crop to 224 pixels with min_scale=0.5
    item_tfms=RandomResizedCrop(224, min_scale=0.5),
    # No extra batch-level transforms are requested
    # NOTE(review): fastai may still apply its default batch transforms when this is None — confirm
    batch_tfms=None)

# Rebuild the data loaders so they use the updated transforms
dls = assets.dataloaders(path)

In [None]:
# Build a learner around a ResNet18 pre-trained on ImageNet, tracking error rate.
# vision_learner is the current name of this factory in fastai; cnn_learner is its
# deprecated alias, and this notebook installs the latest fastai.
learn = vision_learner(dls, resnet18, metrics=error_rate)

# Fine-tune the model for 4 epochs using fastai's fine_tune schedule:
# 1. First trains only the new head while the pre-trained body stays frozen
# 2. Then unfreezes the whole network and trains it for the requested 4 epochs
learn.fine_tune(4)

In [None]:
# Build an interpretation object from the trained learner
# to analyse how the model performs
interp = ClassificationInterpretation.from_learner(learn)

# Plot the confusion matrix:
# it shows which asset classes the model tells apart
# and which ones it confuses with each other
interp.plot_confusion_matrix()

In [None]:
# Plot the 5 images with the highest losses
# on a grid with 4 rows and a 10x10 figure size
# NOTE(review): only 5 images are requested, so a 4-row grid may leave unused axes — confirm the intended layout
interp.plot_top_losses(5, nrows=4, figsize=(10,10))

In [None]:
# Export the trained learner to "asset_classification.pkl"
# NOTE(review): the file is presumably written relative to the learner's path — confirm where it lands
learn.export("asset_classification.pkl")