<h1><span style="color:red">Add Color Statistics for Images in Image Collection</span></h1>

### This sample notebook reads the survey images and adds lightness, hue, saturation, brightness, and RGB statistics to a new version of the survey

## 1. Retrieve survey parameters from the URL

In [1]:
%%javascript
/**
 * Return the value of the query-string parameter `key` from the current page
 * URL (window.location.search), or an empty string when the parameter is
 * missing or has no "=value" part.
 *
 * The key is matched case-insensitively (RegExp flag "i"). The regular
 * expression consumes the whole search string and captures the text between
 * "key=" and the next "&"; replacing the match with "$1" leaves only that
 * captured value, which is then passed through unescape().
 *
 * NOTE(review): escape()/unescape() are deprecated and do not decode UTF-8
 * percent sequences the way decodeURIComponent() does - confirm how the
 * launching application encodes these parameters before changing the decoder.
 */
function getQueryStringValue (key)
{  
    // escape(key) plus the inner replace() backslash-escapes ".", "+" and "*" so the key is matched literally.
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}
// Copy every launch parameter from the page URL into the Python kernel as a
// global string variable of the given name.
//
// The values come straight from the URL and must be treated as untrusted:
// pasting them between single quotes lets a quote or backslash in the URL
// terminate the literal and run arbitrary Python. JSON.stringify() emits a
// double-quoted, fully escaped literal whose escape sequences Python also
// understands, so the value always arrives as plain string data.
function setKernelString (name, value)
{
    IPython.notebook.kernel.execute(name + "=" + JSON.stringify(String(value)));
}

// [Python variable name, query-string parameter name]
[
    ["survey_url", "surveyurl"],
    ["views", "views"],
    ["view", "view"],
    ["user", "user"],
    ["csv_file", "csv"],
    ["dzc_file", "dzc"],
    ["params", "params"],
    ["active_object", "activeobject"]
].forEach(function (pair) {
    setKernelString(pair[0], getQueryStringValue(pair[1]));
});

// Full address of this notebook page, including the query string.
setKernelString("full_notebook_url", window.location);

<IPython.core.display.Javascript object>

In [2]:
# common imports
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display

import pandas as pd
pd.set_option('display.max_colwidth', 0)
    
import numpy as np
import panel as pn

pn.extension()
def printmd(string):
    """Render `string` as Markdown in the output area of the current cell."""
    rendered = Markdown(string)
    display(rendered)

absolutePath = "/home/jovyan/jupyter-suave/temp_csvs/"

# local imports
import sys
sys.path.insert(1, '../../helpers')
import panel_libs as panellibs
import suave_integration as suaveint

# specific imports
from PIL import Image, ImageStat
import glob, os
import csv
import requests
import re



## 2. Read the survey file and navigate to full-size images

In [3]:
# Read the survey CSV (file name supplied through the notebook URL) from the
# temporary CSV folder into a dataframe.
df = panellibs.extract_data(absolutePath + csv_file)
# Number of columns in the survey before any colour statistics are added.
dflen = len(df.columns)

# Derive the location of the full-size images from the address of the
# collection's content.dzc file.
# NOTE(review): when dzc_file starts with the dzgen staging URL, the first
# replace() turns it into a filesystem path ("/lib-nfs/dzgen/..."), yet the
# next cell fetches full_images_location with requests.get(), which needs an
# http(s) URL - confirm which form is expected here.
localdzc = dzc_file.replace("https://dzgen.ucsd.edu/dzgen/lib-staging-uploads","/lib-nfs/dzgen")
full_images_location = localdzc.replace("/content.dzc","/full_images/")

In [4]:
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Download the HTML listing of the full-size image folder. Fail right here on
# an HTTP error (404, 403, ...) instead of parsing an error page and ending up
# with a misleading failure several cells later.
response = requests.get(full_images_location)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

image_links = soup.find_all('a', href=True)

image_urls = []

for link in image_links:
    image_url = link['href']

    # Auto-generated directory listings also contain navigation anchors:
    # column-sort links ("?C=N;O=D"), fragments ("#...") and sub/parent
    # directories ("../"). None of them is an image, so skip them.
    if image_url.startswith(('?', '#')) or image_url.endswith('/'):
        continue

    # urljoin() resolves relative links against the folder URL and returns
    # absolute links unchanged, so no separate http/https check is required.
    image_urls.append(urljoin(full_images_location, image_url))

## 3. Specify color characteristics to extract from images

In [5]:
def f(a, b, c):
    """Return the three current dropdown selections as a single tuple."""
    return a, b, c


# One dropdown per optional group of statistics. The following cell reads the
# selections back through a.value, b.value and c.value.
rgb_choices = ["Compute RGB values", "Don't compute RGB values"]
lightness_choices = ["Compute Lightness values", "Don't compute Lightness values"]
rms_choices = ["Omit Root-Mean-Square for the selected bands", "Include Root-Mean-Square for the selected bands"]

a = widgets.Dropdown(options=rgb_choices)
b = widgets.Dropdown(options=lightness_choices)
c = widgets.Dropdown(options=rms_choices)

# Stack the dropdowns vertically and bind them to f.
ui = widgets.VBox(children=[a, b, c])
formula = widgets.interactive_output(f, dict(a=a, b=b, c=c))

display(ui, formula)

printmd("<b><span style='color:red'>Select variables to include in the output, then run the next cell</span></b>")


VBox(children=(Dropdown(options=('Compute RGB values', "Don't compute RGB values"), value='Compute RGB values'…

Output()

<b><span style='color:red'>Select variables to include in the output, then run the next cell</span></b>

In [6]:
# Turn the dropdown selections into the boolean flags RGB, Light and rms that
# later cells use to choose the output columns, and report each enabled group.
printmd("<b><span style='color:red'>The following color characteristics will be added:</span></b>")

RGB = (a.value == 'Compute RGB values')
Light = (b.value == 'Compute Lightness values')
rms = (c.value == 'Include Root-Mean-Square for the selected bands')

if RGB:
    print('Red-Green-Blue bands: Mean, Median, Standard Deviation: YES')
if Light:
    print('Lightness: Mean, Median, Standard Deviation: YES')
if rms:
    print('Root-Mean-Square for the above bands: YES')
    


<b><span style='color:red'>The following color characteristics will be added:</span></b>

Red-Green-Blue bands: Mean, Median, Standard Deviation: YES
Lightness: Mean, Median, Standard Deviation: YES
Root-Mean-Square for the above bands: YES


## 4. Specify band statistics functions ##

In [7]:
def lightness(im_file):
    """Return [mean, median, rms, stddev] of the image converted to mode 'L'.

    `im_file` is passed unchanged to PIL's Image.open().
    """
    grey = Image.open(im_file).convert('L')
    summary = ImageStat.Stat(grey)
    # Mode 'L' has a single band, so each statistic is read from index 0.
    return [
        per_band[0]
        for per_band in (summary.mean, summary.median, summary.rms, summary.stddev)
    ]

def RGBstats(im_file):
    """Return per-band statistics of the image converted to mode 'RGB'.

    The result holds three lists (red, green, blue), each laid out as
    [mean, median, rms, stddev]. `im_file` is passed unchanged to Image.open().
    """
    rgb = Image.open(im_file).convert('RGB')
    summary = ImageStat.Stat(rgb)
    return [
        [summary.mean[band], summary.median[band], summary.rms[band], summary.stddev[band]]
        for band in range(3)
    ]
def HSVstats(im_file):
    """Return per-band statistics of the image converted to mode 'HSV'.

    The result holds three lists (bands 0, 1 and 2 of the HSV image), each
    laid out as [mean, median, rms, stddev]. `im_file` is passed unchanged to
    Image.open().
    """
    hsv = Image.open(im_file).convert('HSV')
    summary = ImageStat.Stat(hsv)
    return [
        [summary.mean[band], summary.median[band], summary.rms[band], summary.stddev[band]]
        for band in range(3)
    ]

In [8]:
import numpy as np
import colorsys
import requests
from PIL import Image
import numpy as np
from io import BytesIO


# Calculate lightness statistics for an RGB image represented as a NumPy array
def calculate_lightness(rgb_img):
    """Return [mean, median, rms, std] of the weighted lightness of an image.

    `rgb_img` is indexed as (row, column, channel); the three channels are
    combined per pixel as 0.299*R + 0.587*G + 0.114*B before the statistics
    are taken over all pixels.
    """
    channel_weights = np.array([0.299, 0.587, 0.114])
    # Multiplying by the float weights promotes the data to floating point;
    # summing over axis 2 collapses the channels into one value per pixel.
    luma = (rgb_img * channel_weights).sum(axis=2)

    return [
        np.mean(luma),
        np.median(luma),
        np.sqrt(np.mean(luma ** 2)),
        np.std(luma),
    ]

# Calculate HSV statistics for an RGB image represented as a NumPy array
def calculate_hsv_stats(rgb_img):
    """Return statistics of the hue, saturation and value of every pixel.

    Each pixel's channels are divided by 255.0 and converted with
    colorsys.rgb_to_hsv(). The result holds three lists (hue, saturation,
    value), each laid out as [mean, median, rms, std].
    """
    def _summarise(samples):
        # Statistics of one channel, computed from the collected per-pixel values.
        values = np.array(samples)
        return [
            np.mean(values),
            np.median(values),
            np.sqrt(np.mean(values ** 2)),
            np.std(values),
        ]

    hues, saturations, brightnesses = [], [], []

    # Walk the image row by row and pixel by pixel, collecting the converted values.
    for scanline in rgb_img:
        for red, green, blue in scanline:
            hue, saturation, brightness = colorsys.rgb_to_hsv(
                red / 255.0, green / 255.0, blue / 255.0
            )
            hues.append(hue)
            saturations.append(saturation)
            brightnesses.append(brightness)

    return [_summarise(hues), _summarise(saturations), _summarise(brightnesses)]

# Calculate RGB statistics for an RGB image represented as a NumPy array
def calculate_rgb_stats(rgb_img):
    """Return statistics of the red, green and blue channels of an image.

    Parameters
    ----------
    rgb_img : array-like indexed as (row, column, channel)
        Pixel data with the channels in R, G, B order, e.g. np.array() of a
        PIL image in mode 'RGB'.

    Returns
    -------
    list
        Three lists (red, green, blue), each laid out as
        [mean, median, rms, std].
    """
    # An image array is normally uint8. Squaring it in that dtype wraps around
    # modulo 256 (200**2 becomes 64), which silently corrupts the RMS values,
    # so all arithmetic is done on a float64 copy.
    pixels = np.asarray(rgb_img, dtype=np.float64)

    def _channel_stats(channel):
        # Mean, median, root-mean-square and standard deviation of one channel.
        return [
            np.mean(channel),
            np.median(channel),
            np.sqrt(np.mean(channel ** 2)),
            np.std(channel),
        ]

    return [_channel_stats(pixels[:, :, index]) for index in range(3)]

## 5. Process images

In [9]:
# Download every image, compute its colour statistics and collect one record
# (dict) per image; the records become the rows of `newdf`.
all_data = []
counter = 0

# The progress label gets its own name. Binding it to `a` would overwrite the
# RGB dropdown created in section 3, so re-running the selection cell after
# this one would silently switch the RGB columns off.
progress_label = widgets.Label(value="0% done")
display(progress_label)

# Order of the values inside each statistics list returned by the helpers.
stat_names = ('mean', 'median', 'rms', 'std')

numfiles = len(image_urls)
for counter, file in enumerate(image_urls, start=1):
    try:
        # Download and decode inside the try block as well: a dead link or a
        # non-image entry is reported and skipped instead of aborting the run.
        response = requests.get(file)
        response.raise_for_status()
        with Image.open(BytesIO(response.content)) as img:
            # Convert image to RGB values
            rgb_img = np.array(img.convert('RGB'))

        l1 = calculate_lightness(rgb_img)
        h1 = calculate_hsv_stats(rgb_img)
        r1 = calculate_rgb_stats(rgb_img)

        # splitext() removes the extension whatever its length (".jpeg",
        # ".tiff", none at all); slicing off four characters did not.
        file_data = {'#img': os.path.splitext(os.path.basename(file))[0]}

        band_values = (
            ('Lightness', l1),
            ('Hue', h1[0]),
            ('Saturation', h1[1]),
            ('Brightness', h1[2]),
            ('Red', r1[0]),
            ('Green', r1[1]),
            ('Blue', r1[2]),
        )
        for band, values in band_values:
            for stat_name, value in zip(stat_names, values):
                file_data[band + '_' + stat_name] = value

        all_data.append(file_data)
    except Exception as e:
        print(file, "There was an issue: ", e)
    progress_label.value = str(int(counter / numfiles * 100)) + "% done"

newdf = pd.DataFrame(all_data).fillna('')
printmd("<b><span style='color:red'>All files processed</span></b>")

Label(value='0% done')

<b><span style='color:red'>All files processed</span></b>

## 6. Add the result to the survey dataframe

In [10]:
# Statistics kept for every band; the RMS column is included only on request.
stat_suffixes = ['mean', 'median', 'rms', 'std'] if rms else ['mean', 'median', 'std']

# Brightness, hue and saturation are always reported; lightness and the
# RGB bands are added according to the selections made in section 3.
band_names = ['Brightness', 'Hue', 'Saturation']
if Light:
    band_names.append('Lightness')
if RGB:
    band_names.extend(['Red', 'Green', 'Blue'])

# '#img' is the image name column and always comes first.
columnTitles = ['#img'] + [
    band + '_' + suffix
    for band in band_names
    for suffix in stat_suffixes
]

printmd("<br><b><span style='color:red'>The following columns will be added:</span></b>")    
print(columnTitles[1:])

# Keep only the selected columns, in the order built above.
newdf = newdf[columnTitles]


<br><b><span style='color:red'>The following columns will be added:</span></b>

['Brightness_mean', 'Brightness_median', 'Brightness_rms', 'Brightness_std', 'Hue_mean', 'Hue_median', 'Hue_rms', 'Hue_std', 'Saturation_mean', 'Saturation_median', 'Saturation_rms', 'Saturation_std', 'Lightness_mean', 'Lightness_median', 'Lightness_rms', 'Lightness_std', 'Red_mean', 'Red_median', 'Red_rms', 'Red_std', 'Green_mean', 'Green_median', 'Green_rms', 'Green_std', 'Blue_mean', 'Blue_median', 'Blue_rms', 'Blue_std']


In [12]:
# Append "#number" to every column name of the statistics table, restore the
# plain '#img' name of the image column, then join the statistics to the
# survey dataframe on '#img' (outer join).
collist = [name + "#number" for name in newdf.columns]
newdf.columns = collist
newdf = newdf.rename(columns={'#img#number': '#img'})

df = pd.merge(df, newdf, on='#img', how='outer')
printmd("<b><span style='color:red'>Dataframe created</span></b>")

<b><span style='color:red'>Dataframe created</span></b>

## 7. Examine the generated dataframe

In [15]:
# Show the statistics table with every column visible, hiding any geometry
# column. The columns to hide are selected with the same substring test that
# detects them: dropping the exact label 'geometry' raised a KeyError whenever
# the matching column carried a different name (e.g. 'geometry#number').
with pd.option_context("display.max_columns", None):
    geometry_columns = [col for col in newdf.columns if "geometry" in col]
    # drop() with an empty list simply returns the table unchanged.
    display(newdf.drop(columns=geometry_columns))
    
    

Unnamed: 0,#img,Brightness_mean#number,Brightness_median#number,Brightness_std#number,Hue_mean#number,Hue_median#number,Hue_std#number,Saturation_mean#number,Saturation_median#number,Saturation_std#number,Lightness_mean#number,Lightness_median#number,Lightness_std#number,Red_mean#number,Red_median#number,Red_std#number,Green_mean#number,Green_median#number,Green_std#number,Blue_mean#number,Blue_median#number,Blue_std#number
0,US,0.80143,0.976471,0.287507,0.479801,0.568376,0.347426,0.121498,0.021878,0.210077,192.849155,231.9315,75.840826,185.885225,224.0,80.762098,194.674994,235.0,75.895539,201.712731,248.0,75.563624
1,image_not_available,0.909161,1.0,0.283182,0.0,0.0,0.0,0.0,0.0,0.0,231.835983,255.0,72.21149,231.835983,255.0,72.21149,231.835983,255.0,72.21149,231.835983,255.0,72.21149


## 8. Save the new version of the CSV file and name the new survey

In [16]:
# Hand the merged survey dataframe to the SuAVE integration helper together
# with the temporary CSV folder and the original CSV file name. The returned
# value is kept in new_file and passed to create_survey() in section 9.
# NOTE(review): presumably new_file is the name/path of the newly written CSV
# version - confirm against suave_integration.save_csv_file.
new_file = suaveint.save_csv_file(df, absolutePath, csv_file)

<b><span style='color:red'>A new temporary file will be created at: </span></b>

/home/jovyan/jupyter-suave/temp_csvs/joeykaminsky2_Tester_13_v1.csv


In [17]:
#Input survey name

from IPython.display import display
# continuous_update=False makes the text box commit its value when Enter is
# pressed (or the box loses focus) rather than on every keystroke.
input_text = widgets.Text(placeholder='Enter Survey Name...', continuous_update=False)
output_text = widgets.Text()

def bind_input_to_output(sender):
    """Copy the committed survey name from the input box into the echo box."""
    output_text.value = input_text.value

# Text.on_submit() is deprecated in ipywidgets 8 and emits a DeprecationWarning.
# The documented replacement is to observe changes of `value` on a widget
# created with continuous_update=False.
input_text.observe(bind_input_to_output, names='value')

printmd("<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>")
# Display input text box widget for input
display(input_text)

display(output_text)


  input_text.on_submit(bind_input_to_output)


<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>

Text(value='', placeholder='Enter Survey Name...')

Text(value='')

In [None]:
#Print survey name
# The echo box is filled only after Enter is pressed in the input box. Fall
# back to the text currently typed there so that a name entered without
# pressing Enter is not silently replaced by an empty survey name.
survey_name = output_text.value or input_text.value
printmd("<b><span style='color:red'>Survey Name is: </span></b>" + survey_name)
if not survey_name:
    printmd("<b><span style='color:red'>No survey name entered - type a name above and run this cell again</span></b>")

## 9. Generate the survey and create survey URL

In [None]:
# Ask the SuAVE integration helper to create the new survey from the saved CSV
# version (new_file) under the chosen survey_name. The remaining arguments are
# the launch parameters read from the notebook URL in section 1.
# NOTE(review): the section title says this also produces the survey URL -
# confirm what create_survey displays or returns.
suaveint.create_survey(survey_url,new_file, survey_name, dzc_file, user, csv_file, view, views)