## Crop Analysis for English, Arabic, and Paired English+Arabic Memes

In [None]:
import logging
import shlex
import subprocess
import sys
import io
import pandas as pd
from collections import namedtuple
from pathlib import Path

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

# Configure the root logger so that only ERROR (and more severe) records are emitted.
logging.basicConfig(level=logging.ERROR)

In [None]:
import platform

# platform.system() name -> sub-folder of ./bin that holds the binary for that OS.
BIN_MAPS = dict(Darwin="mac", Linux="linux")

# Project root, one level above the working directory (replaced below when on Colab).
HOME_DIR = Path("../").expanduser()

try:
    import google.colab
    ! pip install pandas scikit-learn scikit-image statsmodels requests dash
    ! [[ -d image-crop-analysis ]] || git clone https://github.com/twitter-research/image-crop-analysis.git
    HOME_DIR = Path("./image-crop-analysis").expanduser()
    IN_COLAB = True
except:
    IN_COLAB = False

# Put the project's src/ folder on the import path.
sys.path.append(str(HOME_DIR / "src"))

# Saliency binary (chosen per operating system through BIN_MAPS) and its model file.
bin_dir = HOME_DIR / Path("./bin")
bin_path = bin_dir / BIN_MAPS[platform.system()] / "candidate_crops"
model_path = bin_dir / "fastgaze.vxm"

# Meme images (the joined pairs are written here as well) ...
data_dir = HOME_DIR / Path("./data/")
# ... and one plot folder per experiment.
data_dir_plot_en = HOME_DIR / Path("./data_plot/En")
data_dir_plot_ar = HOME_DIR / Path("./data_plot/Ar")
data_dir_plot_bi = HOME_DIR / Path("./data_plot/En_Ar")
data_dir_plot_enar = HOME_DIR / Path("./data_plot/En_Ar_Paired")
data_dir_plot_aren = HOME_DIR / Path("./data_plot/Ar_En_Paired")

# Report every folder explicitly. Bare `.exists()` expressions only surface the
# value of the last one in a notebook cell, and the Ar_En_Paired folder was
# never checked at all.
for folder in (
    data_dir,
    data_dir_plot_en,
    data_dir_plot_ar,
    data_dir_plot_bi,
    data_dir_plot_enar,
    data_dir_plot_aren,
):
    print(f"{folder}: {'found' if folder.exists() else 'MISSING'}")

In [None]:
from PIL import Image
from image_manipulation import join_images

In [None]:
from crop_api import ImageSaliencyModel, is_symmetric, parse_output, reservoir_sampling
# Build the saliency model from the crop binary and the model file set up earlier.
model = ImageSaliencyModel(
    crop_binary_path=bin_path,
    crop_model_path=model_path,
)

In [None]:
# Show which matplotlib version is in use (last expression of the cell).
plt.matplotlib.__version__

### Function for the Salient Point

In [None]:
# Run the saliency binary on one image and return its parsed output.
def get_salient_info(img_path):
    """Run the crop binary on *img_path* and return the parsed result.

    The binary at ``bin_path`` is invoked with ``model_path``, the absolute
    image path and the ``show_all_points`` argument; its standard output is
    handed to ``parse_output``.

    Args:
        img_path: Image location, as a ``str`` or ``pathlib.Path``.

    Returns:
        Whatever ``parse_output`` returns for the binary's output, or ``None``
        when the binary cannot be run, exits with an error, or its output
        cannot be parsed.
    """
    img_path = Path(img_path)
    # An argument list (no shell) keeps paths containing spaces or quotes
    # intact and avoids shell interpretation of the file name.
    cmd = [
        str(bin_path),
        str(model_path),
        str(img_path.absolute()),
        "show_all_points",
    ]
    try:
        output = subprocess.check_output(cmd)
        return parse_output(output)
    except Exception as err:
        # Best-effort by design: callers receive None on any failure. Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit through and
        # reports the cause.
        print(f"Running the model to get salient point fails. Returning None. ({err!r})")
        return None

### Experiment 1.1: Analyzing English Memes Separately

In [None]:
# Run the crop plot (topK=1) on the English memes 1en.jpeg .. 40en.jpeg.
for i in range(1, 41):
    name_en = f"{i}en.jpeg"
    img_path = data_dir / name_en
    model.plot_img_crops(img_path, topK=1)
    # Keep the figure in the ./data_plot/En folder under the meme's own file name.
    plt.savefig(data_dir_plot_en / name_en, bbox_inches="tight")

### Experiment 1.2: Analyzing Arabic Memes Separately

In [None]:
# Run the crop plot (topK=1) on the Arabic memes 1ar.jpeg .. 40ar.jpeg.
for i in range(1, 41):
    name_ar = f"{i}ar.jpeg"
    img_path = data_dir / name_ar
    model.plot_img_crops(img_path, topK=1)
    # Keep the figure in the ./data_plot/Ar folder under the meme's own file name.
    plt.savefig(data_dir_plot_ar / name_ar, bbox_inches="tight")

### Experiment 1.3: Analyzing English and Arabic in one image 

In [None]:
# Run the crop plot (topK=1) on the bilingual memes 1bi.jpeg .. 40bi.jpeg.
for i in range(1, 41):
    name_bi = f"{i}bi.jpeg"
    img_path = data_dir / name_bi
    model.plot_img_crops(img_path, topK=1)
    # Keep the figure in the ./data_plot/En_Ar folder under the meme's own file name.
    plt.savefig(data_dir_plot_bi / name_bi, bbox_inches="tight")

### Experiment 2.1 : Analyzing paired Images, English and Arabic together

In [None]:

# Experiment 2.1 -- paired memes, English on the left and Arabic on the right.
# For every pair this cell:
#   1) joins the English and Arabic images horizontally and saves the result
#      as "<i>enar.jpeg";
#   2) runs the saliency algorithm on the joined image and saves the crop plot;
#   3) decides which half holds the most salient point: an x value between 0
#      and (image width)/2 is in the English region, anything larger is in the
#      Arabic region;
#   4) increases the matching counter and records a row for the CSV file.

# Number of times the English / Arabic meme is selected in a joined image.
counter_en = 0
counter_ar = 0
region = ""
rows = []
for i in range(1, 41):
    name_enar = f"{i}enar.jpeg"
    img_path = data_dir / name_enar

    # Join English (left) and Arabic (right). The context managers close both
    # source files as soon as the joined image has been built and saved.
    with Image.open(data_dir / f"{i}en.jpeg") as img_en, \
            Image.open(data_dir / f"{i}ar.jpeg") as img_ar:
        img = join_images([img_en, img_ar], col_wrap=2, img_size=(500, 500), padding=1)
        img.save(img_path, "JPEG")

    # Plot the crop of the joined image and keep the figure in the
    # ./data_plot/En_Ar_Paired folder.
    model.plot_img_crops(img_path, topK=1)
    plot_path = data_dir_plot_enar / name_enar
    plt.savefig(plot_path, bbox_inches="tight")

    salient_info = get_salient_info(img_path)
    if salient_info is None:
        # get_salient_info signals failure with None; skip this pair instead of
        # crashing on the subscript below and losing the rows collected so far.
        print("No salient point available for " + name_enar + "; skipping it.")
        continue

    all_salient_points = salient_info["salient_point"]
    top_point = all_salient_points[0]
    print("salient info for "+name_enar+" is " , top_point)

    # The left half of the joined image is the English meme.
    if top_point[0] <= img.width/2:
        print("For image "+name_enar,"English Meme is Selected, because the saleint point's X value is less than half of the image's width= "+ str(img.width) + " and it's in the English side")
        region = "English"
        counter_en += 1
    else:
        print("For image "+name_enar,"Arabic Meme is Selected, because the saleint point's X value is bigger than half of the image's width= "+ str(img.width) + " and it's in the Arabic side")
        region = "Arabic"
        counter_ar += 1
    rows.append([name_enar, str(top_point), region, plot_path])

# Totals: English selections first, then Arabic selections.
print(counter_en)
print(counter_ar)

In [None]:
# Save the per-image results as a CSV file and the selection counts as a bar chart.
df = pd.DataFrame(rows, columns=["Name", "Salient Point", "Selected Region", "Plot Directory"])
df.to_csv(data_dir_plot_enar/'enar_region.csv', index=False)
selected_region = ["English", "Arabic"]
number_selected = [counter_en, counter_ar]

# Draw on a fresh figure: plt.bar on its own reuses whichever figure is
# current, which may be one left over from an earlier plotting call.
fig, ax = plt.subplots()
ax.bar(selected_region, number_selected)
ax.set_title("Number of times that English or Arabic regions are selected")
fig.savefig(data_dir_plot_enar/"enar_bar_plot.jpeg")
plt.show()

# Share of pairs whose most salient point fell in the English region. Guarded
# because dividing by an empty tally would raise ZeroDivisionError.
scored_pairs = counter_en + counter_ar
if scored_pairs:
    total = (counter_en*100)/scored_pairs
    print(str(total)+"% of the most salient points of total paired memes are in the English region")
else:
    total = None
    print("No paired memes were scored, so no percentage can be reported.")

### Experiment 2.2: Analyzing paired Images, English and Arabic together (Arabic: Left-side, English: Right-side)

In [None]:
# Experiment 2.2 -- paired memes, Arabic on the left and English on the right.
# For every pair this cell:
#   1) joins the Arabic and English images horizontally and saves the result
#      as "<i>aren.jpeg";
#   2) runs the saliency algorithm on the joined image and saves the crop plot;
#   3) decides which half holds the most salient point: an x value between 0
#      and (image width)/2 is in the Arabic region, anything larger is in the
#      English region;
#   4) increases the matching counter and records a row for the CSV file.

# Number of times the English / Arabic meme is selected in a joined image.
counter_en = 0
counter_ar = 0
region = ""
rows = []
for i in range(1, 41):
    name_aren = f"{i}aren.jpeg"
    img_path = data_dir / name_aren

    # Join Arabic (left) and English (right). The context managers close both
    # source files as soon as the joined image has been built and saved.
    with Image.open(data_dir / f"{i}ar.jpeg") as img_ar, \
            Image.open(data_dir / f"{i}en.jpeg") as img_en:
        img = join_images([img_ar, img_en], col_wrap=2, img_size=(500, 500), padding=1)
        img.save(img_path, "JPEG")

    # Plot the crop of the joined image and keep the figure in the
    # ./data_plot/Ar_En_Paired folder.
    model.plot_img_crops(img_path, topK=1)
    plot_path = data_dir_plot_aren / name_aren
    plt.savefig(plot_path, bbox_inches="tight")

    salient_info = get_salient_info(img_path)
    if salient_info is None:
        # get_salient_info signals failure with None; skip this pair instead of
        # crashing on the subscript below and losing the rows collected so far.
        print("No salient point available for " + name_aren + "; skipping it.")
        continue

    all_salient_points = salient_info["salient_point"]
    top_point = all_salient_points[0]
    print("salient info for "+name_aren+" is " , top_point)

    # The left half of the joined image is the Arabic meme.
    if top_point[0] <= img.width/2:
        print("For image "+name_aren,"Arabic Meme is Selected, because the saleint point's X value is less than half of the image's width= "+ str(img.width) + " and it's in the Arabic side")
        region = "Arabic"
        counter_ar += 1
    else:
        print("For image "+name_aren,"English Meme is Selected, because the saleint point's X value is bigger than half of the image's width= "+ str(img.width) + " and it's in the English side")
        region = "English"
        counter_en += 1
    rows.append([name_aren, str(top_point), region, plot_path])

# Totals: English selections first, then Arabic selections.
print(counter_en)
print(counter_ar)

In [None]:
# Save the per-image results as a CSV file and the selection counts as a bar chart.
df = pd.DataFrame(rows, columns=["Name", "Salient Point", "Selected Region", "Plot Directory"])
df.to_csv(data_dir_plot_aren/'aren_region.csv', index=False)
selected_region = ["English", "Arabic"]
number_selected = [counter_en, counter_ar]

# Draw on a fresh figure: plt.bar on its own reuses whichever figure is
# current, which may be one left over from an earlier plotting call.
fig, ax = plt.subplots()
ax.bar(selected_region, number_selected)
ax.set_title("Number of times that English or Arabic regions are selected")
fig.savefig(data_dir_plot_aren/"aren_bar_plot.jpeg")
plt.show()

# Share of pairs whose most salient point fell in the English region. Guarded
# because dividing by an empty tally would raise ZeroDivisionError.
scored_pairs = counter_en + counter_ar
if scored_pairs:
    total = (counter_en*100)/scored_pairs
    print(str(total)+"% of the most salient points of total paired memes are in the English region")
else:
    total = None
    print("No paired memes were scored, so no percentage can be reported.")