In [None]:
import spacy
import sqlite3
import pandas as pd
from spacy.lang.en.stop_words import STOP_WORDS
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from tqdm import tqdm
from joblib import Parallel, delayed
import os
from google.cloud import vision
import io
import string
from nltk.corpus import stopwords
from difflib import SequenceMatcher
from textblob import TextBlob
from fuzzywuzzy import process, fuzz
import re
from skimage.feature import graycomatrix, graycoprops
import pytesseract
import cv2

In [None]:
# Build the stop-word list: NLTK's English defaults plus the project-specific extras.
stop = stopwords.words('english')
with open('selected_words.txt', 'r') as f:
    # Every line is comma-separated; each entry is stripped of surrounding
    # whitespace and then has all double-quote characters removed.
    stop_words = [
        entry.strip().replace('"', '')
        for row in f
        for entry in row.split(',')
    ]
stop += stop_words

In [None]:
#Detect and Process Text here
def lower_case(df):
    """Lower-case every entry of the 'text' column.

    The frame is modified in place and the same object is returned so the
    call can be chained with the other cleaning helpers.
    """
    lowered = df['text'].apply(str.lower)
    df['text'] = lowered
    return df

def remove_punctuations(df):
    """Strip ASCII punctuation from every entry of ``df['text']``.

    Each entry is split on whitespace, every character listed in
    ``string.punctuation`` is deleted from each token, and the tokens are
    re-joined with single spaces.  A token made up solely of punctuation
    becomes an empty string, so the joined text can contain consecutive
    spaces; leading and trailing whitespace is trimmed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'text'`` column of strings.  Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``'text'`` replaced by the cleaned strings.
    """
    # The translation table does not depend on the row, so build it once.
    table = str.maketrans('', '', string.punctuation)

    cleaned_text = []
    for text in tqdm(df['text']):
        stripped = [w.translate(table) for w in text.split()]
        cleaned_text.append(" ".join(stripped).strip())

    # An explicit object dtype keeps the column textual even for an empty
    # frame (a bare np.array([]) would silently make it float64).
    df['text'] = np.array(cleaned_text, dtype=object)
    return df

def remove_null(df):
    """Drop incomplete rows when the 'text' column contains a missing value.

    The check looks only at 'text', but once it fires ``dropna`` removes every
    row that has a missing value in any column.  The frame is modified in
    place and returned.
    """
    text_has_missing = df['text'].isnull().any()
    if text_has_missing:
        df.dropna(inplace=True)
    return df

def detect_text(path):
    """Detect text in an image file with the Google Cloud Vision client.

    Parameters
    ----------
    path : str
        Path of the image file; its raw bytes are sent to ``text_detection``.

    Returns
    -------
    list of str
        One entry per returned annotation, each formatted as a newline
        followed by the annotation's description wrapped in double quotes.
        Empty when the response carries no annotations, in which case
        "No Text Detected" is printed.

    Raises
    ------
    Exception
        If the response carries an error message.
    """
    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)

    # Report an API failure before looking at the annotations.
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    text_list = ['\n"{}"'.format(text.description) for text in response.text_annotations]

    # The list is never None; "nothing detected" shows up as an empty list.
    if not text_list:
        print("No Text Detected")

    return text_list


def removeStop(temp_df, stopwords=None):
    """Remove stop words from every entry of ``temp_df['text']``.

    Parameters
    ----------
    temp_df : pandas.DataFrame
        Frame with a ``'text'`` column of strings.  Modified in place.
    stopwords : iterable of str, optional
        Words to drop.  When omitted, the notebook-level ``stop`` list is
        looked up at call time, so re-running the stop-word cell takes effect
        without redefining this function.

    Returns
    -------
    pandas.DataFrame
        The same ``temp_df`` object, for chaining like the other helpers.
    """
    if stopwords is None:
        stopwords = stop
    # A set gives constant-time membership tests instead of scanning a list per word.
    blocked = set(stopwords)
    temp_df['text'] = temp_df['text'].apply(
        lambda x: ' '.join(word for word in x.split() if word not in blocked)
    )
    return temp_df

def cleanDataframeEnglish(df,column_name):
    """Keep only the ASCII alphanumeric entries of one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame.
    column_name : str
        Column whose string entries are filtered.

    Returns
    -------
    pandas.DataFrame
        New frame with a single ``'words'`` column and a fresh 0..n-1 index,
        holding the entries for which both ``str.isalnum()`` and
        ``str.isascii()`` are true, in their original order.
    """
    # isalnum() is already False for empty and whitespace-only strings, so no
    # separate isspace() check is needed.  Collecting into a list and building
    # the frame once avoids the quadratic cost of pd.concat inside a loop.
    kept = [
        word
        for word in df[column_name]
        if word.isalnum() and word.isascii()
    ]
    return pd.DataFrame({'words': pd.Series(kept, dtype=object)})

def executeText(path):
    """Run the text pipeline on one image and return its cleaned word list.

    Steps: detect text, split the first detection into whitespace-separated
    tokens, strip punctuation, lower-case, drop nulls, remove stop words and
    keep only ASCII alphanumeric tokens.

    Parameters
    ----------
    path : str
        Path of the image file handed to ``detect_text``.

    Returns
    -------
    dict
        ``{'Word List': [...]}``; the list is empty when nothing was detected.
    """
    image_text = detect_text(path)
    if not image_text:
        return {'Word List': []}

    # Only entry 0 is used.  NOTE(review): presumably this is the full-text
    # annotation of the Vision response -- confirm against the API docs.
    tokens = image_text[0].replace("\n", " ").split()
    temp_df = pd.DataFrame({'text': tokens})

    temp_df = remove_punctuations(temp_df)
    temp_df = lower_case(temp_df)
    temp_df = remove_null(temp_df)
    # Reuse the shared helper instead of repeating its lambda here.  It edits
    # the frame in place, so its return value is deliberately not used.
    removeStop(temp_df)

    new_df = cleanDataframeEnglish(temp_df, 'text')
    return {'Word List': new_df.values.flatten().tolist()}

In [None]:
# Smoke test: run the text pipeline on one sample image and display the result.
# Despite its name, `word_list` holds the dict returned by executeText
# ({'Word List': [...]}), not a bare list.
word_list = executeText("../images-115-max-keys-400/images/Ace-MR_Tablet/Ace-MR_Tablet1_gaussian_noise.jpg")
word_list


In [None]:

def color_moments(image_path):
    """Compute normalised image moments for every channel of an image.

    For each channel yielded by ``cv2.split`` the function takes
    ``cv2.moments`` and appends, in this order, m00, m01, m02, m10, m11 and
    m20, each divided by that channel's m00.

    NOTE(review): these are spatial moments of the channel intensities rather
    than statistical colour moments (mean / variance / skewness) -- confirm
    that this is the intended feature.

    Parameters
    ----------
    image_path : str
        Path of the image to load with ``cv2.imread``.

    Returns
    -------
    dict
        ``{'Colour Features': [...]}`` with six floats per channel.  A channel
        whose m00 is zero (all pixels zero) contributes six ``0.0`` values
        instead of raising ZeroDivisionError.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load the image (it returns None rather than
        raising).
    """
    img = cv2.imread(image_path)
    if img is None:
        raise OSError("Could not read image: {}".format(image_path))

    # Same ordering as the nested i/j loops with the i + j <= 2 filter.
    moment_keys = [
        'm{}{}'.format(i, j)
        for i in range(3)
        for j in range(3)
        if i + j <= 2
    ]

    colour_features = []
    for channel in cv2.split(img):
        moments = cv2.moments(channel)
        total = moments['m00']
        for key in moment_keys:
            colour_features.append(moments[key] / total if total else 0.0)

    return {'Colour Features': colour_features}


def texture_features(image_path):
    """Compute grey-level co-occurrence (GLCM) texture features of an image.

    The image is converted to greyscale, a symmetric, normalised GLCM is built
    for distance 1 and angle 0, and five ``graycoprops`` properties are read
    from it.

    Parameters
    ----------
    image_path : str
        Path of the image to load with ``cv2.imread``.

    Returns
    -------
    dict
        Keys 'Contrast', 'Dissimilarity', 'Homogeneity', 'Energy' and
        'Coorelation', each holding the first element of the corresponding
        property array.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load the image (it returns None rather than
        raising).
    """
    img = cv2.imread(image_path)
    if img is None:
        raise OSError("Could not read image: {}".format(image_path))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    glcm = graycomatrix(gray, distances=[1], angles=[0], symmetric=True, normed=True)

    # NOTE(review): 'Coorelation' is misspelled, but it is the key this
    # function has always returned, so it is kept to avoid breaking consumers.
    properties = (
        ('Contrast', 'contrast'),
        ('Dissimilarity', 'dissimilarity'),
        ('Homogeneity', 'homogeneity'),
        ('Energy', 'energy'),
        ('Coorelation', 'correlation'),
    )
    return {
        label: graycoprops(glcm, prop).flatten()[0]
        for label, prop in properties
    }

def shape_features(image_path):
    """Summarise the external contours of an Otsu-thresholded image.

    The image is converted to greyscale, binarised with Otsu's threshold and
    its external contours are extracted.  For every contour the function
    records its area, perimeter, bounding-box aspect ratio (width / height)
    and the centre of its bounding box (reported under the 'Centroid' keys),
    then returns the mean and population standard deviation of each measure.

    Parameters
    ----------
    image_path : str
        Path of the image to load with ``cv2.imread``.

    Returns
    -------
    dict
        Keys 'Mean <X>' and 'Std <X>' for X in Area, Perimeter, Aspect Ratio,
        Centroid X and Centroid Y, in that order.  When no contour is found
        every value is ``0.0`` rather than NaN.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load the image (it returns None rather than
        raising).
    """
    img = cv2.imread(image_path)
    if img is None:
        raise OSError("Could not read image: {}".format(image_path))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Threshold the image to create a binary image
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # findContours returns (contours, hierarchy) in OpenCV 4 and
    # (image, contours, hierarchy) in OpenCV 3; the contours are the
    # second-to-last item in both layouts.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Insertion order of this dict fixes the key order of the result.
    measurements = {
        'Area': [],
        'Perimeter': [],
        'Aspect Ratio': [],
        'Centroid X': [],
        'Centroid Y': [],
    }
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        measurements['Area'].append(cv2.contourArea(contour))
        measurements['Perimeter'].append(cv2.arcLength(contour, True))
        measurements['Aspect Ratio'].append(w / h)
        measurements['Centroid X'].append(x + w / 2)
        measurements['Centroid Y'].append(y + h / 2)

    returnDict = {}
    for label, values in measurements.items():
        # np.mean / np.std of an empty list yield NaN and a RuntimeWarning, so
        # fall back to 0.0 when the image has no contours.
        returnDict['Mean ' + label] = float(np.mean(values)) if values else 0.0
        returnDict['Std ' + label] = float(np.std(values)) if values else 0.0
    return returnDict

def pattern_features(image_path):
    """Compute the Shannon entropy (in bits) of an image's grey-level histogram.

    Parameters
    ----------
    image_path : str
        Path of the image to load with ``cv2.imread``.

    Returns
    -------
    dict
        ``{'Entropy': value}`` where ``value`` is a plain Python float.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load the image (it returns None rather than
        raising).
    """
    img = cv2.imread(image_path)
    if img is None:
        raise OSError("Could not read image: {}".format(image_path))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # 256-bin histogram of the grey levels, normalised to sum to 1.
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
    hist_norm = hist / np.sum(hist)

    # eps keeps log2 finite for empty bins; those bins still contribute
    # nothing because they are multiplied by a probability of zero.
    eps = np.finfo(float).eps
    entropy = -np.sum(hist_norm * np.log2(hist_norm + eps))

    # Convert the NumPy scalar to a built-in float.
    return {'Entropy': float(entropy)}





In [None]:
# Spot check: compute the GLCM texture features for a single local image and print them.
texture = texture_features("Accept-SP_Tablet2 (1).jpg")
print(texture)

In [None]:
def executeFeatures(image_path):
    """Run every feature extractor on one image and bundle the results.

    The extractors are called in the listed order, each with ``image_path``,
    and their return values are stored under the paired label.
    """
    extractors = (
        ('Text', executeText),
        ('Color Moments', color_moments),
        ('Texture', texture_features),
        ('Shape', shape_features),
        ('Pattern', pattern_features),
    )
    return {label: extract(image_path) for label, extract in extractors}

In [None]:
# One-element tuple: the trailing comma matters.  ('.jpg') is just the string
# '.jpg', which turns the `in` test below into a substring check that also
# accepts '', '.', '.j' and '.jp' (so files without an extension would pass).
VALID_EXTENSIONS = ('.jpg',)

def is_valid_image(filename):
    """Return True when the file name's extension is in VALID_EXTENSIONS.

    The comparison is case-insensitive and looks only at the name; the file
    content is not inspected.
    """
    return os.path.splitext(filename)[1].lower() in VALID_EXTENSIONS

def process_directory(dir_path):
    """Collect the paths of all valid images under a directory, recursively.

    Parameters
    ----------
    dir_path : str
        Directory to scan.

    Returns
    -------
    list of str
        Paths (``dir_path`` joined with the entry names) of every file
        accepted by ``is_valid_image``.  Entries are visited in sorted name
        order and the contents of a sub-directory are inserted at that
        sub-directory's position.
    """
    image_path = []
    # os.listdir returns names in arbitrary order; sorting makes the result
    # reproducible, which matters to callers that number images by position.
    for filename in sorted(os.listdir(dir_path)):
        file_path = os.path.join(dir_path, filename)
        if os.path.isfile(file_path) and is_valid_image(filename):
            image_path.append(file_path)
        elif os.path.isdir(file_path):
            image_path.extend(process_directory(file_path))
    return image_path

# all_image_paths = []
# # pass every directory path to process_directory function
# for dirpath, dirnames, filenames in os.walk('../images-115-max-keys-400/images'):
#     dir_name = os.path.basename(dirpath)
#     image_path = process_directory(dirpath)
#     all_image_paths.append(image_path)

# for image_path in all_image_paths:
#     for image_p in image_path:
#         medicine_data = executeFeatures(image_p)

In [None]:
import json
def toJson(image_path, directory,counter):
    """Extract the features of one image and store them as a JSON file.

    The file is written to ``metaanalysis/<directory>/<directory>_<counter>.json``
    relative to the current working directory.

    Parameters
    ----------
    image_path : str
        Image handed to ``executeFeatures``.
    directory : str
        Name of the output sub-folder; also the file-name prefix.
    counter : int
        Number appended to the file name.

    Returns
    -------
    int or None
        ``1`` when the output file already exists (nothing is recomputed),
        otherwise ``None`` after the file has been written.
    """
    output_folder = os.path.join("metaanalysis", directory)
    # makedirs also creates the "metaanalysis" parent when it is missing and
    # does not fail if the folder already exists.
    os.makedirs(output_folder, exist_ok=True)

    output_path = os.path.join(output_folder, f"{directory}_{counter}.json")
    print("Outpath:",output_path)
    if os.path.exists(output_path):
        return 1

    # Extract and serialise BEFORE opening the file.  Otherwise a failure would
    # leave an empty file behind that every later run treats as already done.
    payload = json.dumps(executeFeatures(image_path))
    with open(output_path, 'w') as f:
        f.write(payload)

In [7]:
import csv

def find_value_by_name(name, csv_path='../iqr_scores.csv'):
    """Look up the second-column value of the first row whose first column is ``name``.

    Parameters
    ----------
    name : str
        Value to match exactly against the first column.
    csv_path : str, optional
        CSV file to search; defaults to the path this notebook has always used.

    Returns
    -------
    str or None
        The second column of the first matching row, as read from the file,
        or ``None`` when no row matches.
    """
    # newline='' is what the csv module asks for so that it can handle line
    # endings inside quoted fields itself.
    with open(csv_path, 'r', newline='') as file:
        csv_reader = csv.reader(file)
        # Skip the header row; the default keeps an empty file from raising StopIteration.
        next(csv_reader, None)

        for row in csv_reader:
            # Blank lines parse to [] and short rows have no value column.
            if len(row) >= 2 and row[0] == name:
                return row[1]

    return None  # Return None if name not found

# Usage example
# NOTE(review): csv_file is defined here but is not passed to
# find_value_by_name in the call below.
csv_file = '../iqr_scores.csv'  # Replace with your CSV file name/path
name_to_search = 'Acegrow_3D_Tablet'  # Replace with the name you want to search for

result = find_value_by_name(name_to_search)
# The check is truthiness-based, so a matching row whose value is an empty
# string is reported as "no match" as well.
if result:
    print(f"Value for {name_to_search}: {result}")
else:
    print(f"No matching name found for {name_to_search}.")


Value for Acegrow_3D_Tablet: 49569.69868


In [None]:
# Visit every directory below the image root and write one JSON file per image.
# NOTE(review): os.walk already descends into sub-directories and
# process_directory recurses too, so images in nested folders would be
# collected more than once -- confirm the tree is one level deep.
for dirpath, dirnames, filenames in os.walk('../images-115-max-keys-400/images'):
    print("Dir:",dirpath)
    dir_name = os.path.basename(dirpath)
    if dir_name != "images":
        # The root folder itself is skipped; only its descendants are exported.
        image_paths = process_directory(dirpath)
        # Output files are numbered from 1 within each directory.
        for counter, image_path in enumerate(image_paths, start=1):
            print("Image_path:", image_path)
            print("Dir name:",dir_name)
            toJson(image_path,dir_name, counter)
    

In [None]:
# Extract every feature group for one image; the bare name on the last line displays the dict.
medicine_data=executeFeatures("Accept-SP_Tablet2 (1).jpg")
medicine_data


In [None]:
import json
# Persist `medicine_data` (built by the executeFeatures call above) as JSON in the working directory.
with open('Accept-SP_Tablet.json','w') as file:
    json.dump(medicine_data, file)