In [12]:
import os
import pandas as pd
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

In [13]:
# URL of the prediction service; get_prediction() POSTs each image to it.
API_ENDPOINT = "http://127.0.0.1:5050/predict"
# Scratch directory for downloaded images (files are removed after scoring).
IMAGES_DIR = 'save/images'
# Despite the name this is a directory: process_df() writes one CSV per chunk into it.
OUTPUT_CSV = 'csv_files'
# Number of DataFrame rows processed per chunk, i.e. rows per output CSV.
DF_SPLITS_ROW = 500

In [14]:
# Load the source table. The 'Image' column holds the URL that the
# download/predict helpers in this notebook read for every row.
df = pd.read_csv('../data/01.08 to 10.09 - Merch Images.csv')
# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,Date,Time,Longitude,Latitude,Image,CustomerName,UserName,Category,NodeName
0,14/08/2024 12:00:00 AM,11:50:00,79.82594,7.45958,https://surge.lk/Myfiles/RetailerImages/134975...,K.N.D.Super.,Sudarshana Megasooriya,1M Visibility,Mahendra Amarasinghe\t
1,14/08/2024 12:00:00 AM,12:07:00,79.83018,7.48286,https://surge.lk/Myfiles/RetailerImages/134845...,Best Buy,Sudarshana Megasooriya,5M Visibility,Mahendra Amarasinghe\t
2,14/08/2024 12:00:00 AM,12:49:00,80.11998,6.26764,https://surge.lk/Myfiles/RetailerImages/135173...,Sanka Food Mart,Shehan Sankalpa,5M Visibility,Mahendra Amarasinghe\t
3,14/08/2024 12:00:00 AM,12:50:00,80.136,6.2731,https://surge.lk/Myfiles/RetailerImages/135173...,Sanka Food Mart,Shehan Sankalpa,Freezer Planogram,Mahendra Amarasinghe\t
4,14/08/2024 12:00:00 AM,12:56:00,80.11998,6.26764,https://surge.lk/Myfiles/RetailerImages/135173...,Sanka Food Mart,Shehan Sankalpa,1M Visibility,Mahendra Amarasinghe\t


In [15]:
# Create the working directories up front. exist_ok=True keeps this cell
# idempotent on re-run and avoids the race between checking for the
# directory and creating it.
os.makedirs(IMAGES_DIR, exist_ok=True)
os.makedirs(OUTPUT_CSV, exist_ok=True)

In [16]:
def download_image(url, filename, timeout=30):
    """Download ``url`` and save the response body to ``filename``.

    Args:
        url: Address of the image to fetch.
        filename: Local path the image bytes are written to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would block the whole batch.

    Returns:
        True if the server answered 200 and the file was written, False on
        any other status code or on any exception (which is printed, not
        raised, so the caller can carry on with the next row).
    """
    try:
        # The context manager returns the connection to the pool even when
        # we bail out early on a non-200 status.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Error downloading image from {url}")
                return False
            with open(filename, 'wb') as f:
                # Actually stream the body instead of loading it all into
                # memory via response.content.
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
    except Exception as e:
        print(f"Exception occurred while downloading image from {url}: {e}")
        return False
    return True

In [17]:
def get_prediction(image_path, timeout=60):
    """Send an image file to the prediction API and return its result.

    Args:
        image_path: Path of the image file to upload as the ``image`` field.
        timeout: Seconds to wait for the API before giving up, so a hung
            server cannot stall the batch indefinitely.

    Returns:
        The value stored under ``'board_types'`` in the JSON response
        (``None`` if the key is absent), ``'File not found'`` when
        ``image_path`` does not exist, or ``'Error'`` for a non-200 status
        or any other exception. Failures are printed rather than raised.
    """
    try:
        with open(image_path, 'rb') as f:
            response = requests.post(
                API_ENDPOINT, files={'image': f}, timeout=timeout
            )
        # The upload has completed, so the file handle is no longer needed
        # while the response is inspected.
        if response.status_code != 200:
            print(f"Error with API response: {response.status_code}")
            return 'Error'
        return response.json().get('board_types')
    except FileNotFoundError:
        print(f"File not found for prediction: {image_path}")
        return 'File not found'
    except Exception as e:
        print(f"Exception occurred while getting prediction: {e}")
        return 'Error'

In [18]:
def process_image(row):
    """Download, score and clean up the image referenced by one row.

    Args:
        row: Mapping-like row whose ``'Image'`` entry is the image URL.

    Returns:
        Whatever ``get_prediction`` returns for the downloaded file, or
        ``'Link not working'`` when the URL is missing/not a string or the
        download fails. The label matches the one used by
        ``predictions_df`` so both code paths produce the same value.
    """
    image_url = row['Image']
    # Empty CSV cells arrive as NaN (a float); os.path.basename would raise
    # on them, so treat them like a dead link instead.
    if not isinstance(image_url, str) or not image_url:
        return 'Link not working'

    image_path = os.path.join(IMAGES_DIR, os.path.basename(image_url))

    if not download_image(image_url, image_path):
        return 'Link not working'

    try:
        return get_prediction(image_path)
    finally:
        # Always drop the temporary file; a failed delete must not discard
        # a prediction that was already obtained.
        try:
            os.remove(image_path)
        except OSError as e:
            print(f"Exception occurred while removing image file: {e}")

In [19]:
def predictions_df(df):
    """Return one prediction per row of ``df``, in row order.

    For each row the image at ``row['Image']`` is downloaded into
    ``IMAGES_DIR``, sent to the prediction API, and then deleted.

    Args:
        df: DataFrame with an ``'Image'`` column of image URLs.

    Returns:
        A list with ``len(df)`` entries: the ``get_prediction`` result for
        each row, or ``'Link not working'`` when the URL is missing/not a
        string or the download fails.
    """
    predictions = []
    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        image_url = row['Image']

        # Empty CSV cells arrive as NaN (a float); os.path.basename would
        # raise TypeError and abort the whole chunk, so record them as a
        # dead link and move on.
        if not isinstance(image_url, str) or not image_url:
            predictions.append('Link not working')
            continue

        image_path = os.path.join(IMAGES_DIR, os.path.basename(image_url))

        if not download_image(image_url, image_path):
            predictions.append('Link not working')
            continue

        try:
            predictions.append(get_prediction(image_path))
        finally:
            # Clean up even if the run is interrupted mid-prediction, so
            # stale images do not pile up in IMAGES_DIR.
            try:
                os.remove(image_path)
            except Exception as e:
                print(f"Exception occurred while removing image file: {e}")

    return predictions

In [20]:
def remove_brackets(value):
    """Flatten a list of (label, score) pairs into a readable string.

    Each entry is rendered as ``"<label> <score with 2 decimals>"`` and the
    entries are joined with ``", "``. An empty list becomes ``'none'``.
    Anything that is not a list is returned unchanged.
    """
    if not isinstance(value, list):
        return value
    if not value:
        return 'none'

    rendered = []
    for entry in value:
        label = entry[0]
        score = entry[1]
        rendered.append(f'{label} {score:.2f}')
    return ', '.join(rendered)

In [21]:
def process_df(df):
    """Score ``df`` in chunks and write each chunk to its own CSV file.

    The frame is processed ``DF_SPLITS_ROW`` rows at a time. Each chunk
    gets a ``'Prediction'`` column (list results flattened to text by
    ``remove_brackets``) and is saved as ``output_<start row>.csv`` inside
    the ``OUTPUT_CSV`` directory, so finished chunks survive an
    interrupted run.

    Args:
        df: DataFrame with an ``'Image'`` column of image URLs. It is not
            modified.
    """
    for start in range(0, len(df), DF_SPLITS_ROW):
        # .copy() gives the chunk its own data; adding a column to a bare
        # iloc slice triggers SettingWithCopyWarning and is ambiguous about
        # whether the parent frame is touched.
        chunk = df.iloc[start:start + DF_SPLITS_ROW].copy()

        preds = predictions_df(chunk)
        chunk['Prediction'] = [remove_brackets(pred) for pred in preds]

        csv_name = os.path.join(OUTPUT_CSV, f'output_{start}.csv')
        chunk.to_csv(csv_name, index=False)

In [23]:
# Run the whole pipeline: download, score and export every chunk of df.
# The work is sequential and network-bound; the recorded progress bar
# shows several seconds per image, so expect a long-running cell.
process_df(df)

  2%|▏         | 8/500 [00:43<45:35,  5.56s/it]

: 