In [None]:
!pip install langchain
!pip install langchain-core
!pip install langchain-community
!pip install pillow

: 

In [4]:
import base64
import json
from io import BytesIO
import pandas as pd
from PIL import Image
from IPython.display import HTML, display
from langchain_community.llms import Ollama
import os
import re
import io

In [5]:
# City whose images are selected for classification: the label table is filtered on
# this value. Vienna and Sofia are available; Limassol is noted as not present.

city_name = "Vienna" # can be 'Vienna' or 'Sofia'

In [48]:
class SafetyCassifier:
    """Classify images of one city as "Safe"/"Unsafe" with a multimodal Ollama model.

    Workflow:
      1. ``__init__`` reads the train/test label CSVs and selects the image paths
         whose ``city`` column equals ``city_name``.
      2. ``process_images`` sends every image plus a fixed prompt to the model and
         writes one ``<image basename>_llava.json`` file per image.
      3. ``parse_json_files`` reads those files back into a list of row dicts.
      4. ``save_to_city_csv`` writes the rows to ``<city>_classification.csv``.
    """

    # Suffix appended to the image basename to build each per-image result filename.
    _RESULT_SUFFIX = "_llava.json"
    # Columns of the rows produced by parse_json_files (used for the empty-result CSV).
    _RESULT_COLUMNS = ["ID", "Classification", "Keywords"]

    def __init__(self, city_name, model_name="llava", temperature=0.1):
        """
        Create the model client and load the image list for one city
        :param city_name: Value of the 'city' column to select images for
        :param model_name: Ollama model name
        :param temperature: Sampling temperature passed to the model
        """
        self.llm = Ollama(model=model_name, temperature=temperature)
        train = pd.read_csv('manual_labels/train/glare.csv')
        test = pd.read_csv('manual_labels/test/glare.csv')
        self.df = pd.concat([train, test])
        self.df = self.df[['city', 'city_id', 'img_path']]
        self.city_name = city_name
        self.base_directory = '.'
        self.city_images = self.get_images_by_city()

    def convert_to_base64(self, pil_image):
        """
        Convert a PIL image to a Base64 encoded JPEG string
        :param pil_image: PIL image
        :return: Base64 string
        """
        buffered = BytesIO()
        try:
            pil_image.save(buffered, format="JPEG")
        except OSError:
            # JPEG cannot store some modes (e.g. RGBA, P, LA) and Pillow raises
            # OSError for them; retry on an RGB copy with a fresh buffer so no
            # partially written bytes are kept.
            buffered = BytesIO()
            pil_image.convert("RGB").save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def get_images_by_city(self):
        """
        Retrieve the image paths whose 'city' column equals self.city_name
        :return: List of 'img_path' values for the configured city
        """
        is_city = self.df['city'] == self.city_name
        return self.df.loc[is_city, 'img_path'].tolist()

    def process_images(self):
        """
        Run the model on every image in self.city_images and save each response
        as a JSON file under <base_directory>/results/cities_json/<city_name>/
        """
        prompt = '''
        Your task is to classify the area in the image as "Safe" or "Unsafe" based on visible indicators of security, safety, and liveability. 

        Consider factors such as:

        1. Condition of infrastructure
        2. Presence of security features
        3. Signs of neglect or danger
        4. General sense of safeness

        Please provide a classification based only on these visible indicators. Also, provide 3 keywords that justify your choice.

        Please organize your answer in a JSON object containing the following keys:
        - "Classification": (your answer)
        - "Keywords": (the 3 keywords)
        '''

        # Create directories for storing JSON results
        results_dir = os.path.join(self.base_directory, "results", "cities_json", self.city_name)
        os.makedirs(results_dir, exist_ok=True)

        for file_path in self.city_images:
            # The context manager closes the underlying file handle once the image
            # has been encoded, so long runs do not accumulate open files.
            with Image.open(file_path) as pil_image:
                img_base64 = self.convert_to_base64(pil_image)

            # Bind the image to the model
            llm_with_image_context = self.llm.bind(images=[img_base64])

            # Invoke the model with the prompt
            response = llm_with_image_context.invoke(prompt)

            # Remove newline characters from the response
            response = response.replace('\n', '')

            # Prepare the output dictionary
            output_data = {
                "input": prompt.strip(),
                "output": response
            }

            # Create the output JSON filename: <image basename> + result suffix
            json_filename = os.path.splitext(os.path.basename(file_path))[0] + self._RESULT_SUFFIX
            json_filepath = os.path.join(results_dir, json_filename)

            # Save the output as a JSON file
            with open(json_filepath, 'w') as json_file:
                json.dump(output_data, json_file, indent=4)

    def clean_json_string(self, json_string):
        """
        Extract the JSON object from a model response
        :param json_string: The input string
        :return: Cleaned JSON string, or "" when no candidate is found
        """
        if not isinstance(json_string, str):
            return ""
        # Preferred form: a ```json ... ``` fenced block.
        match = re.search(r'```json(.*?)```', json_string, re.DOTALL)
        if match:
            return match.group(1).strip()
        # Fallback: the model answered with a bare object (possibly surrounded by
        # prose); take everything from the first '{' to the last '}'.
        start = json_string.find('{')
        end = json_string.rfind('}')
        if start != -1 and end > start:
            return json_string[start:end + 1]
        return ""

    def _extract_fields(self, output):
        """
        Parse one stored model response into (classification, keywords)
        :param output: Raw model response text
        :return: Tuple (classification, keywords string); ('', '') if unparseable
        """
        try:
            parsed = json.loads(self.clean_json_string(output))
        except json.JSONDecodeError:
            return '', ''
        if not isinstance(parsed, dict):
            # Valid JSON but not an object (e.g. a list): nothing to look up.
            return '', ''
        classification = parsed.get('Classification', '')
        keywords = parsed.get('Keywords', [])
        if isinstance(keywords, str):
            # Already a single string; joining it would split it into characters.
            keywords_text = keywords
        elif isinstance(keywords, (list, tuple)):
            keywords_text = ', '.join(str(keyword) for keyword in keywords)
        else:
            keywords_text = ''
        return classification, keywords_text

    def parse_json_files(self, results_dir):
        """
        Extract the classification and keywords from every result file and
        associate them with the ID taken from the filename
        :param results_dir: Directory containing JSON files (searched recursively)
        :return: List of dictionaries with ID, classification, and keywords

        The ID is the filename without the result suffix, i.e. the basename of the
        image the file was generated from. Responses that cannot be parsed yield a
        row with empty 'Classification' and 'Keywords'.
        """
        suffix = self._RESULT_SUFFIX
        data = []
        for root, _dirs, files in os.walk(results_dir):
            for file in files:
                if not file.endswith(suffix):
                    continue
                json_path = os.path.join(root, file)
                with open(json_path, 'r') as json_file:
                    json_data = json.load(json_file)
                _id = file[:-len(suffix)]
                output = json_data.get('output', '') if isinstance(json_data, dict) else ''
                classification, keywords = self._extract_fields(output)
                data.append({
                    'ID': _id,
                    'Classification': classification,
                    'Keywords': keywords
                })
        return data

    def save_to_city_csv(self, json_data, city_name):
        """
        Save the extracted data to a CSV file named after the city
        :param json_data: List of dictionaries with extracted data
        :param city_name: Name of the city
        """
        df = pd.DataFrame(json_data)
        if df.empty and len(df.columns) == 0:
            # No rows at all: still write the header so the file can be read back.
            df = pd.DataFrame(columns=self._RESULT_COLUMNS)

        # Create directories for storing CSV results
        csv_dir = os.path.join(self.base_directory, 'results', 'cities_csv')
        os.makedirs(csv_dir, exist_ok=True)

        csv_path = os.path.join(csv_dir, f'{city_name}_classification.csv')
        df.to_csv(csv_path, index=False)
        print(f"Data saved to {csv_path}")

In [53]:
# Usage: classify every image of `city_name`, then collect the per-image JSON
# responses into one CSV.

safety_cassifier = SafetyCassifier(city_name)
safety_cassifier.process_images()

# Parse JSON files and save to CSV.
# Build the directory from the same base_directory that process_images wrote under,
# instead of a hand-written relative path.
results_dir = os.path.join(safety_cassifier.base_directory, 'results', 'cities_json', city_name)
json_data = safety_cassifier.parse_json_files(results_dir)
safety_cassifier.save_to_city_csv(json_data, city_name)

In [54]:
# Re-parse the stored responses without re-running the model. The directory is built
# from the classifier's base_directory, where process_images writes its results.
json_data = safety_cassifier.parse_json_files(
    os.path.join(safety_cassifier.base_directory, 'results', 'cities_json', city_name)
)

In [55]:
# Write the parsed rows to the per-city CSV; the path written is printed below.
safety_cassifier.save_to_city_csv(json_data, city_name)

Data saved to ./results/cities_csv/Vienna_classification.csv


In [60]:
# Load the classification CSV of the configured city (not a hard-coded one), so this
# cell keeps working when `city_name` is changed.
results = pd.read_csv(os.path.join('results', 'cities_csv', f'{city_name}_classification.csv'))

In [75]:
import h3


In [86]:
# Only the 'uuid' and 'h3_15' columns are used below; reading just those keeps the
# memory footprint of this multi-million-row table down.
test = pd.read_csv('data/h3.csv', usecols=['uuid', 'h3_15'])

In [87]:
# Name the key column 'ID', matching the 'ID' column of the classification results.
test = test.rename(columns={'uuid': 'ID'})

In [88]:
# Keep only the image ID and its resolution-15 H3 cell.
test = test.loc[:, ['ID', 'h3_15']]

In [77]:
# Spot check: convert the H3 cell of one known image ID to (lat, lng).
sample_cell = test.loc[test['ID'] == '66039dae-d376-4b17-81b7-9dcc284a092c', 'h3_15'].to_numpy()[0]
h3.cell_to_latlng(sample_cell)

(-6.891507813535662, 109.66739979914114)

In [89]:
# Preview the ID -> H3 cell table (the rendered output below is truncated).
test

Unnamed: 0,ID,h3_15
0,d3cca4a7-7994-47ff-9749-7df8ca7228e3,8f4a261b3a698f5
1,33f474ab-496e-41a7-86a6-08fad8fe6638,8f4a261b360c765
2,0da8bf8d-bf34-4d94-aa51-2a7a3c880f6b,8f4a26195b32b31
3,23c0aa2e-573b-4a3f-a65b-fb8f7349725b,8f4a26195a1db76
4,4f47ba8e-8437-4213-9721-8336b02ea703,8f4a261b338ca70
...,...,...
10004546,397c515d-83a4-41cf-b23e-7b1c7ef53cd2,8f2f5aad63688d8
10004547,24362e3b-b394-4529-9176-7f6f92e21ce6,8f2f5aad6369ca0
10004548,235d4130-3401-4d23-8fb4-6feaa4b4df2a,8f2f5aad636900e
10004549,56a15d54-7b55-4db0-8988-0869437a3400,8f2f5aad6369271


In [91]:
# Convert every H3 cell to a (lat, lng) tuple. Mapping over the single column avoids
# building a row object per record (as `apply(axis=1)` does), and `assign` returns a
# new frame instead of writing into a frame that was produced by a column selection.
test = test.assign(lat_lng=test['h3_15'].map(h3.cell_to_latlng))

In [93]:
# Drop the raw H3 cell; keep the image ID with its (lat, lng) tuple.
test = test.loc[:, ['ID', 'lat_lng']]

In [95]:
# Persist the ID -> (lat, lng) table.
# NOTE(review): with the to_csv default, the DataFrame index is written as an extra
# unnamed first column; pass index=False if consumers do not expect it — confirm.
test.to_csv('geolocations.csv')

In [96]:
# Show the label table held by the classifier (columns: city, city_id, img_path).
# A bare `df` is never defined at notebook level, so it fails on a fresh kernel.
safety_cassifier.df

Unnamed: 0,city,city_id,img_path
132,Vienna,1040261752,img/5/ddf8a2e9-812f-46c1-b7d5-144d8a16184b.jpeg
142,Vienna,1040261752,img/4/06961596-6305-4c9b-8c96-d51fd4750711.jpeg
166,Vienna,1040261752,img/4/22a614f6-389a-4cd9-8b62-5b8014421304.jpeg
193,Vienna,1040261752,img/6/adf6c053-41ef-4743-bd49-5ad22d9715ad.jpeg
198,Vienna,1040261752,img/1/10bb79dc-9500-4472-af81-923b1dbd8e51.jpeg
...,...,...,...
1003,Sofia,1100762037,img/4/11c7327f-6d18-406c-aa1f-26a13e5583da.jpeg
1015,Sofia,1100762037,img/1/db01158e-e2aa-4936-8d7c-92f34a9e709a.jpeg
1036,Sofia,1100762037,img/6/7e3debd0-8740-4d44-8935-36ef6b215694.jpeg
1038,Sofia,1100762037,img/4/9de5fa9b-ac24-452d-be82-eac72c1c14bc.jpeg


In [98]:
# Inspect the per-image classifications loaded from the city CSV.
results

Unnamed: 0,ID,Classification,Keywords
0,80921b61-2d57-4027-aa3d-c45902b88f02,Safe,"Well-maintained infrastructure, Security featu..."
1,9b14de57-1779-425e-83d2-4c999c3e4775,Safe,"Well-maintained infrastructure, Security featu..."
2,55be511e-faa7-4016-a974-3ef631d2ab5a,Safe,"Well-maintained infrastructure, Security featu..."
3,6662bc21-7515-465c-a268-8ec83c63ded7,Safe,"Well-maintained infrastructure, No visible sig..."
4,48d7686a-0410-48e4-a6ac-496785e15e23,Safe,"Well-maintained infrastructure, Presence of se..."
...,...,...,...
324,e2c167d2-fe24-4b11-9dfb-4c51f5b39769,Safe,"Well-maintained infrastructure, Orderly parkin..."
325,c273582b-82fa-433b-a9bb-d81a7e3db51d,Safe,"Well-maintained infrastructure, Security featu..."
326,91e05b32-f041-49fe-98cf-75b54fc1f2a3,Safe,"Well-maintained infrastructure, No visible sig..."
327,06961596-6305-4c9b-8c96-d51fd4750711,Safe,"Well-maintained infrastructure, Presence of se..."
