# Setup

In [43]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
import pandas as pd
import time
import json

In [45]:
!pip install mapbox
from mapbox import Geocoder



In [46]:
!pip install shapely pyproj



In [47]:
!pip install tqdm



In [48]:
# Load the grouped table from Google Drive. The following cell selects its
# 'PDF Name', 'name', 'Accomplishment' and 'geojson_column' columns.
grouped_final = pd.read_csv('/content/drive/MyDrive/DSS/WWF x DSS/final_grouped_df.csv') # change file path accordingly

In [49]:
# Keep only the four columns the mapping cells below read from this table.
grouped_final = grouped_final[['PDF Name', 'name', 'Accomplishment', 'geojson_column']]

# Mapping

In [50]:
from IPython.display import HTML, display, clear_output
import ipywidgets as widgets
import folium

pdf_data_df = pd.read_csv('/content/drive/MyDrive/DSS/WWF x DSS/PDFs useful_not - Sheet1.csv') # change file path accordingly

# Only the link is needed from the PDF sheet. Keeping a single row per
# 'PDF Name' stops the left merge from multiplying rows of grouped_final when
# the sheet lists the same PDF more than once (the first listed link wins).
pdf_links = pdf_data_df[['PDF Name', 'PDF Link']].drop_duplicates(subset='PDF Name')

# Dropping a 'PDF Link' column left over from a previous run keeps this cell
# re-runnable; on a fresh kernel the drop is a no-op.
grouped_final = (
    grouped_final
    .drop(columns='PDF Link', errors='ignore')
    .merge(pdf_links, on='PDF Name', how='left')
    [['PDF Name', 'name', 'Accomplishment', 'geojson_column', 'PDF Link']]
)

# Custom CSS
# Declares the `.custom-search-box` and `.custom-search-button` classes that
# this cell attaches to the search widgets with `add_class` further down.
custom_css = HTML("""
<style>
    .custom-search-box {
        border: 2px solid #007BFF;
        border-radius: 4px;
        padding: 10px;
        font-size: 14px;
        color: #333;
    }
    .custom-search-button {
        background-color: #007BFF;
        color: white;
        padding: 10px 15px;
        margin-left: 5px;
        border: none;
        border-radius: 4px;
        cursor: pointer;
    }
</style>
""")

# Display the custom CSS
# The <style> element is emitted as part of this cell's output.
display(custom_css)

# Search box widget
# NOTE(review): `search_box` and `search_button` are reassigned by a later cell
# (`widgets.Text(placeholder='Type here', ...)`), so the CSS classes and
# handlers attached in this cell apply only to the instances created here.
search_box = widgets.Text(
    value='',
    placeholder='Type here to search...',
    description='',
    disabled=False,
    layout=widgets.Layout(width='300px')
)

# Tag the widget with the class declared in the custom CSS above.
search_box.add_class('custom-search-box')

# Search button
search_button = widgets.Button(
    description='Search',
    disabled=False,
    button_style='info',  # Use predefined styles
    tooltip='Click to search',
    icon='search'
)

# Tag the widget with the class declared in the custom CSS above.
search_button.add_class('custom-search-button')

# Search handler shared by the text box (submit) and the button (click)
def on_search(sender):
    """Redraw the search controls followed by a map of the rows matching the query.

    :param sender: Object passed in by the widget event; it is not used.

    Any exception raised while filtering or plotting is reported with
    ``print`` instead of being propagated.
    """
    try:
        clear_output(wait=True)
        controls = widgets.HBox([search_box, search_button])
        display(controls)

        # filter_df, grouped_df, plot_polygons and mapbox_access_token are
        # looked up in the notebook globals when the handler runs.
        matches = filter_df(search_box.value, grouped_df)
        display(plot_polygons(matches, mapbox_access_token))
    except Exception as err:
        print(f"An error occurred: {err}")

# Connect the event handler to the search box and button
# NOTE(review): `Text.on_submit` is reported as deprecated in recent ipywidgets
# releases — confirm against the installed version before relying on it.
search_box.on_submit(on_search)
search_button.on_click(on_search)


In [52]:
import ast
import html
import json
import os
import textwrap
import time

import folium
import pandas as pd
from shapely.geometry import shape, mapping
from tqdm import tqdm

# Mapbox access token. The MAPBOX_ACCESS_TOKEN environment variable takes
# precedence so the token can be swapped or rotated without editing the
# notebook; the literal below is only the fallback.
# NOTE(security): the fallback token is stored in plain text in this notebook —
# TODO move it out of the source and rotate it.
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN") or (
    "pk.eyJ1IjoibXJraWViYXJyIiwiYSI6ImNscHFwZGphZjAwZm8ycG94OWdnbzJmeTUifQ.zXqvLa1KEPzvvdJUd65f5g"
)

# Work on a copy so that grouped_final keeps its original 'name' column.
grouped_df = grouped_final.copy()
grouped_df = grouped_df.rename(columns={'name': 'Locations'})

# Function to remove duplicates from a list
def remove_duplicates(lst):
    """Return the unique items of ``lst`` in first-seen order.

    ``dict.fromkeys`` is used instead of ``set`` so the order of the result is
    the same on every run; the iteration order of a set of strings can change
    between interpreter sessions.

    :param lst: Iterable of hashable items
    :return: New list with duplicates removed, original order preserved
    """
    return list(dict.fromkeys(lst))

# 'Locations' cells are expected to hold string representations of lists
def evaluate_locations(locations_str):
    """Parse one 'Locations' cell into a list.

    :param locations_str: Cell value, normally the string form of a Python list
    :return: The parsed list, or ``[]`` when the cell is missing, blank,
        unparsable, or evaluates to something other than a list
    """
    # Already a list (e.g. the cell was run a second time): return it unchanged.
    if isinstance(locations_str, list):
        return locations_str

    # NaN, None and other non-string cells have nothing to parse; calling
    # .strip() on them would raise AttributeError.
    if not isinstance(locations_str, str) or not locations_str.strip():
        return []

    try:
        # literal_eval only accepts Python literals, so it is safe on file data.
        locations = ast.literal_eval(locations_str)
    except Exception as e:
        # If there is an error in evaluation, print it and return an empty list
        print(f"Error evaluating locations: {e}, input was: {locations_str}")
        return []

    # A valid literal that is not a list used to fall through and return None,
    # which broke the de-duplication step that follows.
    return locations if isinstance(locations, list) else []

# Parse every 'Locations' cell into a list
grouped_df['Locations'] = grouped_df['Locations'].apply(evaluate_locations)

# De-duplicate each list through the shared helper rather than repeating its
# logic in a lambda. `or []` covers a cell for which parsing produced None.
grouped_df['Locations'] = grouped_df['Locations'].apply(
    lambda parsed: remove_duplicates(parsed or [])
)


def simplify_geojson(geojson, tolerance=0.01):
    """
    Simplify the geometry of a GeoJSON object.
    :param geojson: GeoJSON geometry or Feature, given as a dict or a JSON string
    :param tolerance: Tolerance level for simplification
    :return: Simplified GeoJSON geometry (the dict produced by ``mapping``)
    """
    # String input has always been advertised here, but `shape` needs a
    # mapping, so decode JSON text first.
    if isinstance(geojson, (str, bytes, bytearray)):
        geojson = json.loads(geojson)

    # A Feature wraps its geometry; unwrap it so `shape` receives a geometry
    # object regardless of how the installed Shapely treats Features.
    if isinstance(geojson, dict) and geojson.get('type') == 'Feature':
        geojson = geojson['geometry']

    geom = shape(geojson)
    simplified_geom = geom.simplify(tolerance, preserve_topology=True)
    return mapping(simplified_geom)

def simplify_featurecollection(featurecollection, tolerance=0.01):
    """
    Build a new FeatureCollection whose features carry simplified geometries.
    :param featurecollection: GeoJSON FeatureCollection dict with a 'features' list
    :param tolerance: Tolerance passed to each geometry's ``simplify`` call
    :return: New FeatureCollection dict; each feature keeps its 'properties'
    """
    def _simplified(feature):
        # Geometry is read and simplified before 'properties' is looked up.
        reduced = shape(feature['geometry']).simplify(tolerance, preserve_topology=True)
        return {
            'type': 'Feature',
            'properties': feature['properties'],
            'geometry': mapping(reduced),
        }

    return {
        'type': 'FeatureCollection',
        'features': [_simplified(item) for item in featurecollection['features']],
    }

import random

def get_random_color():
    """Return a random colour as a '#'-prefixed, zero-padded six-digit hex string."""
    rgb_value = random.randint(0, 0xFFFFFF)
    return '#' + format(rgb_value, '06x')

def _build_popup_html(row, wrapper):
    """Return the popup HTML for one row, with every row value HTML-escaped.

    :param row: Row providing 'Accomplishment', 'Locations', 'PDF Name' and 'PDF Link'
    :param wrapper: ``textwrap.TextWrapper`` used to wrap the accomplishment text
    :return: HTML string to hand to ``folium.Popup``
    """
    accomplishment_text = row['Accomplishment']
    # A str is never NaN, so the isinstance test alone covers missing values.
    if not isinstance(accomplishment_text, str):
        accomplishment_text = "No Accomplishment provided."
    wrapped_accomplishment = html.escape(wrapper.fill(accomplishment_text))

    # Row values are escaped because the popup string is rendered as markup.
    location = html.escape(str(row['Locations']))
    pdf_name = html.escape(str(row['PDF Name']))

    # A left merge can leave 'PDF Link' empty; show the bare name in that case
    # instead of a link pointing at 'nan'.
    pdf_link = row["PDF Link"]
    if isinstance(pdf_link, str) and pdf_link.strip():
        pdf_html = f"<a href='{html.escape(pdf_link)}' target='_blank'>{pdf_name}</a>"
    else:
        pdf_html = pdf_name

    return f"""
        Accomplishments:<br>{wrapped_accomplishment}<br>
        Locations: {location}<br>
        PDF: {pdf_html}
        """


def plot_polygons(filtered_df, mapbox_access_token, tolerance=0.01):
    """Draw one randomly coloured GeoJSON layer per row on a Mapbox-tiled Folium map.

    :param filtered_df: DataFrame with 'Accomplishment', 'Locations', 'PDF Name',
        'PDF Link' and 'geojson_column' (GeoJSON text) columns
    :param mapbox_access_token: Token appended to the Mapbox tile URL
    :param tolerance: Simplification tolerance applied to every geometry
    :return: The populated ``folium.Map``
    """
    print("Starting map plotting.")
    start_time = time.time()

    # Define the Mapbox URL for tiles
    mapbox_url = f"https://api.mapbox.com/styles/v1/mrkiebarr/clprndyex00dm01r85to045vb/tiles/256/{{z}}/{{x}}/{{y}}?access_token={mapbox_access_token}"

    # Initialize a Folium map
    m = folium.Map(location=[20, 0], zoom_start=2, tiles=mapbox_url, attr='Mapbox')

    # The wrapper settings never change, so build it once outside the loop.
    wrapper = textwrap.TextWrapper(width=75, break_long_words=True, replace_whitespace=False)
    skipped_rows = 0

    for index, row in tqdm(filtered_df.iterrows(), total=len(filtered_df), desc="Progress Bar"):
        raw_geojson = row['geojson_column']
        # A missing cell (NaN) is not JSON text; skip that row rather than
        # letting one bad record abort the whole map.
        if not isinstance(raw_geojson, (str, bytes, bytearray)):
            skipped_rows += 1
            continue

        popup_text = _build_popup_html(row, wrapper)

        color = get_random_color()  # Assign a random color
        geojson_data = json.loads(raw_geojson)
        if geojson_data['type'] == 'FeatureCollection':
            simplified_geojson = simplify_featurecollection(geojson_data, tolerance)
        else:
            simplified_geojson = simplify_geojson(geojson_data, tolerance)

        folium.GeoJson(
            simplified_geojson,
            # `color=color` binds this row's colour; a plain closure would see
            # only the colour of the last row.
            style_function=lambda x, color=color: {'fillColor': color, 'color': color},
            popup=folium.Popup(popup_text, max_width=300)
        ).add_to(m)

    if skipped_rows:
        print(f"Skipped {skipped_rows} row(s) without GeoJSON data.")

    end_time = time.time()
    print(f"Map plotting took {end_time - start_time} seconds.")
    return m


# Widgets setup
# NOTE(review): both names are reassigned near the end of this cell before
# anything here is displayed or given a handler, and `widgets` is imported only
# further down in this cell (an earlier cell also imports it).
search_box = widgets.Text(placeholder='Type here', description='Search:')
search_button = widgets.Button(description='Search')

import ipywidgets as widgets
from IPython.display import display, clear_output

import ast

def _contains_term(value, needle):
    """Case-insensitive literal containment test for one cell.

    :param value: A string, a list/tuple/set of items, or a missing value
    :param needle: Lower-cased text to look for
    :return: True when ``needle`` occurs in the string or in any item
    """
    if isinstance(value, str):
        return needle in value.lower()
    if isinstance(value, (list, tuple, set)):
        return any(needle in str(item).lower() for item in value)
    # NaN/None and other types never match.
    return False


# Keep the rows whose accomplishment text or locations mention the search term
def filter_df(search_term, df):
    """Return the rows of ``df`` that mention ``search_term``.

    The term is matched literally and case-insensitively, so characters such
    as ``(`` or ``+`` are searched for rather than parsed as a regex.
    'Locations' holds lists, which ``.str.contains`` cannot search, so each
    list item is tested individually.

    :param search_term: Text typed by the user
    :param df: DataFrame with 'Accomplishment' and 'Locations' columns
    :return: Filtered DataFrame (original index kept)
    """
    needle = str(search_term).lower()
    in_accomplishment = df['Accomplishment'].map(lambda cell: _contains_term(cell, needle)).astype(bool)
    in_locations = df['Locations'].map(lambda cell: _contains_term(cell, needle)).astype(bool)
    return df[in_accomplishment | in_locations]

import ipywidgets as widgets
from IPython.display import HTML, display

def display_interactive_table(df):
    """Render the rows of ``df`` as a styled HTML table below a match count.

    Cell values are HTML-escaped before being placed in the markup so that
    text containing ``<``, ``&`` or quotes is shown literally and cannot break
    the table.

    :param df: DataFrame with 'PDF Name', 'Locations', 'Accomplishment' and
        'PDF Link' columns
    """
    num_matches = len(df)
    # Styling for the table and the phrase
    style = """
    <style>
        table {
            width: 100%;
            border-collapse: collapse;
        }
        th, td {
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }
        th {
            background-color: #f2f2f2;
        }
        tr:hover {
            background-color: #f5f5f5;
        }
        .matches-phrase {
            margin-bottom: 10px;
        }
    </style>
    """

    # Create the phrase with the number of matches
    matches_phrase = f"<div class='matches-phrase'>There were {num_matches} matches:</div>"

    # Collect the fragments and join them once instead of growing a string
    # inside the loop.
    parts = [
        "<table>",
        "<tr><th>PDF Name</th><th>Locations</th><th>Accomplishment</th><th>PDF Link</th></tr>",
    ]

    for _, row in df.iterrows():
        locations = row['Locations']
        if isinstance(locations, (list, tuple, set)):
            # str() keeps join from failing on non-string items.
            locations_text = ', '.join(str(item) for item in locations)
        else:
            locations_text = str(locations)

        # Leave the cell empty when there is no link, rather than linking to 'nan'.
        pdf_link = row['PDF Link']
        if isinstance(pdf_link, str) and pdf_link.strip():
            link_cell = f"<a href='{html.escape(pdf_link)}' target='_blank'>Open PDF</a>"
        else:
            link_cell = ""

        parts.append(
            f"<tr><td>{html.escape(str(row['PDF Name']))}</td>"
            f"<td>{html.escape(locations_text)}</td>"
            f"<td>{html.escape(str(row['Accomplishment']))}</td>"
            f"<td>{link_cell}</td></tr>"
        )

    parts.append("</table>")

    # Combine style, phrase, and table HTML, then display the result
    display(HTML(style + matches_phrase + ''.join(parts)))

def highlightPolygon(pdf_name):
    """Redraw the output with a map limited to the polygons of one PDF.

    :param pdf_name: Value compared for equality with the 'PDF Name' column of
        the global ``grouped_df``
    """
    rows_for_pdf = grouped_df.loc[grouped_df['PDF Name'] == pdf_name]

    # Replace the current output: search controls first, then the map.
    clear_output(wait=True)
    display(search_box, search_button)
    display(plot_polygons(rows_for_pdf, mapbox_access_token))

    # The table is rebuilt from the full grouped_df, not from the filtered rows.
    display_interactive_table(grouped_df)

# Modify plot_polygons function to add a highlight feature (e.g., change polygon color)


# Event handler for the search box
from IPython.display import display, clear_output, HTML, DisplayHandle

# Create a display handle for dynamic content
# NOTE(review): no `.display()` or `.update()` call on this handle is visible
# in the notebook cells shown here.
dynamic_display = DisplayHandle()

# Create a display handle for the interactive table
# NOTE(review): likewise not referenced again in the visible cells.
table_display = DisplayHandle()

# Event handler for the search box
def on_search_submit(button):
    """Filter ``grouped_df`` by the search box text and redraw map, controls and table.

    :param button: Widget passed in by ``on_click``; it is not used.
    """
    search_term = search_box.value.strip()
    if not search_term:
        print("Please enter a search term.")
        return

    filtered_data = filter_df(search_term, grouped_df)
    print("Found " + str(len(filtered_data)) + " matches to " + '"' + search_term + '"')

    # Build the map before clearing, so the old output stays visible while the
    # new one is being computed.
    updated_map = plot_polygons(filtered_data, mapbox_access_token)

    clear_output(wait=True)
    # The previous code called `map_display.update(...)`, but no `map_display`
    # exists, so every search raised NameError. Display the map directly,
    # above the controls as in the initial layout.
    display(updated_map)
    display(search_box, search_button)  # Redisplay the search box and button

    # Show the matching rows as a table under the controls
    display_interactive_table(filtered_data)

# Set up and display search widgets
# These assignments replace the widgets created earlier in this cell; only the
# button click is wired to the handler here (no submit handler on the text box).
search_box = widgets.Text(placeholder='Type here', description='Search:')
search_button = widgets.Button(description='Search')
search_button.on_click(on_search_submit)

print("Displaying Initial Map")
# Only the first 10 rows of grouped_df are drawn for the initial view.
initial_map = plot_polygons(grouped_df[:10], mapbox_access_token)
# Display the initial map and the search widgets
display(initial_map)
display(search_box, search_button)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
initial_map

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

import ast
# NOTE(review): `word_df` is not referenced by any other visible cell.
word_df = pd.read_csv('/content/drive/MyDrive/DSS/WWF x DSS/mapping_word_to_pdfs2.csv') # change file path accordingly
def _contains_term(value, needle):
    """Case-insensitive literal containment test for one cell.

    :param value: A string, a list/tuple/set of items, or a missing value
    :param needle: Lower-cased text to look for
    :return: True when ``needle`` occurs in the string or in any item
    """
    if isinstance(value, str):
        return needle in value.lower()
    if isinstance(value, (list, tuple, set)):
        return any(needle in str(item).lower() for item in value)
    # NaN/None and other types never match.
    return False


def filter_df(search_term, df):
    """Return the rows of ``df`` whose accomplishment or location mentions ``search_term``.

    The location column is 'name' when present, otherwise 'Locations'.
    ``grouped_df`` has its 'name' column renamed to 'Locations', so reading
    'name' unconditionally raised KeyError. Matching is literal and
    case-insensitive.

    :param search_term: Text typed by the user
    :param df: DataFrame with 'Accomplishment' plus 'name' or 'Locations'
    :return: Filtered DataFrame (original index kept)
    """
    needle = str(search_term).lower()
    location_column = 'name' if 'name' in df.columns else 'Locations'
    in_accomplishment = df['Accomplishment'].map(lambda cell: _contains_term(cell, needle)).astype(bool)
    in_location = df[location_column].map(lambda cell: _contains_term(cell, needle)).astype(bool)
    return df[in_accomplishment | in_location]

# Event handler for the search box
def on_search_submit(button):
    """Filter ``grouped_df`` by the search box text and show the matching polygons.

    :param button: Widget passed in by ``on_click``; it is not used.
    """
    # Retrieve the search term directly from the search_box widget
    search_term = search_box.value.strip()  # Strip any leading/trailing whitespace
    print(f"Search term: '{search_term}'")  # Print the search term

    if not search_term:
        print("Please enter a search term.")
        # Re-display the controls so they follow the message.
        display(search_box, search_button)
        return

    # Filter the DataFrame
    filtered_data = filter_df(search_term, grouped_df)
    print(f"Number of PDFs after filtering: {len(filtered_data)}")  # Print the number of matches

    # Generate the map
    map_object = plot_polygons(filtered_data, mapbox_access_token)

    # Clear previous output and show the controls exactly once above the map;
    # the old code displayed them again after the map, leaving two copies.
    clear_output(wait=True)
    display(search_box, search_button)
    display(map_object)

# Ensure the search button is linked to the correct event handler
# NOTE(review): `search_button` is the widget an earlier cell already called
# `on_click` on. `on_click` appears to add a callback rather than replace the
# existing one — confirm; if so, each run of this cell stacks another handler.
search_button.on_click(on_search_submit)

# Display the search box and button
display(search_box, search_button)


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

import ast
# NOTE(review): `word_df` is not referenced by any other visible cell.
word_df = pd.read_csv('/content/drive/MyDrive/DSS/WWF x DSS/mapping_word_to_pdfs.csv') # change file path accordingly
def _contains_term(value, needle):
    """Case-insensitive literal containment test for one cell.

    :param value: A string, a list/tuple/set of items, or a missing value
    :param needle: Lower-cased text to look for
    :return: True when ``needle`` occurs in the string or in any item
    """
    if isinstance(value, str):
        return needle in value.lower()
    if isinstance(value, (list, tuple, set)):
        return any(needle in str(item).lower() for item in value)
    # NaN/None and other types never match.
    return False


def filter_df(search_term, df):
    """Return the rows of ``df`` whose accomplishment or location mentions ``search_term``.

    The location column is 'name' when present, otherwise 'Locations'.
    ``grouped_df`` has its 'name' column renamed to 'Locations', so reading
    'name' unconditionally raised KeyError. Matching is literal and
    case-insensitive.

    :param search_term: Text typed by the user
    :param df: DataFrame with 'Accomplishment' plus 'name' or 'Locations'
    :return: Filtered DataFrame (original index kept)
    """
    needle = str(search_term).lower()
    location_column = 'name' if 'name' in df.columns else 'Locations'
    in_accomplishment = df['Accomplishment'].map(lambda cell: _contains_term(cell, needle)).astype(bool)
    in_location = df[location_column].map(lambda cell: _contains_term(cell, needle)).astype(bool)
    return df[in_accomplishment | in_location]

# Event handler for the search box
def on_search_submit(button):
    """Filter ``grouped_df`` by the search box text and show the matching polygons.

    :param button: Widget passed in by ``on_click``; it is not used.
    """
    # Retrieve the search term directly from the search_box widget
    search_term = search_box.value.strip()  # Strip any leading/trailing whitespace
    print(f"Search term: '{search_term}'")  # Print the search term

    if not search_term:
        print("Please enter a search term.")
        # Re-display the controls so they follow the message.
        display(search_box, search_button)
        return

    # Filter the DataFrame
    filtered_data = filter_df(search_term, grouped_df)
    print(f"Number of rows after filtering: {len(filtered_data)}")  # Print the number of matches

    # Generate the map
    map_object = plot_polygons(filtered_data, mapbox_access_token)

    # Clear previous output and show the controls exactly once above the map;
    # the old code displayed them again after the map, leaving two copies.
    clear_output(wait=True)
    display(search_box, search_button)
    display(map_object)


# Ensure the search button is linked to the correct event handler
# NOTE(review): `search_button` is the widget earlier cells already called
# `on_click` on. `on_click` appears to add a callback rather than replace the
# existing one — confirm; if so, each run of this cell stacks another handler.
search_button.on_click(on_search_submit)

# Display the search box and button
display(search_box, search_button)


In [None]:
# Assuming grouped_df and mapbox_access_token are already defined

# Generate the map using the plot_polygons function
# Only the first 8 rows of grouped_df are plotted; this rebinds `initial_map`,
# which an earlier cell also assigns.
initial_map = plot_polygons(grouped_df.iloc[:8, :], mapbox_access_token)

# Display the map
# A bare expression on the last line of a cell is shown as that cell's output.
initial_map