In [1]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster
import numpy as np
import pycountry_convert as pc

In [2]:
# Continent color mapping
CONTINENT_COLORS = {
    'North America': '#1E90FF',  # Dodger Blue
    'South America': '#2E8B57',  # Sea Green
    'Europe': '#9400D3',  # Violet
    'Asia': '#FF4500',  # Orange Red
    'Africa': '#DAA520',  # Goldenrod
    'Oceania': '#00CED1',  # Dark Turquoise
    'Antarctica': '#D3D3D3'  # Light Gray
}

# Function to convert country to continent
def country_to_continent(country_name):
    try:
        # Try to get the country's alpha-2 code
        country_alpha2 = pc.country_name_to_country_alpha2(country_name)
        
        # Convert country code to continent code
        country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)
        
        # Map continent code to continent name
        continent_map = {
            'NA': 'North America',
            'SA': 'South America', 
            'EU': 'Europe',
            'AF': 'Africa',
            'AS': 'Asia',
            'OC': 'Oceania',
            'AN': 'Antarctica'
        }
        
        return continent_map.get(country_continent_code, 'Unknown')
    
    except (KeyError, TypeError):
        # Handle cases where country is not found
        # You might want to manually map some countries or print out unrecognized countries
        print(f"Could not find continent for country: {country_name}")
        return 'Unknown'

# Function to load and preprocess data
def load_data():
    # Load original dataframes
    city_df = pd.read_csv('../For_vizml/city.csv')
    collab_df = pd.read_csv('../For_vizml/cocity.csv')
    
    # Add continent information
    city_df['continent'] = city_df['country'].apply(country_to_continent)
    
    return city_df, collab_df

# Function to calculate points for a curved arc
def generate_arc_points(lat1, lon1, lat2, lon2, num_points=100):
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # Compute intermediate points
    d = np.sqrt((lat2 - lat1) ** 2 + (lon2 - lon1) ** 2)  # Approximation of great-circle distance
    arc_points = []
    for t in np.linspace(0, 1, num_points):
        lat = lat1 + (lat2 - lat1) * t + np.sin(np.pi * t) * 0.1 * d  # Add curve effect
        lon = lon1 + (lon2 - lon1) * t
        arc_points.append([np.degrees(lat), np.degrees(lon)])

    return arc_points

# Function to get continent color
def get_continent_color(continent):
    return CONTINENT_COLORS.get(continent, '#808080')  # Default to gray if continent not found

# Function to create a Folium map with improved readability
def create_map(city_df, collab_df, collaboration_threshold=10):
    # Initialize the Folium map with a more neutral base map
    m = folium.Map(
        location=[0, 0], 
        zoom_start=2, 
        tiles='CartoDB positron'  # Cleaner, less cluttered base map
    )

    # Create a dictionary of city locations for quick access
    city_locations = city_df.set_index('city_id')[['lat', 'lon', 'continent']].to_dict('index')

    # Add city markers with continent-based coloring
    marker_cluster = MarkerCluster().add_to(m)
    for _, row in city_df.iterrows():
        # Create a color based on continent
        continent_color = get_continent_color(row['continent'])
        
        folium.CircleMarker(
            location=[row['lat'], row['lon']],
            radius=5,  # Smaller marker
            popup=f"{row['city']}, {row['country']}<br>Continent: {row['continent']}<br>Citations: {int(row['citation_sum'])}<br>Publications: {int(row['p_count'])}",
            color=continent_color,
            fill=True,
            fillColor=continent_color,
            fillOpacity=0.7
        ).add_to(marker_cluster)

    # Add filtered collaboration arcs
    for _, row in collab_df.iterrows():
        if int(row['colab_count']) < collaboration_threshold:
            continue  # Skip low-collaboration pairs
        
        city1 = city_locations.get(row['city_id1'])
        city2 = city_locations.get(row['city_id2'])
        
        if city1 and city2:
            # Determine line color based on connecting continents
            continent1 = city1['continent']
            continent2 = city2['continent']
            
            # Blend colors if continents are different
            if continent1 != continent2:
                color1 = get_continent_color(continent1)
                color2 = get_continent_color(continent2)
                #line_color = f'{color1},{color2}'
                line_color = get_continent_color(continent1)
            else:
                line_color = get_continent_color(continent1)

            # Generate arc points
            arc_points = generate_arc_points(
                city1['lat'], city1['lon'], city2['lat'], city2['lon'], num_points=50
            )
            
            # Create polyline with very low opacity and thin weight
            folium.PolyLine(
                locations=arc_points,
                color=line_color,
                weight=max(0.1, int(row['colab_count']) // 100),  # Very thin lines
                opacity=0.02,  # Extremely low opacity
                popup=f"Collaborations: {int(row['colab_count'])}<br>From {continent1} to {continent2}",
            ).add_to(m)

    return m

# Example usage
city_df, collab_df = load_data()
m = create_map(city_df, collab_df[:50000], collaboration_threshold=100)
m.save('academic_collaboration_map.html')

# Print out any unrecognized countries for manual mapping if needed
unrecognized = city_df[city_df['continent'] == 'Unknown']['country'].unique()
print("Unrecognized Countries:", unrecognized)

Could not find continent for country: Democratic Republic Congo
Could not find continent for country: Cote d'Ivoire
Could not find continent for country: Democratic Republic Congo
Unrecognized Countries: ['Democratic Republic Congo' "Cote d'Ivoire"]


Create HTML

In [None]:
# Load data
city_df, collab_df = load_data()

# Create the map
folium_map = create_map(city_df, collab_df[:50000], collaboration_threshold=10)

# Save the map as an HTML file
folium_map.save('city_collaboration_map.html')
print("Map has been saved as 'city_collaboration_map.html'.")



Could not find continent for country: Democratic Republic Congo
Could not find continent for country: Cote d'Ivoire
Could not find continent for country: Democratic Republic Congo
Map has been saved as 'city_collaboration_map.html'.
