# Chemical Space Explorer - Function Testing Notebook

Notebook to explore the app functionality by testing individual components and visualizing the outputs.

## 1. Setup and Library Imports

In [13]:
# Import all necessary libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import geopandas as gpd
import folium
from folium import plugins
from typing import List, Dict, Optional, Tuple
import os
import json

# Import helper functions from app.py
from app import create_trends_plot, create_contribution_choropleth, create_summary_dataframe
from app import create_article_plot, create_top_trends_plot, create_empty_plot

# Check to see if utils/functions.py exists and import it if it does
try:
    from utils.functions import load_country_data, get_display_data, create_main_plot
    from utils.functions import create_contribution_map_plot, create_summary_table
    from utils.functions import calculate_top_contributors, create_article_plot_simple
    from utils.functions import create_top_collabs_plot
    USE_UTILS = True
    print("Successfully imported functions from utils/functions.py")
except ImportError:
    USE_UTILS = False
    print("Could not import from utils/functions.py - will use functions from app.py instead")

Successfully imported functions from utils/functions.py


## 2. Data Loading and Exploration

Let's first test loading the data and explore its structure.

In [14]:
# Check if data exists
DATA_PATH = "./data/data.parquet"
print(f"Data file exists: {os.path.exists(DATA_PATH)}")

# Try loading the data
try:
    df = pd.read_parquet(DATA_PATH)
    print(f"Successfully loaded data with shape: {df.shape}")
    
    # Display basic info about the data
    print("\nData columns:")
    for col in df.columns:
        print(f"- {col}")
        
    # Display a few rows
    print("\nFirst 5 rows of data:")
    display(df.head())
    
except Exception as e:
    print(f"Error loading data: {e}")
    # Create dummy data for testing if needed
    print("\nCreating dummy data for testing...")
    df = pd.DataFrame({
        'year': list(range(2000, 2023)) * 3,
        'country': ['United States'] * 23 + ['China'] * 23 + ['Germany'] * 23,
        'iso2c': ['US'] * 23 + ['CN'] * 23 + ['DE'] * 23,
        'region': ['Americas'] * 23 + ['Asia'] * 23 + ['Europe'] * 23,
        'chemical': ['All'] * 69,
        'percentage': np.random.uniform(1, 10, 69),
        'lat': [38] * 23 + [35] * 23 + [51] * 23, 
        'lng': [-97] * 23 + [105] * 23 + [10] * 23,
        'cc': ['#1f77b4'] * 23 + ['#ff7f0e'] * 23 + ['#2ca02c'] * 23,
        'is_collab': [False] * 69
    })

Data file exists: True
Successfully loaded data with shape: (106855, 22)

Data columns:
- iso2c
- year
- value_raw
- percentage
- chemical
- iso3c
- country
- is_collab
- lat
- lng
- region
- cc
- flags
- country_x
- iso2c_x
- year_x
- percentage_x
- source
- condition
- blue_group
- red_group
- color_group

First 5 rows of data:


Unnamed: 0,iso2c,year,value_raw,percentage,chemical,iso3c,country,is_collab,lat,lng,...,flags,country_x,iso2c_x,year_x,percentage_x,source,condition,blue_group,red_group,color_group
0,CN,2022.0,281517.0,41.41,All,CHN,China,False,35.082435,106.847575,...,"""https://flagcdn.com/16x12/cn.png""",,,,,,,False,False,other
1,CN,2022.0,273153.0,34.1009,Organic,CHN,China,False,35.082435,106.847575,...,"""https://flagcdn.com/16x12/cn.png""",,,,,,,False,False,other
2,CN,2015.0,3881.0,41.13,Rare-Earths,CHN,China,False,35.082435,106.847575,...,"""https://flagcdn.com/16x12/cn.png""",,,,,,,False,False,other
3,CN,2019.0,3852.0,40.03,Rare-Earths,CHN,China,False,35.082435,106.847575,...,"""https://flagcdn.com/16x12/cn.png""",,,,,,,False,False,other
4,CN,2022.0,3380.0,38.58,Rare-Earths,CHN,China,False,35.082435,106.847575,...,"""https://flagcdn.com/16x12/cn.png""",,,,,,,False,False,other


### 2.1 Process Country List

Now let's process the country list which is used for the map and filters.

In [15]:
# Process country list
country_list = (
    df[df['is_collab'] == False]
    .drop_duplicates(subset=['country', 'iso2c', 'lat', 'lng', 'cc', 'region'])
    .dropna(subset=['country', 'iso2c'])
    .query("country != '' and iso2c != ''")
    .fillna({'region': 'Other'})
    .sort_values('country')
    .reset_index(drop=True)
)

print(f"Processed country list with {len(country_list)} countries")
display(country_list.head())

# Extract other UI elements
chemical_categories = sorted(df['chemical'].dropna().unique())
regions = sorted(country_list['region'].unique())
min_year = int(df['year'].min())
max_year = int(df['year'].max())

print(f"\nChemical categories: {chemical_categories}")
print(f"Regions: {regions}")
print(f"Year range: {min_year} to {max_year}")

Processed country list with 155 countries


Unnamed: 0,iso2c,year,value_raw,percentage,chemical,iso3c,country,is_collab,lat,lng,...,flags,country_x,iso2c_x,year_x,percentage_x,source,condition,blue_group,red_group,color_group
0,DZ,2002.0,18.0,0.208841,Rare-Earths,DZA,Algeria,False,29.228078,1.780595,...,"""https://cdn.rawgit.com/lipis/flag-icon-css/ma...",,,,,,,False,False,other
1,AR,2002.0,35.0,0.40608,Rare-Earths,ARG,Argentina,False,-37.65934,-65.45939,...,"""https://cdn.rawgit.com/lipis/flag-icon-css/ma...",,,,,,,False,False,other
2,AM,2001.0,338.0,0.110226,Organic,ARM,Armenia,False,40.210921,45.233042,...,"""https://cdn.rawgit.com/lipis/flag-icon-css/ma...",,,,,,,False,False,other
3,AU,2000.0,845.0,2.067582,Organometallic,AUS,Australia,False,-25.182997,136.998543,...,"""https://cdn.rawgit.com/lipis/flag-icon-css/ma...",,,,,,,False,False,other
4,AT,2005.0,274.0,0.620022,Organometallic,AUT,Austria,False,47.579732,13.473366,...,"""https://cdn.rawgit.com/lipis/flag-icon-css/ma...",,,,,,,False,False,other



Chemical categories: ['', 'All', 'Organic', 'Organometallic', 'Rare-Earths']
Regions: ['Africa', 'Asia', 'Europe', 'North America', 'Oceania', 'South America']
Year range: 1996 to 2022


## 3. Testing Helper Functions

Now let's test each of the helper functions to see how they work.

### 3.1 Testing Data Filtering

Data filtering functionality is used to process data based on user selections.

In [16]:
# Define sample selection parameters
selected_countries = ['US', 'CN']  # Example ISO codes
year_range = (1996, 2022)  # Example year range
chemical_category = chemical_categories[1]  # First chemical category

# Filter base data
filtered_df = df[
    (df['year'] >= year_range[0]) & 
    (df['year'] <= year_range[1])
]

if chemical_category != "All":
    filtered_df = filtered_df[filtered_df['chemical'] == chemical_category]
    
# Get individual country data
individual_data = filtered_df[
    (filtered_df['is_collab'] == False) & 
    (filtered_df['iso2c'].isin(selected_countries))
]

print(f"Filtered data shape: {filtered_df.shape}")
print(f"Individual country data shape: {individual_data.shape}")

# Check if there's collaboration data
collab_data = None
if 'is_collab' in df.columns:
    collab_df = filtered_df[filtered_df['is_collab'] == True]
    mask = pd.Series([False] * len(collab_df))
    for iso in selected_countries:
        mask = mask | collab_df['iso2c'].str.contains(iso, na=False)
    
    collab_data = collab_df[mask]
    print(f"Collaboration data shape: {collab_data.shape}")
    if not collab_data.empty:
        display(collab_data.head())
    else:
        print("No collaboration data found for the selected countries")

Filtered data shape: (106208, 22)
Individual country data shape: (216, 22)
Collaboration data shape: (21223, 22)



Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,iso2c,year,value_raw,percentage,chemical,iso3c,country,is_collab,lat,lng,...,flags,country_x,iso2c_x,year_x,percentage_x,source,condition,blue_group,red_group,color_group
1169,AE-AU-CN-FI-FR-GB-IT-US,2015.0,12.0,0.001913,All,ARE-AUS-CHN-FIN-FRA-GBR-ITA-USA,"United Arab Emirates, Australia, China, Finlan...",True,,,...,,,,,,,,False,False,other
1186,AE-AU-CN-FI-FR-GB-IT-US,2015.0,11.0,0.001821,Organic,ARE-AUS-CHN-FIN-FRA-GBR-ITA-USA,"United Arab Emirates, Australia, China, Finlan...",True,,,...,,,,,,,,False,False,other
1621,AE-CM-ES-FR-US,2021.0,9.0,0.001285,Organic,ARE-CMR-ESP-FRA-USA,"United Arab Emirates, Cameroon, Spain, France,...",True,,,...,,,,,,,,False,False,other
1622,AE-CM-ES-FR-US,2021.0,9.0,0.00125,All,ARE-CMR-ESP-FRA-USA,"United Arab Emirates, Cameroon, Spain, France,...",True,,,...,,,,,,,,False,False,other
1637,AE-CN,2018.0,1.0,0.009413,Rare-Earths,ARE-CHN,"United Arab Emirates, China",True,,,...,,,,,,,,False,False,other


### 3.2 Testing Trends Plot

Now let's test creating the main trends plot.

In [17]:
# Test creating trends plot
display_mode = "compare_individuals"  # Options: "compare_individuals" or "find_collaborations"

# Use filtered individual data from above
plot_data = individual_data.copy()

if not plot_data.empty:
    fig = create_trends_plot(plot_data, selected_countries, display_mode)
    fig.show()
else:
    print("No data available for trends plot")
    fig = create_empty_plot("No data available for selected filters")
    fig.show()

### 3.3 Testing Contribution Choropleth Map

Let's test the choropleth map functionality.

In [18]:



# Test creating contribution choropleth
if not individual_data.empty:
    fig = create_contribution_choropleth(individual_data)
    fig.show()
else:
    print("No data available for choropleth map")
    fig = create_empty_plot("No data available for selected filters")
    fig.show()

### 3.4 Testing Summary Data Table

Let's test creating the summary data table.

In [19]:
# Test creating summary dataframe
if not individual_data.empty:
    summary_df = create_summary_dataframe(individual_data, display_mode)
    display(summary_df)
else:
    print("No data available for summary table")

Unnamed: 0,Error
0,Missing 'total_percentage' column or no data f...


### 3.5 Testing Interactive Folium Map

Let's try creating the interactive Folium map.

#### First test the GeoJSON File

In [20]:
# Add this to your notebook to check the GeoJSON file
try:
    import geopandas as gpd
    
    world_path = "./data/world_boundaries.geojson"
    world = gpd.read_file(world_path)
    
    print(f"Successfully loaded GeoJSON file with shape: {world.shape}")
    print("\nColumns in GeoJSON:")
    print(world.columns.tolist())
    
    # Check what ISO column is present
    iso_cols = [col for col in world.columns if 'iso' in col.lower()]
    print(f"\nPossible ISO columns: {iso_cols}")
    
    # Check a few sample values
    if len(iso_cols) > 0:
        sample_col = iso_cols[0]
        print(f"\nSample values from {sample_col}:")
        print(world[sample_col].head())
    
    # Check for US, CN as examples
    for iso in ['US', 'CN']:
        for col in iso_cols:
            matches = world[world[col] == iso]
            if not matches.empty:
                print(f"Found {iso} in column {col}: {len(matches)} matches")
    
except Exception as e:
    print(f"Error examining GeoJSON file: {e}")

Successfully loaded GeoJSON file with shape: (175, 170)

Columns in GeoJSON:
['featurecla', 'scalerank', 'labelrank', 'sovereignt', 'sov_a3', 'adm0_dif', 'level', 'type', 'tlc', 'admin', 'adm0_a3', 'geou_dif', 'geounit', 'gu_a3', 'su_dif', 'subunit', 'su_a3', 'brk_diff', 'name', 'name_long', 'brk_a3', 'brk_name', 'brk_group', 'abbrev', 'postal', 'formal_en', 'formal_fr', 'name_ciawf', 'note_adm0', 'note_brk', 'name_sort', 'name_alt', 'mapcolor7', 'mapcolor8', 'mapcolor9', 'mapcolor13', 'pop_est', 'pop_rank', 'pop_year', 'gdp_md', 'gdp_year', 'economy', 'income_grp', 'fips_10', 'iso_a2', 'iso_a2_eh', 'iso_a3', 'iso_a3_eh', 'iso_n3', 'iso_n3_eh', 'un_a3', 'wb_a2', 'wb_a3', 'woe_id', 'woe_id_eh', 'woe_note', 'adm0_iso', 'adm0_diff', 'adm0_tlc', 'adm0_a3_us', 'adm0_a3_fr', 'adm0_a3_ru', 'adm0_a3_es', 'adm0_a3_cn', 'adm0_a3_tw', 'adm0_a3_in', 'adm0_a3_np', 'adm0_a3_pk', 'adm0_a3_de', 'adm0_a3_gb', 'adm0_a3_br', 'adm0_a3_il', 'adm0_a3_ps', 'adm0_a3_sa', 'adm0_a3_eg', 'adm0_a3_ma', 'adm0_a3_p

In [21]:
# Import the create_folium_map function
from app import create_folium_map

# Test creating folium map
map_html = create_folium_map(country_list, selected_countries)

# Display map
from IPython.display import HTML
display(HTML(map_html))

TypeError: 'Map' object is not subscriptable

## 4. Testing Article Plots

Now let's test the different article plots.

### 4.1 Main Countries Article Plot

In [None]:
# Process article data
article_columns = ['source', 'year_x', 'country_x', 'percentage_x']
if all(col in df.columns for col in article_columns):
    article_data = df[article_columns].dropna().copy()
    article_data.columns = ['source', 'year', 'country', 'value']
    
    # Filter for specific source
    df_filtered = article_data[article_data['source'] == "Country participation in the CS"]
    
    if not df_filtered.empty:
        print(f"Found {len(df_filtered)} rows for 'Country participation in the CS'")
        fig = create_article_plot(df_filtered, "Country participation in the CS")
        fig.show()
    else:
        print("No data found for 'Country participation in the CS'")
else:
    print("Article data columns not found in the dataset")
    # Create dummy article data for testing
    print("Creating dummy article data...")
    article_data = pd.DataFrame({
        'source': ['Country participation in the CS'] * 69,
        'year': list(range(2000, 2023)) * 3,
        'country': ['United States'] * 23 + ['China'] * 23 + ['Germany'] * 23,
        'value': np.random.uniform(1, 10, 69),
    })
    
    df_filtered = article_data[article_data['source'] == "Country participation in the CS"]
    fig = create_article_plot(df_filtered, "Country participation in the CS")
    fig.show()

Found 324 rows for 'Country participation in the CS'


### 4.2 Top Collaborations Plot

In [None]:
# Test top collaborations plot
is_collab = True  # True for collaborations, False for individual countries
chem_filter = chemical_categories[1]  # First chemical category

filtered_data = df[
    (df['is_collab'] == is_collab) & 
    (df['chemical'] == chem_filter)
]

if not filtered_data.empty:
    # Get top 10
    top_data = (
        filtered_data.groupby('country')['percentage']
        .mean()
        .sort_values(ascending=False)
        .head(10)
    )
    
    top_filtered = filtered_data[filtered_data['country'].isin(top_data.index)]
    
    if not top_filtered.empty:
        fig = create_top_trends_plot(
            top_filtered,
            f"Top 10 {'Collaborations' if is_collab else 'Countries'}: {chem_filter}"
        )
        fig.show()
    else:
        print("No data available for top collaborations plot after filtering")
else:
    print("No data available for collaborations with the selected chemical filter")

## 5. Testing Other Article Plots

Let's test other article plots that might be missing in the app.

In [None]:
# Test GDP plot
gdp_source = "Annual growth rate of the GDP"
if 'source' in article_data.columns:
    gdp_data = article_data[article_data['source'] == gdp_source]
    
    if not gdp_data.empty:
        fig = create_article_plot(gdp_data, gdp_source)
        # Add the vertical lines and annotations for GDP as in the app code
        fig.add_vline(x=2007.5, line_dash="dash", line_color="grey")
        fig.add_vline(x=2019.5, line_dash="dash", line_color="grey")
        fig.add_annotation(x=2007.5, y=fig.data[0].y.max() * 0.9 if fig.data else 10,
                          text="Financial Crisis", showarrow=True)
        fig.add_annotation(x=2019.5, y=fig.data[0].y.max() * 0.8 if fig.data else 8,
                          text="COVID-19", showarrow=True)
        fig.show()
    else:
        print(f"No data found for '{gdp_source}'")
else:
    print("Article data does not have a 'source' column")

In [None]:
# Test Researchers plot
researchers_source = "Number of Researchers"
if 'source' in article_data.columns:
    researchers_data = article_data[article_data['source'] == researchers_source]
    
    if not researchers_data.empty:
        # Scale the values to millions for better display
        researchers_data['value'] = researchers_data['value'] / 1e6
        fig = create_article_plot(researchers_data, researchers_source)
        fig.update_layout(yaxis_title="Value (Millions)")
        fig.show()
    else:
        print(f"No data found for '{researchers_source}'")
else:
    print("Article data does not have a 'source' column")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Test CS Expansion plot
cs_expansion_source = "Expansion of the CS"
if 'source' in article_data.columns:
    cs_expansion_data = article_data[article_data['source'] == cs_expansion_source]
    
    if not cs_expansion_data.empty:
        fig = create_article_plot(cs_expansion_data, cs_expansion_source)
        fig.show()
    else:
        print(f"No data found for '{cs_expansion_source}'")
else:
    print("Article data does not have a 'source' column")

## 6. Implementing Missing Article Plot Functions

Based on the tests above, let's implement the missing article plot functions if needed.

In [None]:
# Implementation of missing article plot functions

def create_gdp_plot(data: pd.DataFrame):
    """Create GDP article plot with annotations for economic events"""
    fig = go.Figure()
    
    for country in data['country'].unique():
        country_data = data[data['country'] == country]
        fig.add_trace(go.Scatter(
            x=country_data['year'],
            y=country_data['value'],
            mode='lines+markers',
            name=country,
            line=dict(width=2),
            marker=dict(size=6)
        ))
    
    # Add vertical lines and annotations for economic events
    fig.add_vline(x=2007.5, line_dash="dash", line_color="grey")
    fig.add_vline(x=2019.5, line_dash="dash", line_color="grey")
    
    # Calculate y-position for annotations based on data
    max_val = max([max(trace['y']) for trace in fig.data]) if fig.data else 10
    
    fig.add_annotation(
        x=2007.5, 
        y=max_val * 0.9,
        text="Financial Crisis", 
        showarrow=True,
        arrowhead=1
    )
    fig.add_annotation(
        x=2019.5, 
        y=max_val * 0.8,
        text="COVID-19", 
        showarrow=True,
        arrowhead=1
    )
    
    fig.update_layout(
        title="Figure: Annual growth rate of the GDP",
        xaxis_title="Year",
        yaxis_title="GDP Growth Rate (%)",
        template='plotly_white',
        hovermode='x unified'
    )
    
    return fig

def create_researchers_plot(data: pd.DataFrame):
    """Create researchers plot with values in millions"""
    fig = go.Figure()
    
    for country in data['country'].unique():
        country_data = data[data['country'] == country]
        scaled_values = country_data['value'] / 1e6  # Convert to millions
        
        fig.add_trace(go.Scatter(
            x=country_data['year'],
            y=scaled_values,
            mode='lines+markers',
            name=country,
            line=dict(width=2),
            marker=dict(size=6)
        ))
    
    fig.update_layout(
        title="Figure: Number of Researchers",
        xaxis_title="Year",
        yaxis_title="Number of Researchers (Millions)",
        template='plotly_white',
        hovermode='x unified'
    )
    
    return fig

def create_cs_expansion_plot(data: pd.DataFrame):
    """Create chemical space expansion plot"""
    fig = go.Figure()
    
    for country in data['country'].unique():
        country_data = data[data['country'] == country]
        fig.add_trace(go.Scatter(
            x=country_data['year'],
            y=country_data['value'],
            mode='lines+markers',
            name=country,
            line=dict(width=2),
            marker=dict(size=6)
        ))
    
    fig.update_layout(
        title="Figure: Chemical Space Expansion",
        xaxis_title="Year",
        yaxis_title="Expansion Rate",
        template='plotly_white',
        hovermode='x unified'
    )
    
    return fig

In [None]:
# Test new implementations with dummy data if needed
dummy_gdp_data = pd.DataFrame({
    'source': ['Annual growth rate of the GDP'] * 69,
    'year': list(range(2000, 2023)) * 3,
    'country': ['United States'] * 23 + ['China'] * 23 + ['Germany'] * 23,
    'value': np.random.uniform(-5, 10, 69),  # GDP growth can be negative
})

fig = create_gdp_plot(dummy_gdp_data)
fig.show()

dummy_researchers_data = pd.DataFrame({
    'source': ['Number of Researchers'] * 69,
    'year': list(range(2000, 2023)) * 3,
    'country': ['United States'] * 23 + ['China'] * 23 + ['Germany'] * 23,
    'value': np.random.uniform(500000, 2000000, 69),  # Values in absolute numbers
})

fig = create_researchers_plot(dummy_researchers_data)
fig.show()

## 7. Suggested Implementation for App

Based on the testing, here's the implementations for the missing plot functions in the app.py file:

In [None]:
# add to app.py
print("""
# Add these functions to your app.py file:

def create_gdp_plot(data: pd.DataFrame):
    """Create GDP article plot with annotations for economic events"""
    fig = go.Figure()
    
    for country in data['country'].unique():
        country_data = data[data['country'] == country]
        fig.add_trace(go.Scatter(
            x=country_data['year'],
            y=country_data['value'],
            mode='lines+markers',
            name=country,
            line=dict(width=2),
            marker=dict(size=6)
        ))
    
    # Add vertical lines and annotations for economic events
    fig.add_vline(x=2007.5, line_dash="dash", line_color="grey")
    fig.add_vline(x=2019.5, line_dash="dash", line_color="grey")
    
    # Calculate y-position for annotations based on data
    max_val = max([max(trace['y']) for trace in fig.data]) if fig.data else 10
    
    fig.add_annotation(
        x=2007.5, 
        y=max_val * 0.9,
        text="Financial Crisis", 
        showarrow=True,
        arrowhead=1
    )
    fig.add_annotation(
        x=2019.5, 
        y=max_val * 0.8,
        text="COVID-19", 
        showarrow=True,
        arrowhead=1
    )
    
    fig.update_layout(
        title="Figure: Annual growth rate of the GDP",
        xaxis_title="Year",
        yaxis_title="GDP Growth Rate (%)",
        template='plotly_white',
        hovermode='x unified'
    )
    
    return fig

def create_researchers_plot(data: pd.DataFrame):
    """Create researchers plot with values in millions"""
    fig = go.Figure()
    
    for country in data['country'].unique():
        country_data = data[data['country'] == country]
        scaled_values = country_data['value'] / 1e6  # Convert to millions
        
        fig.add_trace(go.Scatter(
            x=country_data['year'],
            y=scaled_values,
            mode='lines+markers',
            name=country,
            line=dict(width=2),
            marker=dict(size=6)
        ))
    
    fig.update_layout(
        title="Figure: Number of Researchers",
        xaxis_title="Year",
        yaxis_title="Number of Researchers (Millions)",
        template='plotly_white',
        hovermode='x unified'
    )
    
    return fig

def create_cs_expansion_plot(data: pd.DataFrame):
    """Create chemical space expansion plot"""
    fig = go.Figure()
    
    for country in data['country'].unique():
        country_data = data[data['country'] == country]
        fig.add_trace(go.Scatter(
            x=country_data['year'],
            y=country_data['value'],
            mode='lines+markers',
            name=country,
            line=dict(width=2),
            marker=dict(size=6)
        ))
    
    fig.update_layout(
        title="Figure: Chemical Space Expansion",
        xaxis_title="Year",
        yaxis_title="Expansion Rate",
        template='plotly_white',
        hovermode='x unified'
    )
    
    return fig
""")

print("""
# Then modify your server function to add these plot outputs:

@output
@render.plot
def article_gdp_plot():
    article_data = data_objects['article_data']
    gdp_data = article_data[article_data['source'] == "Annual growth rate of the GDP"]
    
    if gdp_data.empty:
        return create_empty_plot("No GDP data available")
        
    return create_gdp_plot(gdp_data)

@output
@render.plot
def article_researchers_plot():
    article_data = data_objects['article_data']
    researchers_data = article_data[article_data['source'] == "Number of Researchers"]
    
    if researchers_data.empty:
        return create_empty_plot("No researchers data available")
        
    return create_researchers_plot(researchers_data)

@output
@render.plot
def article_cs_expansion_plot():
    article_data = data_objects['article_data']
    cs_data = article_data[article_data['source'] == "CS Expansion"]
    
    if cs_data.empty:
        return create_empty_plot("No CS expansion data available")
        
    return create_cs_expansion_plot(cs_data)
""")

## 8. Checking for Missing Imports

Let's verify that all necessary imports are included the app.py file.

In [None]:
print("""
Make sure app.py includes all these imports:

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import geopandas as gpd
import folium
from folium import plugins
from shiny import App, ui, render, reactive
from shiny.types import FileInfo
from pathlib import Path
from typing import List, Dict, Optional, Tuple
import numpy as np
""")