# In [1]:
import io

import pandas as pd
import plotly.express as px
from IPython.display import display
from ipywidgets import widgets, VBox, Output, Button

# Module-level slots for the three uploaded tables. Each stays None until the
# matching upload callback stores a DataFrame in it.
apc_data = subject_data = article_data = None

# One single-file upload control per input. Only the subject upload also
# accepts .xlsx; the other two are restricted to .csv.
apc_upload = widgets.FileUpload(
    description='Upload APC',
    accept='.csv',
    multiple=False,
)
subject_upload = widgets.FileUpload(
    description='Upload Subjects',
    accept='.csv,.xlsx',
    multiple=False,
)
article_upload = widgets.FileUpload(
    description='Upload Details',
    accept='.csv',
    multiple=False,
)

# Trigger for the analysis, plus the shared area where status messages and
# results are printed.
analyze_button = Button(button_style='success', description="Analyze Data")
results_output = Output()

# Define callback functions for uploads
def load_apc(change):
    """Parse the file held by ``apc_upload`` as CSV and store it in ``apc_data``.

    Registered as an observer on the widget's ``value`` trait. Both layouts of
    ``FileUpload.value`` are handled: the ipywidgets 7 dict keyed by filename
    and the ipywidgets 8 tuple of per-file records.

    Args:
        change: Trait change notification supplied by ``observe``. Unused; the
            current value is read from the widget directly.
    """
    global apc_data

    uploaded = apc_upload.value
    if not uploaded:
        # The observer also fires when the selection is cleared; there is
        # nothing to parse, so keep whatever was loaded before.
        return

    try:
        # dict -> ipywidgets 7 ({filename: record}); otherwise a sequence of records.
        if isinstance(uploaded, dict):
            record = next(iter(uploaded.values()))
        else:
            record = uploaded[0]
        # 'content' may be bytes or a memoryview; BytesIO accepts either.
        apc_data = pd.read_csv(io.BytesIO(record['content']))
    except Exception as e:
        # UI boundary: report any read/parse failure to the user instead of
        # letting it disappear inside the widget callback machinery.
        with results_output:
            print(f"Error uploading APC data: {e}")
        return

    with results_output:
        print("APC data uploaded successfully.")

def load_subject(change):
    """Parse the file held by ``subject_upload`` and store it in ``subject_data``.

    Files whose name ends in ``.xlsx`` (case-insensitive) are read with
    ``pd.read_excel``; everything else is read as CSV. Both layouts of
    ``FileUpload.value`` are handled: the ipywidgets 7 dict keyed by filename
    and the ipywidgets 8 tuple of per-file records carrying a ``'name'`` key.

    Args:
        change: Trait change notification supplied by ``observe``. Unused; the
            current value is read from the widget directly.
    """
    global subject_data

    uploaded = subject_upload.value
    if not uploaded:
        # The observer also fires when the selection is cleared; there is
        # nothing to parse, so keep whatever was loaded before.
        return

    try:
        if isinstance(uploaded, dict):
            # ipywidgets 7: the filename is the dict key, not a field of the record.
            filename, record = next(iter(uploaded.items()))
        else:
            record = uploaded[0]
            filename = record['name']

        # 'content' may be bytes or a memoryview; BytesIO accepts either.
        buffer = io.BytesIO(record['content'])
        if filename.lower().endswith('.xlsx'):
            subject_data = pd.read_excel(buffer)
        else:
            subject_data = pd.read_csv(buffer)
    except Exception as e:
        # UI boundary: report any read/parse failure to the user instead of
        # letting it disappear inside the widget callback machinery.
        with results_output:
            print(f"Error uploading Subject data: {e}")
        return

    with results_output:
        print("Subject data uploaded successfully.")

def load_article(change):
    """Parse the file held by ``article_upload`` as CSV and store it in ``article_data``.

    Registered as an observer on the widget's ``value`` trait. Both layouts of
    ``FileUpload.value`` are handled: the ipywidgets 7 dict keyed by filename
    and the ipywidgets 8 tuple of per-file records.

    Args:
        change: Trait change notification supplied by ``observe``. Unused; the
            current value is read from the widget directly.
    """
    global article_data

    uploaded = article_upload.value
    if not uploaded:
        # The observer also fires when the selection is cleared; there is
        # nothing to parse, so keep whatever was loaded before.
        return

    try:
        # dict -> ipywidgets 7 ({filename: record}); otherwise a sequence of records.
        if isinstance(uploaded, dict):
            record = next(iter(uploaded.values()))
        else:
            record = uploaded[0]
        # 'content' may be bytes or a memoryview; BytesIO accepts either.
        article_data = pd.read_csv(io.BytesIO(record['content']))
    except Exception as e:
        # UI boundary: report any read/parse failure to the user instead of
        # letting it disappear inside the widget callback machinery.
        with results_output:
            print(f"Error uploading Article Details data: {e}")
        return

    with results_output:
        print("Article Details data uploaded successfully.")

# Attach each loader to its upload widget so it runs whenever the widget's
# 'value' trait changes.
for _uploader, _loader in (
    (apc_upload, load_apc),
    (subject_upload, load_subject),
    (article_upload, load_article),
):
    _uploader.observe(_loader, names='value')

# Analysis function
def analyze_data(_):
    """Report the hybrid open-access share of the articles and chart it by institute.

    Click handler for the analyze button. Clears ``results_output`` and then,
    inside it:

    * asks for the missing uploads if any of the three tables is still ``None``;
    * reports which required column is absent from the article table, instead
      of failing with a ``KeyError`` traceback;
    * prints the fraction of rows whose 'Article publishing model' equals
      'Hybrid open access';
    * shows a bar chart of hybrid article counts per
      'Article Payer Institute Name', skipped when there are no hybrid rows.

    Args:
        _: The clicked button, as passed by ``on_click``. Unused.
    """
    results_output.clear_output()
    with results_output:
        # Check if required files are loaded
        if apc_data is None or subject_data is None or article_data is None:
            print("Please ensure all files are uploaded before analyzing.")
            return

        model_col = 'Article publishing model'
        institute_col = 'Article Payer Institute Name'

        # Fail with a readable message when the uploaded file has other headers.
        missing = [col for col in (model_col, institute_col) if col not in article_data.columns]
        if missing:
            print(f"Article Details data is missing required column(s): {', '.join(missing)}")
            return

        # Example analysis: Calculate Hybrid OA Ratio
        hybrid_articles = article_data[article_data[model_col] == 'Hybrid open access']
        total_articles = len(article_data)
        # Guard the division so an empty article table reports 0% rather than raising.
        hybrid_ratio = len(hybrid_articles) / total_articles if total_articles > 0 else 0

        print(f"Hybrid OA Ratio: {hybrid_ratio:.2%}")

        if hybrid_articles.empty:
            # Nothing to plot; an empty chart would only be confusing.
            print("No hybrid open access articles found; skipping chart.")
            return

        # Visualization example
        hybrid_counts = hybrid_articles.groupby(institute_col).size().reset_index(name='Count')
        fig = px.bar(hybrid_counts, x=institute_col, y='Count', title='Hybrid OA Counts by Institute')
        fig.show()

# Run the analysis whenever the button is clicked.
analyze_button.on_click(analyze_data)

# Display UI: each upload control sits under its step label, followed by the
# analyze button and the shared output area.
_ui_rows = [
    widgets.Label("Step 1: Upload APC Data"),
    apc_upload,
    widgets.Label("Step 2: Upload Journal Subject Data"),
    subject_upload,
    widgets.Label("Step 3: Upload Article Details Data"),
    article_upload,
    analyze_button,
    results_output,
]
display(VBox(_ui_rows))


# Output: VBox(children=(Label(value='Step 1: Upload APC Data'), FileUpload(value=(), accept='.csv', description='Upload…