In [1]:
import pandas as pd
import plotly.express as px
import os
from ipywidgets import widgets, VBox, Output, Button
from IPython.display import display

# Module-level holders for the three datasets; each stays None until a
# preloaded file is found below or the user uploads one through the UI.
apc_data = subject_data = article_data = None

# Default locations that are probed for preloaded data.
apc_file_path = "APC.csv"
subject_file_path = "subj.csv"
article_file_path = "details.csv"

# APC data: read it from disk when present, otherwise ask for an upload.
if not os.path.exists(apc_file_path):
    print("APC data file not found. Please upload it.")
else:
    apc_data = pd.read_csv(apc_file_path)
    print(f"Loaded preloaded APC data from {apc_file_path}.")

# Journal Subject data: same probe-then-load approach.
if not os.path.exists(subject_file_path):
    print("Journal Subject data file not found. Please upload it.")
else:
    subject_data = pd.read_csv(subject_file_path)
    print(f"Loaded preloaded Journal Subject data from {subject_file_path}.")

# Article Details data: same probe-then-load approach.
if not os.path.exists(article_file_path):
    print("Article Details data file not found. Please upload it.")
else:
    article_data = pd.read_csv(article_file_path)
    print(f"Loaded preloaded Article Details data from {article_file_path}.")

# Upload controls: one single-file picker per dataset. Only the subject
# picker also accepts Excel workbooks.
apc_upload = widgets.FileUpload(
    description='Upload APC',
    accept='.csv',
    multiple=False,
)
subject_upload = widgets.FileUpload(
    description='Upload Subjects',
    accept='.csv,.xlsx',
    multiple=False,
)
article_upload = widgets.FileUpload(
    description='Upload Details',
    accept='.csv',
    multiple=False,
)

# Trigger button and the output area that the analysis renders into.
analyze_button = widgets.Button(button_style='success', description="Analyze Data")
results_output = widgets.Output()

# Define callback functions
def load_apc(change):
    """Load the uploaded APC CSV into the module-level ``apc_data``.

    Observer callback for ``apc_upload``'s ``value`` trait.

    Args:
        change: Trait-change notification passed by ``observe``; unused, the
            current selection is read from ``apc_upload.value`` directly.
    """
    global apc_data
    import io  # local import keeps this callback self-contained

    uploaded = apc_upload.value
    # Depending on the ipywidgets release, ``value`` is either a dict keyed by
    # filename or a tuple of per-file dicts; normalise both to a list.
    if isinstance(uploaded, dict):
        files = list(uploaded.values())
    else:
        files = list(uploaded)
    if not files:
        # The observer also fires when the selection is emptied.
        return

    # ``content`` holds raw bytes (or a memoryview); pandas needs a
    # file-like object, not the bytes themselves.
    apc_data = pd.read_csv(io.BytesIO(files[0]['content']))
    print("APC data uploaded successfully.")

def load_subject(change):
    """Load the uploaded subject file into the module-level ``subject_data``.

    Observer callback for ``subject_upload``'s ``value`` trait. Files whose
    name ends in ``.xlsx`` are read with ``pd.read_excel``; anything else is
    read as CSV.

    Args:
        change: Trait-change notification passed by ``observe``; unused, the
            current selection is read from ``subject_upload.value`` directly.
    """
    global subject_data
    import io  # local import keeps this callback self-contained

    uploaded = subject_upload.value
    # Depending on the ipywidgets release, ``value`` is either a dict keyed by
    # filename or a tuple of per-file dicts carrying a 'name' entry; normalise
    # both to (filename, file-dict) pairs.
    if isinstance(uploaded, dict):
        entries = list(uploaded.items())
    else:
        entries = [(item['name'], item) for item in uploaded]
    if not entries:
        # The observer also fires when the selection is emptied.
        return

    filename, file = entries[0]
    # ``content`` holds raw bytes (or a memoryview); pandas needs a
    # file-like object, not the bytes themselves.
    buffer = io.BytesIO(file['content'])
    # Compare case-insensitively so "SUBJ.XLSX" is still treated as Excel.
    if filename.lower().endswith('.xlsx'):
        subject_data = pd.read_excel(buffer)
    else:
        subject_data = pd.read_csv(buffer)
    print("Subject data uploaded successfully.")

def load_article(change):
    """Load the uploaded article-details CSV into ``article_data``.

    Observer callback for ``article_upload``'s ``value`` trait.

    Args:
        change: Trait-change notification passed by ``observe``; unused, the
            current selection is read from ``article_upload.value`` directly.
    """
    global article_data
    import io  # local import keeps this callback self-contained

    uploaded = article_upload.value
    # Depending on the ipywidgets release, ``value`` is either a dict keyed by
    # filename or a tuple of per-file dicts; normalise both to a list.
    if isinstance(uploaded, dict):
        files = list(uploaded.values())
    else:
        files = list(uploaded)
    if not files:
        # The observer also fires when the selection is emptied.
        return

    # ``content`` holds raw bytes (or a memoryview); pandas needs a
    # file-like object, not the bytes themselves.
    article_data = pd.read_csv(io.BytesIO(files[0]['content']))
    print("Article Details data uploaded successfully.")

# Hook each upload widget to its loader so a change of the widget's
# ``value`` trait triggers a reload of the matching dataset.
for _upload_widget, _loader in (
    (apc_upload, load_apc),
    (subject_upload, load_subject),
    (article_upload, load_article),
):
    _upload_widget.observe(_loader, names='value')

# Analysis function
def analyze_data(_):
    """Report the hybrid open-access share of the loaded articles.

    Click handler for ``analyze_button``. Clears ``results_output`` and then,
    inside it, prints the fraction of rows in ``article_data`` whose
    'Article publishing model' is 'Hybrid open access' and shows a bar chart
    of hybrid article counts per 'Article Payer Institute Name'.

    All three datasets must be loaded before the analysis runs, although this
    example analysis only reads ``article_data``.

    Args:
        _: Argument supplied by the button click; unused.
    """
    model_col = 'Article publishing model'
    institute_col = 'Article Payer Institute Name'

    results_output.clear_output()
    with results_output:
        # Check if required files are loaded
        if apc_data is None or subject_data is None or article_data is None:
            print("Please ensure all files are uploaded before analyzing.")
            return

        # Fail with a readable message instead of a KeyError traceback when
        # the uploaded file does not have the expected layout.
        missing = [
            col for col in (model_col, institute_col)
            if col not in article_data.columns
        ]
        if missing:
            print(
                "Article Details data is missing required column(s): "
                + ", ".join(missing)
            )
            return

        # Example analysis: Calculate Hybrid OA Ratio
        hybrid_articles = article_data[article_data[model_col] == 'Hybrid open access']
        total_articles = len(article_data)
        # Guard against an empty table to avoid dividing by zero.
        hybrid_ratio = len(hybrid_articles) / total_articles if total_articles > 0 else 0

        print(f"Hybrid OA Ratio: {hybrid_ratio:.2%}")

        # Visualization example
        hybrid_counts = hybrid_articles.groupby(institute_col).size().reset_index(name='Count')
        fig = px.bar(hybrid_counts, x=institute_col, y='Count', title='Hybrid OA Counts by Institute')
        fig.show()

# Run the analysis whenever the button is clicked.
analyze_button.on_click(analyze_data)

# Display UI: three labelled upload steps stacked vertically, followed by
# the analyze button and the area that receives the analysis output.
ui_layout = VBox(children=[
    widgets.Label("Step 1: Upload APC Data"),
    apc_upload,
    widgets.Label("Step 2: Upload Journal Subject Data"),
    subject_upload,
    widgets.Label("Step 3: Upload Article Details Data"),
    article_upload,
    analyze_button,
    results_output,
])
display(ui_layout)


Loaded preloaded APC data from APC.csv.
Journal Subject data file not found. Please upload it.
Loaded preloaded Article Details data from details.csv.


VBox(children=(Label(value='Step 1: Upload APC Data'), FileUpload(value=(), accept='.csv', description='Upload…