In [2]:
import json
import os

import numpy as np
import pandas as pd
import requests

# Read the Excel file into a DataFrame.
# The path is relative, so the workbook must sit in the notebook's working
# directory. No sheet_name is passed, so pandas reads its default sheet.
df = pd.read_excel('Academic -- dataset.xlsx')

# Normalize a single spreadsheet cell into a JSON-serializable Python value
def clean_value(value):
    """Convert one cell value into something ``json.dumps`` can serialize.

    Parameters
    ----------
    value : object
        A scalar taken from a DataFrame row (string, number, NumPy scalar,
        NaN/None/NaT, ...).

    Returns
    -------
    object
        * ``""`` for any missing value recognised by ``pd.isna``.
        * ``bool`` for NumPy booleans.
        * ``int`` for NumPy integers and for NumPy floats holding a whole
          number (e.g. ``2.0`` -> ``2``); ``float`` for other NumPy floats.
        * The stripped string for ``str`` input.
        * The value unchanged for anything else.
    """
    if pd.isna(value):  # NaN, None, NaT, pd.NA
        return ""  # Missing cells are exported as empty strings
    if isinstance(value, np.bool_):
        # np.bool_ is not JSON serializable; convert to a native bool
        return bool(value)
    if isinstance(value, (np.integer, np.floating)):
        # Cover every NumPy integer/float width (int32, float32, ...), not
        # only the 64-bit ones, because json.dumps rejects NumPy integers.
        # Whole-number floats collapse to int; infinities stay float since
        # float('inf').is_integer() is False.
        return int(value) if float(value).is_integer() else float(value)
    if isinstance(value, str):
        return value.strip()  # Remove any extra whitespace
    return value

# Create the new JSON structure

# Optional columns are resolved once, up front, instead of being silently
# defaulted on every row. The intro column is looked up under the spelling
# used so far ('Intro_one_setence') first, then under the corrected spelling,
# so the export works whichever header the workbook actually carries.
INTRO_COLUMN_CANDIDATES = ('Intro_one_setence', 'Intro_one_sentence')
intro_column = next(
    (name for name in INTRO_COLUMN_CANDIDATES if name in df.columns), None
)
has_role_column = 'Binary_HaveRole' in df.columns

missing_optional = []
if intro_column is None:
    missing_optional.append(" / ".join(INTRO_COLUMN_CANDIDATES))
if not has_role_column:
    missing_optional.append('Binary_HaveRole')
if missing_optional:
    # Make the fallback visible: these fields will be ""/False for every record
    print("Warning: optional column(s) not found in the dataset:",
          ", ".join(missing_optional))

records = []
for _, row in df.iterrows():
    # Get the primary title and additional title from the respective columns
    primary_title = clean_value(row['title_academic'])
    additional_title = clean_value(row['Alternative Administrative Title (for display)'])

    # Join both titles when both are present; otherwise use whichever one
    # exists, or an empty string when neither does.
    if primary_title and additional_title:
        final_title = f"{primary_title}, {additional_title}"
    else:
        final_title = primary_title or additional_title or ""

    record = {
        "name": clean_value(row['First_name']),
        "title": final_title,
        "field": clean_value(row['Field_clean']),
        "fieldCategory": clean_value(row['Field_category']),
        "researchInterests": clean_value(row['Research_2_keywords']),
        "phdFrom": clean_value(row['Education Institution']),
        "institutionType": clean_value(row['Private/public']),
        "gender": clean_value(row['Gender']),
        "citizenship": clean_value(row['Citizenship']),
        # Truthiness of the cleaned cell; False when the column is absent
        "hasRole": bool(clean_value(row['Binary_HaveRole'])) if has_role_column else False,
        # roleDetail is exported as a boolean flag: True when title_admin holds
        # a non-empty value, False otherwise.
        # NOTE(review): an earlier comment said the JS checks that roleDetail is
        # "non-empty", which suggests a string may have been intended - confirm
        # with the consumer before changing the type.
        "roleDetail": bool(clean_value(row['title_admin'])),
        "intro": clean_value(row[intro_column]) if intro_column else "",
        "Original_name": clean_value(row['Original_name'])
    }
    records.append(record)

# Prepare the JSON data
json_data = {"record": records}

# Refuse to continue with an empty dataset: there is nothing to preview or
# upload, and indexing records[0] would otherwise fail with a bare IndexError.
if not records:
    raise ValueError("No records were built from the dataset; nothing to upload.")

# Debugging: print the first record to check for proper encoding of any special characters
print("Sample record (first entry):")
print(json.dumps(records[0], ensure_ascii=False, indent=2))

# JSONBin.io API endpoint
url = "https://api.jsonbin.io/v3/b"

# The master key is a secret and must not live in the notebook source.
# It is read from the environment instead; any key that was previously
# committed in this file should be treated as leaked and rotated.
api_key = os.environ.get('JSONBIN_MASTER_KEY')
if not api_key:
    raise RuntimeError(
        "JSONBIN_MASTER_KEY is not set. Export your JSONBin master key in the "
        "environment before running this cell."
    )

# Headers with explicit charset
headers = {
    'Content-Type': 'application/json; charset=utf-8',
    'X-Master-Key': api_key
}

try:
    # Make the POST request to create a new bin.
    # The body is encoded by hand so non-ASCII text (e.g. Original_name) is
    # sent as UTF-8 rather than as \u escapes.
    response = requests.post(
        url,
        data=json.dumps(json_data, ensure_ascii=False).encode('utf-8'),
        headers=headers,
        timeout=30  # without a timeout requests can block indefinitely
    )

    # Check the response
    if response.status_code == 200:
        print("Successfully created new JSONBin!")
        print("Bin ID:", response.json()['metadata']['id'])
    else:
        print("Error creating JSONBin:", response.text)

except (requests.exceptions.RequestException, ValueError, KeyError) as e:
    # RequestException: network/HTTP failures (including timeouts);
    # ValueError: response body was not valid JSON;
    # KeyError: JSON did not contain metadata.id.
    print(f"An error occurred: {str(e)}")


Sample record (first entry):
{
  "name": "Yunhui",
  "title": "Research Scientist",
  "field": "Geology",
  "fieldCategory": "Sciences",
  "researchInterests": "new minerals, geological processes",
  "phdFrom": "Chinese Academy of Geological Sciences",
  "institutionType": "public",
  "gender": "Female",
  "citizenship": "China",
  "hasRole": false,
  "roleDetail": false,
  "intro": "",
  "Original_name": "蕴慧"
}
Successfully created new JSONBin!
Bin ID: 67c85747acd3cb34a8f56da2


In [3]:
import pandas as pd

# Load the academic dataset (adjust the file path as needed)
df = pd.read_excel('Academic -- dataset.xlsx')

# Female indicator: 1 when the gender label, stringified, trimmed and
# lower-cased, is exactly 'female'; 0 for everything else (including blanks).
gender_labels = df['Gender'].map(lambda raw: str(raw).strip().lower())
df['Female'] = (gender_labels == 'female').astype(int)

# Sciences indicator: 1 when the trimmed Field_category is exactly "Sciences"
# (case-sensitive); 0 for everything else.
category_labels = df['Field_category'].map(lambda raw: str(raw).strip())
df['Sciences'] = (category_labels == 'Sciences').astype(int)

# Pearson correlation between the two 0/1 indicators
female_flag = df['Female']
sciences_flag = df['Sciences']
correlation = female_flag.corr(sciences_flag)
print("Correlation between being female and belonging to Sciences:", correlation)

# Cross-tabulate the indicators to show the counts behind the coefficient
contingency = pd.crosstab(female_flag, sciences_flag)
print("\nContingency table (rows: Female, columns: Sciences):")
print(contingency)


Correlation between being female and belonging to Sciences: -0.09022566592313516

Contingency table (rows: Female, columns: Sciences):
Sciences   0   1
Female          
0         39  21
1         44  16
