In [23]:
import json
import os

# Define the directory where your JSON files are stored
directory_path = './seed_data_1'

# Function to process each JSON file
def process_json_files(directory_path):
    all_documents = []  # List to hold all documents' data
    
    # Iterate over each file in the directory
    for file_name in os.listdir(directory_path):
        if file_name.endswith('.json'):  # Check if the file is a JSON file
            file_path = os.path.join(directory_path, file_name)
            with open(file_path, 'r') as file:
                data = json.load(file)  # Load the JSON data
                
                # Extracting necessary information
                document = {
                    "title": data.get("title", ""),
                    "fulltext": data.get("fullText", ""),
                    "authors": [author.get("name", "No name provided") for author in data.get("authors", [])],
                    "published_date": data.get("publishedDate", ""),
                    "doi": data.get("doi", "No DOI provided"),
                    "data_provider_name": data.get("dataProvider", {}).get("name", "No provider name"),
                    "document_type": data.get("documentType", "No document type"),
                    "tags": data.get("tags", [])
                }
                
                all_documents.append(document)  # Add the document's data to our list

    return all_documents

# Process the JSON files and get the documents' data
documents_data = process_json_files(directory_path)

# Example: Print the extracted information of the first document for verification
if documents_data:  # Check if there's at least one document
    first_doc = documents_data[0]
    print("Title:", first_doc["title"])
    print("Fulltext:", first_doc["fulltext"])
    print("Authors:", ', '.join(first_doc["authors"]))
    print("Published Date:", first_doc["published_date"])
    print("DOI:", first_doc["doi"])
    print("Data Provider:", first_doc["data_provider_name"])
    print("Document Type:", first_doc["document_type"])
    print("Tags:", ', '.join(first_doc["tags"]))
else:
    print("No documents found.")


Title: Workplace climate, degree of outness, and job satisfaction of gay and lesbian professional staff in higher education.
Fulltext:  JOHNSON, ROBERT BRADLEY, Ph.D. Workplace Climate, Degree of Outness, and Job Satisfaction of Gay and Lesbian Professional Staff in Higher Education. (2009) Directed by Dr. Deborah J. Taub. 174 pp.   Issues of diversity across college campuses are seeing a growing acceptance, however, acceptance across diversity groups is not seen as uniformly accepting, especially among gay and lesbian populations who may often find themselves in hostile, unwelcoming, or uncomfortable environments. Despite the perception that universities have been sites for political and social changes concerning the gay and lesbian population, institutions of higher education still have room for further growth concerning gay and lesbian populations and homophobia, especially as it relates to staff members. Although there is substantial research related to gay and lesbian individuals 

In [14]:
import labelbox
from labelbox.schema.data_row_metadata import DataRowMetadataKind

# Initialize Labelbox client
client = labelbox.Client(api_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbHN4eGlrMTcwMDJrMDcxNTQ3ZXlkc25hIiwib3JnYW5pemF0aW9uSWQiOiJjbHN4eGlqOWMwMDJqMDcxNWR4cmkxa3Y4IiwiYXBpS2V5SWQiOiJjbHVpNzY3czgwMTMwMDd6NDJydng1eW5nIiwic2VjcmV0IjoiM2Y4MmMyNGQ0NDIzN2VhNGQ1MDZhMDlmN2U2NjliM2IiLCJpYXQiOjE3MTIwNTEzNTQsImV4cCI6MjM0MzIwMzM1NH0.-FA3t7q7ingRvqyGsbfXpvee9N4-FefB-LOdkdTLRgo")

metadata_ontology = client.get_data_row_metadata_ontology()

# Function to create a metadata schema if it does not already exist
def create_metadata_schema(name, kind):
    existing_schema = metadata_ontology.get_by_name(name)
    if existing_schema is None:
        return metadata_ontology.create_schema(name=name, kind=kind)
    return existing_schema

# Example metadata schemas to create
metadata_schemas = {
    "Title": DataRowMetadataKind.string,
    "Authors": DataRowMetadataKind.string,
    "Published Date": DataRowMetadataKind.string,
    "DOI": DataRowMetadataKind.string,
    "Data Provider": DataRowMetadataKind.string,
    "Document Type": DataRowMetadataKind.string,
    "Tags": DataRowMetadataKind.enum,  # Assuming tags can be a predefined set of values
}

# Create the schemas
for name, kind in metadata_schemas.items():
    schema = create_metadata_schema(name, kind)
    print(f"Created/Found schema for {name} with ID {schema.uid}")

Created/Found schema for Title with ID clui7ccoo00hc07zjbrxp11zr
Created/Found schema for Authors with ID clui7cle400nd0706ectc9b48
Created/Found schema for Published Date with ID clui7w0yy0096072x686xd4zd
Created/Found schema for DOI with ID clui7d9yo005t070j0uunfc6e
Created/Found schema for Data Provider with ID clui7dftl004e070n06re1xrk
Created/Found schema for Document Type with ID clui7dliz01600717a1f46ry5
Created/Found schema for Tags with ID clui7hhzy01ax07yob22s83ef


In [24]:
import json
from labelbox import Client, DataRowMetadataField
from uuid import uuid4
import concurrent.futures

API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbHN4eGlrMTcwMDJrMDcxNTQ3ZXlkc25hIiwib3JnYW5pemF0aW9uSWQiOiJjbHN4eGlqOWMwMDJqMDcxNWR4cmkxa3Y4IiwiYXBpS2V5SWQiOiJjbHVpNzY3czgwMTMwMDd6NDJydng1eW5nIiwic2VjcmV0IjoiM2Y4MmMyNGQ0NDIzN2VhNGQ1MDZhMDlmN2U2NjliM2IiLCJpYXQiOjE3MTIwNTEzNTQsImV4cCI6MjM0MzIwMzM1NH0.-FA3t7q7ingRvqyGsbfXpvee9N4-FefB-LOdkdTLRgo"
client = Client(api_key=API_KEY)

dataset = client.create_dataset(name="The Bayard Corpus")

def upload_document(doc):
    try:
        fulltext = doc["fulltext"]
        # Convert the text to JSON string and check its size
        fulltext_json_string = json.dumps({"text": fulltext})
        if len(fulltext_json_string.encode('utf-8')) > 16 * 1024 * 1024:  # 16MB limit
            return f"Document {doc['title']} exceeds the 16MB limit and won't be uploaded."

        metadata_fields = [
            DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Title").uid, value=doc["title"]),
            DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Authors").uid, value=', '.join(doc["authors"])),
            # Treat Published Date as a string
            DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Published Date").uid, value=str(doc["published_date"])),
            DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("DOI").uid, value=doc["doi"]),
            DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Data Provider").uid, value=doc["data_provider_name"]),
            DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Document Type").uid, value=doc["document_type"]),
            DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Tags").uid, value=", ".join(doc["tags"])),
    ]

        # If the document is within the allowed size, proceed to upload
        asset = {
            "row_data": fulltext,
            "global_key": str(uuid4()),
            "media_type": "TEXT",
            "metadata_fields": metadata_fields,
        }

        task = dataset.create_data_rows([asset])
        task.wait_till_done()
        if task.errors:
            return f"Error uploading document {doc['title']}: {task.errors}"
        return f"Document {doc['title']} uploaded successfully."
    except Exception as e:
        return f"Exception occurred while uploading document {doc['title']}: {e}"

# Use ThreadPoolExecutor to upload documents in parallel
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    # Map each document in documents_data to the upload function
    futures = [executor.submit(upload_document, doc) for doc in documents_data]
    for future in concurrent.futures.as_completed(futures):
        print(future.result())

Document Introduction to special issue “Geographies of Sexualities” [\u3ci\u3eJournal of Lesbian Studies\u3c/i\u3e] uploaded successfully.
Document Rich Dad, Gay Dad: The Wealth Traps of Gay Fatherhood uploaded successfully.
Document Drama Therapy and Transgender Adolescents uploaded successfully.
Document Socialising and sexual health: an evaluation of the needs of gay, bisexual men and men who have sex with men (MSM) in Sheffield uploaded successfully.


There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details


Document Gender and Voice uploaded successfully.
Error uploading document Imag(in)ing Trans Partnerships: Collaborative Photography and Intimacy: [{'message': 'Cannot determine value for RowData:  (code: 781b3c42-279c-462b-b0a9-da65d7e8c4e8)', 'failedDataRows': [{'rowData': 'Unresolvable row data', 'globalKey': '92f142c1-5e6e-4bff-b09e-190abf3c20fe', 'metadata': [{'schemaId': 'clui7ccoo00hc07zjbrxp11zr', 'value': 'Imag(in)ing Trans Partnerships: Collaborative Photography and Intimacy', 'name': None}, {'schemaId': 'clui7cle400nd0706ectc9b48', 'value': 'Davidmann, Sara', 'name': None}, {'schemaId': 'clui7w0yy0096072x686xd4zd', 'value': '2013-12-02T00:00:00+00:00', 'name': None}, {'schemaId': 'clui7d9yo005t070j0uunfc6e', 'value': '10.1080/00918369.2014.865481', 'name': None}, {'schemaId': 'clui7dftl004e070n06re1xrk', 'value': 'UAL Research Online', 'name': None}, {'schemaId': 'clui7dliz01600717a1f46ry5', 'value': 'research', 'name': None}, {'schemaId': 'clui7hhzy01ax07yob22s83ef', 'value'

There are errors present. Please look at `task.errors` for more details


Document "Why wave the flag?":(In)visibile queer activism in authoritarian Kazakhstan and Russia uploaded successfully.
Document Sexual preferences, shame, psychological and physical health: What’s the relationship? uploaded successfully.
Document From 'celluloid comrades' to 'digital video activism': queer filmmaking in postsocialist China uploaded successfully.
Error uploading document Same-sex marriage and the sexual hierarchy: constructing the homonormative and homoradical legal identities: [{'message': 'RowData length exceeds max size of: 256000, was: 767286 (code: 738ab29c-93c4-48ae-b4e6-025ae016a898)', 'failedDataRows': [{'rowData': 'Northumbria Research LinkCitation:  Maine,  Alexander (2019)  Same-sex marriage and the sexual  hierarchy:  constructing the homonormative and homoradical legal identities. Doctoral thesis, Northumbria University. This version was downloaded from Northumbria Research Link: http://nrl.northumbria.ac.uk/42401/Northumbria University has developed North

There are errors present. Please look at `task.errors` for more details


Document Outness, Discrimination, and Serious Mental Illness Among LGBTQ Southerners uploaded successfully.
Error uploading document I am Human, Too! An Analysis of Conflict Resolution Theories and Their Applicability to the LGBTQ Community: [{'message': 'RowData length exceeds max size of: 256000, was: 285562 (code: af9e0c36-06e2-4a7c-8d98-1239cfb9f315)', 'failedDataRows': [{'rowData': 'Nova Southeastern UniversityNSUWorksDepartment of Conflict Resolution Studies Thesesand Dissertations CAHSS Theses and Dissertations1-1-2017I am Human, Too! An Analysis of ConflictResolution Theories and Their Applicability to theLGBTQ CommunityJaime AnzalottaNova Southeastern University, anzalott@mynsu.nova.eduThis document is a product of extensive research conducted at the Nova Southeastern University College ofArts, Humanities, and Social Sciences. For more information on research and degree programs at the NSUCollege of Arts, Humanities, and Social Sciences, please click here.Follow this and addit

There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details


Document LGBTQ and finance uploaded successfully.
Document A House Divided: Same-Sex Marriage and Dangers to Civil Rights uploaded successfully.
Document The Revolution Will Be Televised But Not Supported: Student Protest at Marquette University uploaded successfully.
Error uploading document The Politics of Disgust: Public Opinion Toward LGBTQ People & Policies.: [{'message': 'Cannot determine value for RowData:  (code: b7d6ecc6-c2c8-451e-9066-c2effca872fa)', 'failedDataRows': [{'rowData': 'Unresolvable row data', 'globalKey': 'a9c5c7b8-4691-4370-b4c9-2eaf58195bd9', 'metadata': [{'schemaId': 'clui7ccoo00hc07zjbrxp11zr', 'value': 'The Politics of Disgust: Public Opinion Toward LGBTQ People & Policies.', 'name': None}, {'schemaId': 'clui7cle400nd0706ectc9b48', 'value': 'Casey, Logan', 'name': None}, {'schemaId': 'clui7w0yy0096072x686xd4zd', 'value': '2016-01-01T00:00:00+00:00', 'name': None}, {'schemaId': 'clui7d9yo005t070j0uunfc6e', 'value': '', 'name': None}, {'schemaId': 'clui7dftl00

There are errors present. Please look at `task.errors` for more details


Document You Need to Calm Down: Examining the Origin and Eliminating the Future of the “Gay Panic” Defense uploaded successfully.
Document Rhetorical Criticism of \u3cem\u3eTrevor\u3c/em\u3e uploaded successfully.
Document The Impact on Maryland\u27s Budget of Allowing Same-Sex Couples to Marry uploaded successfully.
Document Marrying European and Domestic Politics? The Marriage Referendum in Croatia and Value-Based Euroscepticism uploaded successfully.
Document Food for Thought: A Framework for Social Justice in Social Studies Education uploaded successfully.
Error uploading document Homeric Studies, Feminism, and Queer Theory: Interpreting Helen and Penelope: [{'message': 'Cannot determine value for RowData:  (code: 8d9d3460-81ce-42e9-9b42-425099cb7da2)', 'failedDataRows': [{'rowData': 'Unresolvable row data', 'globalKey': '4ff0a364-f8ab-4e92-b86f-99e3eee71c4d', 'metadata': [{'schemaId': 'clui7ccoo00hc07zjbrxp11zr', 'value': 'Homeric Studies, Feminism, and Queer Theory: Interpreting 

There are errors present. Please look at `task.errors` for more details


Document Evidence and knowledge gaps on the disease burden in sexual and gender minorities : a review of systematic reviews uploaded successfully.
Error uploading document A systematic review of lesbian, gay, bisexual and transgender health in the West Midlands region of the UK compared to published UK research: [{'message': 'RowData length exceeds max size of: 256000, was: 303091 (code: 00a89f3a-ecb9-4906-b710-3d4903e21a98)', 'failedDataRows': [{'rowData': 'UNIVERSITYOF BIRMINGHAM A systematic review of lesbian, gay, bisexual and transgender health in the West Midlands region of the UK compared to published UK research   Catherine Meads, Mary Pennant, James McManus & Sue Bayliss Unit of Public Health, Epidemiology & Biostatistics West Midlands Health Technology Assessment Group DPHE 2009 Report Number 71   A systematic review of lesbian, gay, bisexual and transgender health in the West Midlands region of the UK compared to published UK research   A WEST MIDLANDS HEALTH TECHNOLOGY ASSE

There are errors present. Please look at `task.errors` for more details


Document Transition in Poland, Poland in Transition:Tracing the history of gender transition discourses in Polish social media uploaded successfully.
Document A Review Examining Biases in Workplace Hiring and Promotion Processes uploaded successfully.
Document LGBTQ Forced Migrants\u27 Labor Market Integration In Mexico City: Perspectives From Mexico\u27s Government Agencies, International Organizations, and Mexican Civil Society uploaded successfully.
Error uploading document Engaging diversity: best practices to create an inclusive work environment: [{'message': 'RowData length exceeds max size of: 256000, was: 329879 (code: c2b5a244-e5a1-46ff-a9dd-d481e7a454fd)', 'failedDataRows': [{'rowData': 'Pepperdine University Pepperdine Digital Commons Theses and Dissertations 2016 Engaging diversity: best practices to create an inclusive work environment Britta M. Wilson Follow this and additional works at: https://digitalcommons.pepperdine.edu/etd Recommended Citation Wilson, Britta M., "En

There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details


Document Affordable Housing for Austin’s Homeless Population uploaded successfully.
Document Motivations, Expectations and Experiences of Genital Piercings in the Transgender Community: An Exploratory Study uploaded successfully.
Error uploading document In Pursuit of Diversity in the CUNY Library Profession: An Effective Approach to Leadership in Academic Libraries: [{'message': 'Cannot determine value for RowData:  (code: f2eca2c9-ef2f-40e8-ac1b-f12e8811bb64)', 'failedDataRows': [{'rowData': 'Unresolvable row data', 'globalKey': '07a45d04-79e2-4e64-8ce3-5a0a4231aa32', 'metadata': [{'schemaId': 'clui7ccoo00hc07zjbrxp11zr', 'value': 'In Pursuit of Diversity in the CUNY Library Profession: An Effective Approach to Leadership in Academic Libraries', 'name': None}, {'schemaId': 'clui7cle400nd0706ectc9b48', 'value': 'Sanchez-Rodriguez, Nilda Alexandra', 'name': None}, {'schemaId': 'clui7w0yy0096072x686xd4zd', 'value': '2020-12-12T08:00:00+00:00', 'name': None}, {'schemaId': 'clui7d9yo005t0

There are errors present. Please look at `task.errors` for more details


Document The Business Boost from Marriage Equality: Evidence from the Health and Marriage Equality in Massachusetts Survey uploaded successfully.
Document Children with Trans Parents: Parent–Child Relationship Quality and Psychological Well-being uploaded successfully.
Document Extended Foreplay in a Time of Discontent uploaded successfully.
Document "We have to be alive in order to marry": Black LGBTT Youth and Geographies of Violence in Rio de Janeiro, Brazil uploaded successfully.
Document Introduction to LGBTQ America Today uploaded successfully.
Document Reevaluating Religion: A Case for Inclusivity of LGBTQ Christians in the Church uploaded successfully.
Document The Cord (October 8, 2014) uploaded successfully.
Document The Impact of Discrimination Against The LGBTQ Community. uploaded successfully.
Document ‘We treat them all the same’: the attitudes, knowledge and practices of staff concerning old/er lesbian, gay, bisexual and trans residents in care homes uploaded successfull

There are errors present. Please look at `task.errors` for more details


Document Hos in the garden: staging and resisting neoliberal creativity uploaded successfully.
Document Black Queer Activism During the Aids Epidemic uploaded successfully.
Document Bi- and Pan-Sexual Queer Erasure in Heteronormative Presenting Relationships: Too Queer or Not Queer Enough? uploaded successfully.
Document Narratives of resistance: (Re) Telling the story of the HIV/AIDS movement – Because the lives and legacies of Black, Indigenous, and People of Colour communities depend on it uploaded successfully.
Document Mediatization and sexuality : an invitation to a deep conversation on values, communicative sexualities, politics and media uploaded successfully.
Document The State of Theory in LGBTQ Aging: Implications for Gerontological Scholarship uploaded successfully.
Document Minutes, College of Liberal Arts Faculty Meeting, Thursday, November 29, 2018 uploaded successfully.
Error uploading document Affirmative Counseling with LGBQQIA Individuals:  A Training and Resource Ma

There are errors present. Please look at `task.errors` for more details


Document Resilience & Community: Supporting Immigrant Communities Through FFFs COVID-19 Response Funding uploaded successfully.
Document Employee Engagement and Marginalized Populations uploaded successfully.
Document Ripping Up The Rulebook : Challenges and Opportunities in Moving Beyond the Binary uploaded successfully.
Error uploading document Family, Unvalued: Discrimination, Denial, and the Fate of Binational Same-Sex Couples under U.S. Law: [{'message': 'RowData length exceeds max size of: 256000, was: 433867 (code: c95d549d-656b-476f-a764-b8561c0d7e2e)', 'failedDataRows': [{'rowData': '                           Family, Unvalued Discrimination, Denial, and the Fate of  Binational Same-Sex Couples under U.S. Law                     Copyright © 2006 Human Rights Watch/Immigration Equality    All rights reserved.  Printed in the United States of America      ISBN: 1-56432-336-6 Cover photos: © 2006 Private      Cover design by Rafael Jimenez  Immigration Equality  350 West 31st Str

There are errors present. Please look at `task.errors` for more details


Document Multi-version software reliability through fault-avoidance and fault-tolerance uploaded successfully.
Error uploading document Where Are All The Women?: Understanding The Factors Influencing Potential Departure Intentions Of Women Coaches At The NCAA Division III Level: [{'message': 'RowData length exceeds max size of: 256000, was: 257592 (code: cce2f271-38ad-4658-98dc-263248e2265e)', 'failedDataRows': [{'rowData': 'The University of Maine DigitalCommons@UMaine Electronic Theses and Dissertations Fogler Library Spring 5-7-2021 Where Are All The Women?: Understanding The Factors Influencing Potential Departure Intentions Of Women Coaches At The NCAA Division III Level Jennifer Laney University of Maine, Jennifer.laney@maine.edu Follow this and additional works at: https://digitalcommons.library.umaine.edu/etd  Part of the Higher Education Commons, Leadership Studies Commons, and the Sports Studies Commons Recommended Citation Laney, Jennifer, "Where Are All The Women?: Understa

There are errors present. Please look at `task.errors` for more details


Document Resilience among LGBTQIA+ youth in out-of-home care:A scoping review uploaded successfully.
Document Creative Gender Expression Performativity As a Coping Mechanism for Minority Stress uploaded successfully.
Document Understanding Sibling Relationships in the Context of Gender Diversity uploaded successfully.
Document The organisation of sexuality and the sexuality of organisation: A genealogical analysis of sexual ‘inclusive exclusion’ at work uploaded successfully.
Document From \u3ci\u3eHeo\u3c/i\u3e to \u3ci\u3eZir\u3c/i\u3e: A History of Gender Expression in the English Language uploaded successfully.
Document “Tragic and Glorious Pages”: The Evolution of Intersex Rights in Russia and Reframing Law and Tradition to Advance Reform uploaded successfully.
Document Queer representation incorporated at “Him”, character of “The Powerpuff Girls" uploaded successfully.
Document A comparison of responses to single and repeated discrete choice questions uploaded successfully.
Error

There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details


Document The Urgent Need for Research and Interventions to Address Family-Based Stigma and Discrimination Against Lesbian, Gay, Bisexual, Transgender, and Queer Youth uploaded successfully.
Document Färgglada röster : centrerar HBTQ+ perspektiv i offentliga rum uploaded successfully.
Error uploading document Gender Assignment Surgery for Intersexed Infants: How the Substantive Due Process Right to Privacy Both Supports and Opposes a Moratorium: [{'message': 'Cannot determine value for RowData:  (code: 2a3ce594-d238-40ef-8e46-57e407db9fa3)', 'failedDataRows': [{'rowData': 'Unresolvable row data', 'globalKey': 'cf97dcd8-a900-487d-baf1-623564479834', 'metadata': [{'schemaId': 'clui7ccoo00hc07zjbrxp11zr', 'value': 'Gender Assignment Surgery for Intersexed Infants: How the Substantive Due Process Right to Privacy Both Supports and Opposes a Moratorium', 'name': None}, {'schemaId': 'clui7cle400nd0706ectc9b48', 'value': 'Aliabadi, Sara A.', 'name': None}, {'schemaId': 'clui7w0yy0096072x686xd4

There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details


Exception occurred while uploading document Evolving Media Coverage on Transgender Individuals: A Step towards Inclusivity: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Understanding inclusion in the retail industry:incorporating the majority perspective: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document ‘Open, and Always, Opening’: Trans- Poetics as a Methodology for (Re)Articulating Gender, the Body, and the Self ‘Beyond Language’: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Transgender families: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time win

There are errors present. Please look at `task.errors` for more details


Error uploading document Growing up Trans: Exploring the Positive School Experiences of Transgender Children and Young People: [{'message': 'RowData length exceeds max size of: 256000, was: 361167 (code: e2bfd65b-e9ef-40bb-bab8-60f292e22df6)', 'failedDataRows': [{'rowData': '   Growing Up Trans: Exploring the Positive School Experiences of Transgender Children and Young People    Matthew Leonard University of East London      A thesis submitted in partial fulfilment of the requirements of the University of East London for the Professional Doctorate in Educational and Child Psychology April 2019        i   “No word of a lie, my teacher walks into the classroom, goes, “Ladies and gentleman”. I am not— I kid you not, I was so God damn happy! … It was probably one of the best moments of being trans*, honestly!”  (Nightcrawler, 397–414)    ii Abstract Background While the exploration into transgender students’ experiences is a growing field within psychological and educational research, muc

There are errors present. Please look at `task.errors` for more details


Document Discrimination and resilience and the needs of people who identify as Transgender A narrative review of quantitative research studies uploaded successfully.
Exception occurred while uploading document Speak Louder: Gaining Support and Visibility for the GLBTQ Community: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Performing Asexuality through Narratives of Sexual Identity: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Gender and the Media in the Western Balkans: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document The Allergic Bodies Conference: Postgraduate/Postdoctoral Research Conference, 2010: You have e

There are errors present. Please look at `task.errors` for more details


Exception occurred while uploading document Strategic Leadership for Managing Diversity: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Parental Responses to Coming out by Lesbian, Gay, Bisexual, Queer, Pansexual, or Two‐Spirited People across Three Age Cohorts: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Towards Bi-Inclusive Policies: Suggestions Based on Research on Dutch Same-Sex Attracted Young People: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document "It's a comparison thing, isn't it?" : lesbian and bisexual women's accounts of how partner relationships shape their feelings about their body and appearance: Yo

There are errors present. Please look at `task.errors` for more details
There are errors present. Please look at `task.errors` for more details


Exception occurred while uploading document Mental Health of Black Transgender and Nonbinary Young People: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Dayton is Burning: A Survey of Drag History and Performance in Southwest and Central Ohio: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document Marriage on the Ballot: An Analysis of Same-Sex Marriage Referendums in North Carolina, Minnesota, and Washington During the 2012 Elections: You have exceeded 5000 requests in a 60 second time window.('You have exceeded 5000 requests in a 60 second time window.', None)
Exception occurred while uploading document An initial study on the importance of archiving to queer Filipinos: You have exceeded 5000 requests in a 60 second time window.('You have exce

In [16]:
from labelbox import Client, DataRowMetadataField, Dataset
from uuid import uuid4
import datetime

# Initialize Labelbox client with your API key
API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbHN4eGlrMTcwMDJrMDcxNTQ3ZXlkc25hIiwib3JnYW5pemF0aW9uSWQiOiJjbHN4eGlqOWMwMDJqMDcxNWR4cmkxa3Y4IiwiYXBpS2V5SWQiOiJjbHVpNzY3czgwMTMwMDd6NDJydng1eW5nIiwic2VjcmV0IjoiM2Y4MmMyNGQ0NDIzN2VhNGQ1MDZhMDlmN2U2NjliM2IiLCJpYXQiOjE3MTIwNTEzNTQsImV4cCI6MjM0MzIwMzM1NH0.-FA3t7q7ingRvqyGsbfXpvee9N4-FefB-LOdkdTLRgo"
client = Client(api_key=API_KEY)

# Create or select a dataset in Labelbox
dataset_name = "Bayard Corpus"
datasets = list(client.get_datasets(where=(Dataset.name == dataset_name)))

if datasets:
    dataset = datasets[0]  # If the dataset exists, use the first match
    print(f"Found existing dataset named '{dataset_name}'.")
else:
    # If not found, create a new dataset
    dataset = client.create_dataset(name=dataset_name)
    print(f"Created new dataset named '{dataset_name}'.")

# Create metadata schemas (if they don't exist)
metadata_schemas = {
    "Title": DataRowMetadataKind.string,
    "Authors": DataRowMetadataKind.string,
    "Published Date": DataRowMetadataKind.string,
    "DOI": DataRowMetadataKind.string,
    "Data Provider": DataRowMetadataKind.string,
    "Document Type": DataRowMetadataKind.string,
    "Tags": DataRowMetadataKind.string,  # Storing tags as a concatenated string
}

for name, kind in metadata_schemas.items():
    # Ensure each metadata schema exists
    try:
        metadata_schema = client.get_data_row_metadata_ontology().get_by_name(name)
        print(f"Found schema for {name}.")
    except:
        metadata_schema = client.get_data_row_metadata_ontology().create_schema(name=name, kind=kind)
        print(f"Created schema for {name}.")

# Assuming `documents_data` is a list of dictionaries containing the document info
assets = []
for doc in documents_data:
    # Concatenate tags into a single string
    tags_string = ", ".join(doc.get("tags", []))

    # Construct metadata fields for each document
    metadata_fields = [
        DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Title").uid, value=doc["title"]),
        DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Authors").uid, value=', '.join(doc["authors"])),
        # Treat Published Date as a string
        DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Published Date").uid, value=str(doc["published_date"])),
        DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("DOI").uid, value=doc["doi"]),
        DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Data Provider").uid, value=doc["data_provider_name"]),
        DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Document Type").uid, value=doc["document_type"]),
        DataRowMetadataField(schema_id=client.get_data_row_metadata_ontology().get_by_name("Tags").uid, value=", ".join(doc["tags"])),
]

    # Add the document as an asset for bulk import
    assets.append({
        "row_data": doc["fulltext"],
        "global_key": str(uuid4()),  # Ensure uniqueness
        "media_type": "TEXT",
        "metadata_fields": metadata_fields,
    })

# Bulk upload prepared assets to the dataset
task = dataset.create_data_rows(assets)
task.wait_till_done()

if task.errors:
    print(f"Errors during import: {task.errors}")
else:
    print("All documents were successfully imported.")



Found existing dataset named 'Bayard Corpus'.
Found schema for Title.
Found schema for Authors.
Found schema for Published Date.
Found schema for DOI.
Found schema for Data Provider.
Found schema for Document Type.
Found schema for Tags.


LabelboxError: Failed to upload, unknown cause('Failed to upload, unknown cause', JSONDecodeError('Expecting value: line 1 column 1 (char 0)'))