In [None]:
from typing_extensions import override
from openai import AssistantEventHandler, OpenAI
import os
from dotenv import load_dotenv
import json
import pickle
import pandas as pd
import bibtexparser as bp
import re

load_dotenv()
API_KEY = os.getenv("GPT_API")

In [3]:
try:
    client = OpenAI(
        api_key=API_KEY)
except:
    print("Client did not initialize, exiting")
    pass
def pd_length_print(pd_array):
    return sum(pd_array.map(lambda x : len(x))) 

In [4]:
json_file = client.files.create(
  file=open("templates.json", "rb"),
  purpose="assistants"
)

In [38]:
sorted_data : pd.DataFrame = pd.read_pickle("../../ISIDM/sorted_references.pkl")
sorted_data = sorted_data.reset_index()

In [39]:
# sorted_data = sorted_data.reset_index()
print(sorted_data['references'])

0       Appleton, J. 1999. “Reflections of a former p...
1      Appleton, Jon. 1990 .“Composer pour de nouveau...
2      Arveiller, Jacques, Marc Battier, and Giuseppe...
3      Battier, M. 1995. “Entre l'idée et l'œuvre. Pa...
4      Battier, M. Ed. 1999. Aesthetics of Live Elect...
                             ...                        
815    Wilson, S. 2002.Information Arts: A Survey of ...
816    Winkler, T. 1995. “Making Motion Musical: Gest...
817    Winkler, T. 1997. “Creating Interactive Dance ...
818    Wood, J., (ed). 1998.The Virtual Embodied: Pre...
819    Yang, Z., B., Yu., R., Diankov, W., Wu, R., Ba...
Name: references, Length: 820, dtype: object


In [96]:
assistant = client.beta.assistants.create(
    name="Reference to Json Bib Converter",
    instructions="You are an expert schollar that is capable of formatting direct references into json formated bib references. Based on the reference templates listed in the attached file, can you format the provided references into bibtex references. If an error occurs where you cannot format it, please return the error number 666. For the Authors Names, keep the format of Last Name and their First Name in initials. Have the key of the bibtex string be Last Name of the first Author, year, and a random word in the title. Finally, please only return the bibtext entries.",
    model="gpt-4o",
    tools=[{"type": "file_search"}],
)

In [55]:
# Create a vector store caled "Financial Statements"
vector_store = client.beta.vector_stores.create(name="Json bib references")
 
# Ready the files for upload to OpenAI
file_paths = ["templates.json", ]
file_streams = [open(path, "rb") for path in file_paths]
 
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion.
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=file_streams
)
 
# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


In [56]:
assistant = client.beta.assistants.update(
  assistant_id=assistant.id,
  tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
)

In [19]:
print(file_batch.file_counts)

FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


In [57]:
vector_stores = client.beta.vector_stores.list()
print(vector_stores)

SyncCursorPage[VectorStore](data=[VectorStore(id='vs_KMvsBPDVgv6WoIKCWl0ZRrIi', created_at=1719415476, file_counts=FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1), last_active_at=1719415478, metadata={}, name='Json bib references', object='vector_store', status='completed', usage_bytes=3764, expires_after=None, expires_at=None), VectorStore(id='vs_gXUucweYPvixZqjXcroXw6GY', created_at=1719415091, file_counts=FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1), last_active_at=1719415092, metadata={}, name=None, object='vector_store', status='completed', usage_bytes=3764, expires_after=ExpiresAfter(anchor='last_active_at', days=7), expires_at=1720019892), VectorStore(id='vs_bqdYO10WVaN6rTyxNxqg3MRp', created_at=1719413271, file_counts=FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1), last_active_at=1719415092, metadata={}, name='Json bib references', object='vector_store', status='completed', usage_bytes=3764, expires_after=

In [195]:
def blocking_thread_process(ref):
    thread = client.beta.threads.create(
        messages=[
            {
              "role": "user",
              "content": [
                {
                  "type": "text",
                  "text": ref
                },
              ],
                "attachments": [
                { "file_id": json_file.id, "tools": [{"type": "file_search"}] }
              ],
            }
          ]
    )
    run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id
    )
    
    messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
    
    message_content = messages[0].content[0].text
    annotations = message_content.annotations
    citations = []
    for index, annotation in enumerate(annotations):
        message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
        if file_citation := getattr(annotation, "file_citation", None):
            cited_file = client.files.retrieve(file_citation.file_id)
            citations.append(f"[{index}] {cited_file.filename}")

    result = message_content.value.replace("bibtex\n","")
    print(result)
    return(result)
    


    

In [191]:
def bibchecker(ref_list,bib_string):
    ### Checks to see if the if the block of bib entries are properly processed. Originally is meant to catch the 666 error.
    missing_refs = []
    split_bib = bib_string.split("666")
    index = len(re.findall(r"@",split_bib[0]))
    return ref_list[index]

In [202]:
ref_array = []
failed_list = []
concated_list = [] 
coont = 0
for idx, ref in enumerate(sorted_data['references'].to_list()):
    if coont == 0 :
        concated_list = [] 
    concated_list.append(ref)
    if len(concated_list) == 10:
        print("yes")
        string_ref = blocking_thread_process("\n".join(concated_list)).replace("\n","")
        while len(concated_list) != len(re.findall(r"@",string_ref)):
            if "666" in string_ref:
                failed_list.append(bibchecker(concated_list,string_ref))
                break
            string_ref = blocking_thread_process("\n".join(concated_list))
        print(string_ref)
        ref_array.append(string_ref)
    coont = (coont + 1) % 10


yes
```@article{Appleton1999Reflections,  author = {Appleton, J.},  title = {Reflections of a former perfomer of electroacoustic music},  journal = {Contemporary Music Review},  year = {1999},  volume = {18},  number = {3},  pages = {5-8},  note = {(with audio excerpts on the accompanying CD)},  editor = {Marc Battier},  booktitle = {Aesthetics of Live Electronic Music}}@article{Appleton1990Composer,  author = {Appleton, Jon},  title = {Composer pour de nouveaux instruments},  journal = {Contrechamps},  year = {1990},  number = {n¡ 11},  booktitle = {Musiques électroniques},  publisher = {Editions l'âge d'homme}}@book{Arveiller1976Computer,  author = {Arveiller, Jacques and Battier, Marc and Englert, Giuseppe},  title = {A computer Music Repertory},  year = {1976},  publisher = {I quaderni dell'autunno musicale 4 and Artinfo/musinfo 23-24-25, Groupe art et informatique de Vincennes},  address = {Paris, Como (Italy)},  note = {128 p.}}@incollection{Battier1995Entre,  author = {Battier, 

In [239]:
count = 0
lib = bp.parse_string("".join(ref_array))

lib_array = [lib]
while len(lib_array[count].failed_blocks) > 0:
    failed_string = "".join([x.raw for x in lib_array[count].failed_blocks])
    print(failed_string)
    lib_array.append(bp.parse_string(failed_string))
    count += 1

Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknow

@incollection{Flety2000Gesture,  author = {Fléty, E.},  year = {2000},  title = {3D Gesture Acquisition Using Ultrasonic Sensors},  booktitle = {Trends in Gestural Control of Music},  editor = {Wanderley, M. and Battier, M.},  publisher = {IRCAM – Centre Pompidou},  address = {Paris},  pages = {193-207}}@incollection{Tanaka2000Musical,  author = {Tanaka, A.},  title = {Musical performance practice on sensor-based instruments},  booktitle = {Trends in Gestural Control of Music},  editor = {Wanderley, M. and Battier, M.},  year = {2000},  publisher = {IRCAM – Centre Pompidou},  address = {Paris}}@article{Wanderley2002Evaluation,  author = {Wanderley, M. M. and Orio, N.},  title = {Evaluation of Input Devices for Musical Expression: Borrowing Tools from HCI},  journal = {Computer Music Journal},  year = {2002},  volume = {26},  number = {3},  pages = {62-76}}@inproceedings{Bertini1992Light,  author       = {Bertini, G. and P. Carosi},  title        = {Light Baton: a System for Conducting 

In [249]:
# print(lib_array[1].failed_blocks)

for key, value in lib_array[2].entries_dict.items():
    print(value)
    print(lib_array[1].entries_dict[key])


Entry (line: 0, type: `article`, key: `Wanderley2002Evaluation`):
	`author` = `Wanderley, M. and Orio, N.`
	`year` = `2002`
	`title` = `Evaluation of Input Devices for Musical Expression: Borrowing Tools from HCI`
	`journal` = `Computer Music Journal`
	`volume` = `26`
	`number` = `3`
	`pages` = `62-76`
Entry (line: 0, type: `article`, key: `Wanderley2002Evaluation`):
	`author` = `Wanderley, M. M. and Orio, N.`
	`title` = `Evaluation of Input Devices for Musical Expression: Borrowing Tools from HCI`
	`journal` = `Computer Music Journal`
	`year` = `2002`
	`volume` = `26`
	`number` = `3`
	`pages` = `62-76`
Entry (line: 0, type: `phdthesis`, key: `Hunt1999Radical`):
	`author` = `Hunt, A.`
	`title` = `Radical User Interfaces for Real-time Musical Control`
	`school` = `University of York UK`
	`year` = `1999`
	`type` = `DPhil thesis`
Entry (line: 0, type: `phdthesis`, key: `Hunt1999Radical`):
	`author` = `Hunt, A.`
	`title` = `Radical User Interfaces for Real-time Musical Control`
	`year` = `

In [243]:
print(len(lib.entries), len(lib.failed_blocks), len(lib.entries) + len(lib.failed_blocks))

769 51 820


In [252]:
new_file = bp.write_string(lib).replace("`","")
f = open("bib_entries.bib", 'w')
f.write(new_file)

Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknow

233304

In [250]:
count = 0
for lib_sec in lib_array:
    new_file = bp.write_string(lib).replace("`","")
    f = open("bib_entries" + str(count) + ".bib", 'w')
    f.write(new_file)
    count += 1

Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknown block type <class 'bibtexparser.model.DuplicateBlockKeyBlock'>
Unknow

In [192]:
# print(len(re.findall(r"@",ref_array)))
test_string = '''
@book{Buxton1977Computer,
  editor = {Buxton, William},
  title = {Computer Music 1976/77: a Directory to Current Work},
  year = {1977},
  publisher = {Canadian Commission for UNESCO},
  address = {Ottawa},
  pages = {239}
}

@article{Cadoz1984Responsive,
  author = {Cadoz, C., et al},
  title = {Responsive Input Devices and Sound Synthesis by Simulation of Instrumental Mechanisms: The Cordis System},
  journal = {Computer Music Journal},
  year = {1984},
  volume = {8},
  number = {3},
  pages = {60-73}
}

666

@incollection{Chadabe1980Solo,
  author = {Chadabe, J.},
  title = {Solo: a Specific Example of Realtime Performance},
  booktitle = {Computer Music - Composition musicale par ordinateur},
  editor = {M. Battier et B. Truax},
  year = {1980},
  publisher = {Commission Canadienne pour l'UNESCO},
  address = {Ottawa},
  pages = {87-94}
}'''
bad_test_list = sorted_data['references'].to_list()[6:10] 
val = bibchecker(bad_test_list, test_string.replace("\n",""))
print(val)

Cadoz, C.e. 1991. “Timbre et causalité.” In Le timbre: métaphore pour la composition. Jean-Baptiste Barriére, ed., Paris: IRCAM/Christian Bourgois, . pp. 17-46.


In [None]:
print(ref_array[0])

In [59]:
# bibtexed_ref = blocking_thread_process(sorted_data['references'][0])

ref_array = []
for ref in sorted_data['references']:
    string_ref = blocking_thread_process(ref)
    if "666" in string_ref:
        print("Oh NO!\n", ref)
    else:
        ref_array.append(string_ref)

```@article{Appleton1999Reflections,
  author = {Appleton, Jon},
  title = {Reflections of a former perfomer of electroacoustic music},
  journal = {Contemporary Music Review},
  year = {1999},
  volume = {18},
  number = {3},
  pages = {5-8},
  editor = {Marc Battier},
  note = {with audio excerpts on the accompanying CD},
  booktitle = {Aesthetics of Live Electronic Music}
}
```
```@article{Appleton1990Composer,
  title = {Composer pour de nouveaux instruments},
  author = {Appleton, Jon},
  year = {1990},
  journal = {Contrechamps n¡ 11 - Musiques électroniques},
  publisher = {Editions l'âge d'homme}
}
```
```@book{Arveiller1976Computer,
  author = {Arveiller, Jacques and Battier, Marc and Englert, Giuseppe},
  title = {A Computer Music Repertory},
  year = {1976},
  publisher = {I quaderni dell'autunno musicale 4 and Artinfo/musinfo 23-24-25, Groupe art et informatique de Vincennes},
  address = {Paris, Como (Italy)},
  pages = {128}
}
```
```@incollection{Battier1995Entre,
  auth

IndexError: list index out of range