In [61]:
import numpy as np
import pandas as pd
from groq import Groq
import os
import time

In [62]:
df = pd.read_excel('../../data/OMIEC_07_08_24.xls')

In [63]:
df['Abstract']

0      Thin films of organic mixed ionic electronic c...
1      n-Type organic electrochemical transistors (OE...
2      Mixed ionic-electronic conductors, such as pol...
3      The field of organic mixed ionic-electronic co...
4      The conversion of electrochemical processes in...
                             ...                        
870                                                  NaN
871                                                  NaN
872                                                  NaN
873    Three cases of familial benign chronic pemphig...
874    An in vitro technique was developed for the is...
Name: Abstract, Length: 875, dtype: object

In [64]:
GROQ_API_KEY= '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'

In [65]:
df.columns

Index(['Publication Type', 'Authors', 'Book Authors', 'Book Editors',
       'Book Group Authors', 'Author Full Names', 'Book Author Full Names',
       'Group Authors', 'Article Title', 'Source Title', 'Book Series Title',
       'Book Series Subtitle', 'Language', 'Document Type', 'Conference Title',
       'Conference Date', 'Conference Location', 'Conference Sponsor',
       'Conference Host', 'Author Keywords', 'Keywords Plus', 'Abstract',
       'Addresses', 'Affiliations', 'Reprint Addresses', 'Email Addresses',
       'Researcher Ids', 'ORCIDs', 'Funding Orgs', 'Funding Name Preferred',
       'Funding Text', 'Cited References', 'Cited Reference Count',
       'Times Cited, WoS Core', 'Times Cited, All Databases',
       '180 Day Usage Count', 'Since 2013 Usage Count', 'Publisher',
       'Publisher City', 'Publisher Address', 'ISSN', 'eISSN', 'ISBN',
       'Journal Abbreviation', 'Journal ISO Abbreviation', 'Publication Date',
       'Publication Year', 'Volume', 'Issue', 'Pa

In [66]:
# groq
client = Groq(
    api_key=GROQ_API_KEY,
)

# respostas
responses_list = []

# todos os abstracts
total = len(df['Abstract'])
total_time = 0


for index, row in df.iterrows():
    start_time = time.time()

    # tempo
    print('\n', index, 'of', total, 'remaining estimated time', (total-index)*(total_time/(index+1)))

    # chat request
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {
                "role": "system",
                "content": "RolePlay as a bot seeking for polymers of the type OMIEC, also known as Organic Mixed Ionic Electronic Conductors."
            },
            {
                "role": "user",
                "content": "For the text I will input next, output only polymers of the type OMIEC, also known as Organic Mixed Ionic \
                    Electronic Conductors. Do not output other types of polymers, and other kinds of materials, only the polymers.\
                    The output must be only the polymer material names separated by semicolons. Do not repeat the polymer name more \
                    than once. Do not output 'OMIEC' or 'OMIEC based', tell me the name of the polymer that is of this type. \
                    Output only the polymers mentioned in the text. If no polymer was found, output 'None'" 
            },
            {
                "role": "assistant",
                "content": "Hello, I will tell the OMIEC polymers of any text you input next"
            },
            {
                "role": "user",
                "content": f"{row['Abstract']}"  # Use f-string to format the abstract
            }
        ],
        temperature=1,
        max_tokens=1024,
        top_p=1,
        stream=True,
        stop=None,
    )

    # coletar respostas
    response = ""

    # Stream the completion result and print the content
    for chunk in completion:
        response += chunk.choices[0].delta.content or ""
        print(chunk.choices[0].delta.content or "", end="")

    print()
    
    # listas de respostas
    responses_list.append({ 'index': index, 'response': response })

    # tempo total
    total_time += time.time() - start_time
    


 0 of 875 remaining estimated time 0.0
PEDOT; EDOT

 1 of 875 remaining estimated time 247.402006149292
Polymer; Poly(3,4-ethylenedioxythiophene) (PEDOT); Poly(3,4-ethylenedioxylthiophene)/Poly(ethylene oxide) (PEDOT/PEO); Poly(3,4-ethylenedioxylthiophene)/Poly(spirobenzopyrane) (PEDOT/PSB);

 2 of 875 remaining estimated time 323.6271028518677
PEDOT:PSS; PEDOT:PolyDADMA FSI; PEDOT:PolyDADMA TFSI; PEDOT:PolyDADMA CF3SO3; PEDOT:PolyDADMA Tos; PEDOT:PolyDADMA FSI/[C(2)mpyr][FSI]

 3 of 875 remaining estimated time 368.9504199028015
Conjugated Polymers; Poly(3,4-ethylenedioxythiophene); Poly(aniline-co-o-anisidine); Poly(thiophene); Poly(pyrrole-co-o-anisidine); Poly(phenylenevinylene); Poly(phenylene)s; Poly(p-phenylene vinylene)

 4 of 875 remaining estimated time 372.8326657295227
PEDOT:PSS

 5 of 875 remaining estimated time 360.46534180641174
Organic synaptic diode

 6 of 875 remaining estimated time 349.9796407563346
PEDOT:PSS

 7 of 875 remaining estimated time 347.8715751171112
P

In [13]:
# DataFrame
df_with_responses = pd.DataFrame(responses_list)

# salvar para .csv
df_with_responses.to_excel('../../dataframes/OMIEC_RESPONSES.xlsx', index=False)

#### Playground

In [36]:
result = pd.concat([df_with_responses, df], axis=1)
result.drop('index', axis=1, inplace=True)
result = result[result['response'] != 'None']
result = result.reset_index(drop=True)

In [42]:
result

Unnamed: 0,response,Publication Type,Authors,Book Authors,Book Editors,Book Group Authors,Author Full Names,Book Author Full Names,Group Authors,Article Title,...,Web of Science Index,Research Areas,IDS Number,Pubmed Id,Open Access Designations,Highly Cited Status,Hot Paper Status,Date of Export,UT (Unique WOS ID),Web of Science Record
0,PEDOT; EDOT,J,"Zhang, CH; Margotti, L; Decataldo, F; Piccioni...",,,,"Zhang, Chenhong; Margotti, Lorenzo; Decataldo,...",,,Organic Mixed Ionic Electronic Conductor Nanoc...,...,,Science & Technology - Other Topics; Materials...,,,,,,2024-08-07,WOS:001227571900001,0
1,Poly(thiophene-3-acetate-5-carboxylate); Poly(...,J,"Dai, HJ; Yue, W",,,,"Dai, Haojie; Yue, Wan",,,n-Type Organic Mixed Ionic-Electronic Conducto...,...,,Materials Science,,,,,,2024-08-07,WOS:001193211400001,0
2,PEDOT:PSS; PEDOT:PolyDADMA FSI; PEDOT:PolyDADM...,J,"Del Olmo, R; Casado, N; Olmedo-Martínez, JL; W...",,,,"Del Olmo, Rafael; Casado, Nerea; Olmedo-Martin...",,,Mixed Ionic-Electronic Conductors Based on PED...,...,,Polymer Science,,,,,,2024-08-07,WOS:000580020600001,0
3,"Polyvinylcarbazole; Poly(3,4-ethylenedioxythio...",J,"Kim, H; Won, Y; Song, HW; Kwon, Y; Jun, M; Oh, JH",,,,"Kim, Hyunwook; Won, Yousang; Song, Hyun Woo; K...",,,Organic Mixed Ionic-Electronic Conductors for ...,...,,Chemistry; Science & Technology - Other Topics...,,,,,,2024-08-07,WOS:001131799400001,0
4,PEDOT:PSS,J,"Bonafe, F; Decataldo, F; Cramer, T; Fraboni, B",,,,"Bonafe, Filippo; Decataldo, Francesco; Cramer,...",,,Ionic Solvent Shell Drives Electroactuation in...,...,,Chemistry; Science & Technology - Other Topics...,,,,,,2024-08-07,WOS:001176227200001,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
467,P3HT-OMIEC; PFO-OMIEC; PEDOT-OMIEC,J,"Abdel-Rahman, SZ; Ammenheuser, MM; Omiecinski,...",,,,"Abdel-Rahman, SZ; Ammenheuser, MM; Omiecinski,...",,,"Variability in human sensitivity to 1,3-butadi...",...,,Toxicology,,,,,,2024-08-07,WOS:000228398000021,0
468,PFBT; PBS; PEOIBT; PEO; PEDOT-PSS; PSSH; PANI-...,J,"Raaka, S; Hassett, C; Omiecinski, CJ",,,,"Raaka, S; Hassett, C; Omiecinski, CJ",,,Human microsomal epoxide hydrolase:: 5′-flanki...,...,,Oncology,,,,,,2024-08-07,WOS:000072481100002,0
469,(R)-cAMPS; (S)-cAMPS,J,"SIDHU, JS; OMIECINSKI, CJ",,,,"SIDHU, JS; OMIECINSKI, CJ",,,CAMP-ASSOCIATED INHIBITION OF PHENOBARBITAL-IN...,...,,Biochemistry & Molecular Biology,,,,,,2024-08-07,WOS:A1995QZ71100067,0
470,"Poly(3,4-ethylenedioxythiophene)-co-poly(ethyl...",J,"HASSETT, C; AICHER, L; SIDHU, JS; OMIECINSKI, CJ",,,,"HASSETT, C; AICHER, L; SIDHU, JS; OMIECINSKI, CJ",,,HUMAN MICROSOMAL EPOXIDE HYDROLASE - GENETIC-P...,...,,Biochemistry & Molecular Biology; Genetics & H...,,,,,,2024-08-07,WOS:A1994NA58300005,0


In [55]:
polímeros = result.iloc[1]['response'].split(';')

In [56]:
polímeros

['Poly(thiophene-3-acetate-5-carboxylate)',
 ' Poly(PTTA)',
 ' PEDOT:PSS',
 ' Poly(3,4-ethylene-dioxythiophene) (PEDOT)',
 ' Poly(3-hexylthiophene) (P3HT)',
 ' Poly[2-methoxy-5-(3′-ethoxyphenyl)-ethoxy)-thiophene] (METH)',
 ' Poly(3,4-propylenedioxythiophene) (PDOT)',
 ' Poly(3,4-ethylenedioxythiophene) (EDOT)',
 ' Poly(fluorene-co-benzothiadiazole) (PFBT)',
 ' Poly-(3-butylthiophene)(PBUT)',
 " Poly[2,2'-bithiophene-5-carboxylic acid] (PBTCA)",
 ' PT-PSS',
 ' Poly(thiophene) (PT)',
 ' Poly(3,4-phenylenethiophene) (PPhT)']

In [60]:
'Poly(PTTA)' in result.iloc[1]['Abstract']

False