In [1]:
import os
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv('./keys.env')

openai_api_key = os.environ['OPENAI_API_KEY']

client = OpenAI(
    # This is the default and can be omitted
    api_key=openai_api_key
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system", "content": "Say this is a test",
            "role": "user", "content": "",
        }
    ],
    model="gpt-4o-mini",
    max_tokens=10
)

print(chat_completion.choices[0].message)

ChatCompletionMessage(content='Hello! How can I assist you today?', refusal=None, role='assistant', function_call=None, tool_calls=None)


In [2]:
import chardet

folder_path = '../Transcripts/'

transcripts = {}

count = 0
print(f'There are {len(sorted(os.listdir(folder_path)))} files for our total transcript')
# Iterate through each file in the specified folder
for filename in sorted(os.listdir(folder_path)):
    # Check if the file is a .txt file
    if not filename.endswith('.txt'):
        print(f'{filename} is not a text file')
        #raise ValueError(f'Non txt file: {filename} in directory!')
    else:
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)
        # Open the file and read its contents into a string

        with open(file_path, 'rb') as f: 
            result = chardet.detect(f.read()) 
            encoding = result['encoding']
        # print(f'{filename}, {encoding}')
            
        with open(file_path, 'r', encoding = encoding) as file:
            transcript = file.read()

        transcripts[filename.split('.')[0]] = transcript

        count += 1


print(count)

There are 273 files for our total transcript
.ipynb_checkpoints is not a text file
272


In [3]:
def summarize_transcipt(transcript:str) -> str:
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": '''Read the following call transcript between a provider and a patient, generate a detailed summary in 4 following sections: 
            
            Patient Information: Key details about the patient discussed in the call.
            
            Symptoms Discussed: A list of symptoms the patient reported.
            
            Diagnosis and Assessment: The provider’s assessment and diagnosis of the patient’s condition.
            
            Treatment Plan: Details of the treatment plan or medications prescribed.
            '''},
            {"role": "user", "content": transcript}
        ],
        model="gpt-4o-mini",
        max_tokens=1000
    )

    return chat_completion.choices[0].message.content

In [4]:
summaries = {}

for key in transcripts:
    # Call the pre-defined function and store the result
    summary = summarize_transcipt(transcripts[key])

    print(key)
    print(summary)
    print(len(summary.split()))
    print()
        # Store the summary in the dictionary using the file name as the key
    filename=key.split('.')[0]
    summaries[filename] = summary

CAR0001
### Patient Information:
- **Gender:** Male  
- **Age:** 39  
- **Living Situation:** Lives alone in an apartment  
- **Occupation:** Accountant  
- **Lifestyle Habits:** Smokes about a pack of cigarettes a day for the past 10 to 15 years; occasionally uses cannabis (about 5 mg per week); drinks alcohol (approximately 10 drinks per week).  
- **Family Medical History:** Father had a heart attack at age 45 and experienced cholesterol problems; no known history of strokes or cancers in the family.  

### Symptoms Discussed:
- Chest pain located on the left side of the chest, started last night.  
- Pain is sharp and constant, rated as 7-8 on a scale of 10.  
- Lightheadedness.  
- Trouble breathing, which worsens with laying down and possibly taking deep breaths.  
- Minimal sweating due to breathing issues.  
- Neck appears slightly swollen (no reported neck pain).   

### Diagnosis and Assessment:
- The provider assessed that the patient is experiencing significant chest pain a

In [5]:
import pandas as pd
df=pd.DataFrame(summaries.items(), columns=['File Name', 'Summary'])
df.to_csv('new_summaries.csv', index='File Name')
df

Unnamed: 0,File Name,Summary
0,CAR0001,### Patient Information:\n- **Gender:** Male ...
1,CAR0002,### Patient Information:\n- **Patient Age:** N...
2,CAR0003,### Patient Information: \nThe patient is an a...
3,CAR0004,### Patient Information: \nThe patient is a 25...
4,CAR0005,### Patient Information:\nThe patient is an ad...
...,...,...
267,RES0213,### Patient Information\nThe patient is a 26-y...
268,RES0214,### Patient Information:\nThe patient is a fem...
269,RES0215,### Patient Information:\n- Patient: An 8-year...
270,RES0216,"### Patient Information:\nThe patient, a semi-..."


In [6]:
df=pd.read_csv('../Notebooks/new_summaries.csv')
print(df)
df.loc[df['File Name']=='RES0051']['Summary'].values[0]

     Unnamed: 0 File Name                                            Summary
0             0   CAR0001  ### Patient Information:\n- **Gender:** Male  ...
1             1   CAR0002  ### Patient Information:\n- **Patient Age:** N...
2             2   CAR0003  ### Patient Information: \nThe patient is an a...
3             3   CAR0004  ### Patient Information: \nThe patient is a 25...
4             4   CAR0005  ### Patient Information:\nThe patient is an ad...
..          ...       ...                                                ...
267         267   RES0213  ### Patient Information\nThe patient is a 26-y...
268         268   RES0214  ### Patient Information:\nThe patient is a fem...
269         269   RES0215  ### Patient Information:\n- Patient: An 8-year...
270         270   RES0216  ### Patient Information:\nThe patient, a semi-...
271         271   RES0217  ### Patient Information:\nThe patient, identif...

[272 rows x 3 columns]


"**Patient Information:**  \nThe patient is a 12-year-old individual in grade 6, living with their parents and two younger siblings. They do not have a notable medical history, have not been hospitalized or undergone surgeries, and take no regular medications. They have a potential allergy to penicillin, which resulted in a rash. They report feeling safe at home and do not engage in smoking or substance use.\n\n**Symptoms Discussed:**  \n- Persistent cough for the past 5 days, worsening over time  \n- Sore throat, starting around the same time as the cough  \n- Altered taste (noticing changes in taste for the past 2 days)  \n- Mild fatigue  \n- Possibly feeling hot at night, indicating a potential fever  \n- No blood in sputum, chest pain, or shortness of breath  \n- No difficulty with nasal congestion, runny nose, or other gastrointestinal symptoms  \n- No loss of smell or other systemic issues  \n\n**Diagnosis and Assessment:**  \nThe provider suspects an infectious process related t