In [141]:
from datetime import date
import pandas as pd
import boto3
import numpy as np
import os
import sys
import pprint
import pymysql
import json
import os
from io import StringIO, BytesIO
from pathlib import Path
from typing import List
import boto3
import pandas as pd
from tqdm import tqdm
from uuid import UUID

pymysql.install_as_MySQLdb()

s3 = boto3.resource('s3')
s3_client = boto3.client('s3')
bucket_name = 'eleos-ahus'
sessions_name = 'sessions/collide_after_review/'
sessions_analysis = 'sessions/analysis-results-after-review/'
location_s3 = 's3://' + bucket_name + '/' + sessions_name + '{}.collide.csv'

# and set the environment for django connection and collection

os.environ['USER_PATH'] = '/home/gabrielkws/Eleos-Full/eleos'
sys.path.append(os.environ['USER_PATH'])
os.environ["RUN_LOCAL"] = "TRUE"
os.environ["DEBUG"] = "FALSE"

# and now set the connection to rds

from eleos_data_buddy.storage.rds import init_django_connection
init_django_connection(os.environ['USER_PATH'], 'eleos')

from eleos_rest.models.other import *

from eleos_data_buddy.data_analysis.session_analysis_preparation import is_session_valid_for_analysis

from analysis.models import SessionPipeline

from eleos_rest.models import Site, Therapist
from eleos_rest.models.generic_report import GenericGroupIndividualReport,GenericGroupReport
from eleos_rest.models.group_therapy.group_therapy_session import *
from eleos_rest.models.group_therapy.group_therapy_group import *
from eleos_data_buddy.data_analysis.query_database import get_detailed_notes
from eleos_data_buddy.data_analysis.options import get_all_session_suggestions
from eleos_data_buddy.data_analysis.constants import INVALID_ORGANIZATIONS, INVALID_MAIL_WORDS, TECHNOLOGY_TYPES

from eleos_data_buddy.data_analysis.similarity import check_similarity_from_list
from eleos_data_buddy.data_analysis.constants import ELEOS_TEXT_AC
from eleos_data_buddy.data_analysis.options import get_all_session_suggestions
from eleos_data_buddy.data_analysis.text_processing import remove_punctuation_with_condition, add_dot_at_the_end
from eleos_data_buddy.data_analysis.templates import adjust_templates_to_client_preferences
from eleos_data_buddy.storage.s3 import *

from eleos_data_buddy.data_analysis.eleos_text_calculations import get_eleos_suggestions_per_session, find_eleos_text_in_paragraph
from datetime import timedelta

from eleos_data_buddy.data_analysis.note_processing import clean_note
from eleos_data_buddy.data_analysis.query_database import get_notes_by_sessions
from eleos_data_buddy.data_analysis.session_analysis_preparation import is_session_valid_for_analysis
from django.db.models import Q

e


## extracting group sessions and individual notes

In [142]:
INVALID_GROUPS = ['playground', 'sales demo', 'eleos', 'test']
def is_group_valid(group_name):
    """
    :param organization: str
    :return: bool
    """
    if not [i for i in INVALID_GROUPS if i in group_name.lower()]:
        return True

In [143]:
def filter_group_valid(sessions):
    sessions['group_valid'] = sessions['group'].astype(str).apply(is_group_valid)
    return sessions[sessions['group_valid'] == True]

In [227]:


def get_group_sessions(start_date, end_date):
    sessions = GroupTherapySession.objects.filter(
        actual_start_ts__gte=start_date,
        actual_start_ts__lt=end_date
    )

    
    g_dict = {}
    for g in sessions:
            patients_list = [patient.name for patient in g.group_therapy_group.patients.all()]
            duration_seconds = (
                (g.actual_end_ts - g.actual_start_ts).total_seconds()
                if (g.actual_start_ts and g.actual_end_ts)
                else None
            )

        
            g_dict[g.id] = {
                'individual_session_id': g.id,
                'group': g.group_therapy_group,
                'session_start': g.actual_start_ts,
                'session_end': g.actual_end_ts,
                #'session duration': (g.actual_end_ts - g.actual_start_ts) if (g.actual_start_ts and g.actual_end_ts) else None,
                'session_duration':duration_seconds,
                'technology': g.technology_used,
                'is_note_confirmed': g.is_note_confirmed,
                'group_therapy_id': g.group_therapy_group_id,
                'is scribe':g.get_is_scribe(),
                'ehr':g.ehr_integration_fields,
                'saved and signed':g.saved_and_signed,
                'is note confirmed':g.is_note_confirmed,
                'therapists':[therapist.name for therapist in g.group_therapy_group.therapists.all()],
                
                
                'patients': patients_list,
                'number of patients':len(patients_list)
                
             
            }

    group_sess = pd.DataFrame(g_dict).T
    group_sessions=filter_group_valid(group_sess)
    return group_sessions


In [228]:


def is_mail_valid(mail):
    
    
    INVALID_MAIL_WORDS = ['eleos', 'test', 'demo', 'training', 'scribe', 'omri.zohar']
    if '@' in mail and not any([el for el in INVALID_MAIL_WORDS if
                                el in mail.lower()]):
        return True


In [229]:
def filter_valid_emails(sessions):
    sessions['is_mail_valid'] = sessions['therapist_mail'].astype(str).apply(is_mail_valid)
    return sessions[sessions['is_mail_valid'] == True]


In [230]:

def get_ind_notes_optimized(group_session, num_fields=30):
    note_parts = [getattr(group_session, f"text_field_{i+1}", None) for i in range(num_fields)]
    note_parts = [part for part in note_parts if part is not None]
    full_note = '. '.join(note_parts)
    return full_note


In [231]:
def total_info_individual_notes (sessions):

    ind_notes = GenericGroupIndividualReport.objects.filter(group_session_id__in=sessions['individual_session_id'].unique())


    combined_data=[]
    for s in ind_notes:
        
            session_id=s.group_session_id
            note=get_ind_notes_optimized(s)
            combined_data.append({
                'uuid':session_id,
                'individual_session_notes':note,
                'group_session_id': s.group_session.id,
                'therapist_mail':s.therapist,
                'therapist_id':s.therapist_id
            })

    combined_df= pd.DataFrame(combined_data)
    combined_df_filtered = filter_valid_emails(combined_df)
    return combined_df_filtered


In [232]:

def grouping_notes_per_session (combined_dataframe):
    grouped_notes=combined_dataframe.groupby('uuid')
    all_notes=[]
   
   

    for ga, gb in tqdm(grouped_notes):
        
        phrases= gb['individual_session_notes'].tolist()
        all_notes.append(phrases)

        
    
    group_notes=pd.DataFrame({
        'uuid':grouped_notes.groups.keys(),
        'eleos_notes':all_notes

        
        
    })
    group_notes['num_notes'] = group_notes['eleos_notes'].apply(len)
    return group_notes

In [233]:
# 1 - extracting group sessions
def group_sessions_individual_notes (start_date, end_date):    
    group_session_basics=get_group_sessions (start_date,end_date)
    #extarcting the  notes of those sessions extracted
    group_sessions_individual_notes=total_info_individual_notes(group_session_basics)
    group_sessions_individual_notes_grouped= grouping_notes_per_session(group_sessions_individual_notes)
    notes_info=pd.merge(group_session_basics,group_sessions_individual_notes_grouped,left_on='individual_session_id',right_on='uuid',how='right')
    columns_added=group_sessions_individual_notes['therapist_mail']
    notes_info['therapist_mail']=columns_added


    return notes_info
    
#start_date=datetime(2022,12, 21)
#end_date=datetime(2023,12, 21)
#resdataframe = group_sessions_individual_notes(start_date, end_date)    
#resdataframe

In [234]:
def group_sessions_individual_notes_basics (start_date, end_date):    
    group_session_basics=get_group_sessions (start_date,end_date)
    #extarcting the  notes of those sessions extracted
    group_sessions_individual_notes=total_info_individual_notes(group_session_basics)
    group_sessions_individual_notes_grouped= grouping_notes_per_session(group_sessions_individual_notes)
    notes_info=pd.merge(group_session_basics,group_sessions_individual_notes_grouped,left_on='individual_session_id',right_on='uuid',how='left')
    columns_added=group_sessions_individual_notes['therapist_mail']
    notes_info['therapist_mail']=columns_added


    return notes_info

In [235]:
resdataframe['individual_session_id'].nunique()

268

In [236]:
resdataframe.to_csv('/home/gabrielkws/Downloads/resdataframe.csv')

## extracting individual suggestions

In [237]:
def get_individual_eleos_suggestions(session_id_list):
    df_individual = pd.DataFrame()


    
    for uuid in tqdm(session_id_list):
        try:
            uuid = uuid
            path = f's3://eleos-ahus/dev.eleos.health/{uuid}/models/group-individual-summary/model.json'
            b, p = split_s3_path(path)  # Assuming split_s3_path is defined
            a = json_to_df(b, p)  # Assuming json_to_df is defined

            if len(a) > 0:
                a['uuid'] = uuid
                df_individual = pd.concat([df_individual, a])
        
        except:
            pass
            continue

    df_individual=df_individual.rename(columns={'summary':'eleos_suggestions'})
    df_individual = df_individual.drop(columns=['summary_plural'])
    df_individual = df_individual.reset_index(drop=True)
    
    return df_individual

In [238]:
def grouping_suggestions_per_session (df_individual):
    grouped_sessions=df_individual.groupby('uuid')
    all_suggestions=[]
    speakers_count=[]
   

    for ga, gb in tqdm(grouped_sessions):
        
        phrases= gb['eleos_suggestions'].tolist()
        all_suggestions.append(phrases)

        num_speakers=gb['speaker'].nunique()
        speakers_count.append(num_speakers)

        
    
    group_suggestions=pd.DataFrame({
        'uuid':grouped_sessions.groups.keys(),
        'eleos_individual_suggestions':all_suggestions,
        'speakers':speakers_count
        
        
    })

    group_suggestions['num_individual_suggestions'] = group_suggestions['eleos_individual_suggestions'].apply(len)
    return group_suggestions

In [239]:
def extract_individual_suggestions (group_sessions_individual_notes):
    session_id_list=group_sessions_individual_notes['uuid'].tolist()
    eleos_suggestions=get_individual_eleos_suggestions(session_id_list)
    #group_sessions_with_suggestions=pd.merge(group_sessions_individual_notes,eleos_suggestions,on='uuid',how='left')
    grouped_suggestions=grouping_suggestions_per_session(eleos_suggestions)
    return grouped_suggestions

#group_individual_suggestions=extract_individual_suggestions(result_dataframe)
#group_individual_suggestions

## extracting group notes

In [240]:
def get_group_notes(group_session, num_fields=29):
    note_parts = [getattr(group_session.genericgroupreport, f"text_field_{i+1}", None) for i in range(num_fields)]
    
    # Filtra los valores no nulos
    note_parts = [part for part in note_parts if part is not None]
    
    full_note = '. '.join(note_parts)
    
    return full_note


In [241]:
def give_group_notes(sessions_selected):
    group_notes={}
    for s in sessions_selected:
       # session_id=ind_notes.id
        try:
            g_notes=get_group_notes(s)
            group_notes[s]=g_notes
        except:
            pass
    return group_notes

In [242]:
def total_info_group_notes(sessions):
    combined_data=[]
    for s in sessions:
        session_id=s.id
        notes=get_group_notes(s)
        combined_data.append({
            'session_id':s.id,
            'group_notes':notes,
            'group_id':s.group_therapy_group.id,
            
        })
   
    combined_group_df= pd.DataFrame(combined_data)
    
    return combined_group_df

In [243]:
def grouping_group_notes_per_group (combined_dataframe):
    
    grouped_notes = combined_dataframe.groupby('session_id')
    
    all_notes = []
    for ga, gb in tqdm(grouped_notes):
        
        phrases = gb['group_notes'].tolist()
        all_notes.append(phrases)
    
    group_notes=pd.DataFrame({
        'session_id':grouped_notes.groups.keys(),
        'eleos_group_notes':all_notes
       
    })

    return group_notes


In [244]:
def group_notes_group_sessions(start_date,end_date):    
    sessions_group = GroupTherapySession.objects.filter()
    group_notes = give_group_notes(sessions_group)

    df_group_notes = pd.DataFrame(list(group_notes.items()), columns=['session_id', 'group_notes'])
    combined_dataframe_group= total_info_group_notes(group_notes)
    
    group_notes_from_group_sessions=grouping_group_notes_per_group(combined_dataframe_group)
    
    
    return group_notes_from_group_sessions

#group_sessions_group_notes=group_notes_group_sessions(start_date,end_date)
#group_sessions_group_notes

In [245]:
def group_suggestions (session_id_list):
    df_group = pd.DataFrame()

    for uuid in tqdm(session_id_list):
        try:
            uuid = uuid
        
        
            path_to_group = f's3://eleos-ahus/dev.eleos.health/{uuid}/models/group-summary/model.json'
            b, p = split_s3_path(path_to_group)
            a = json_to_df(b, p)
            if len(a)>0:
                a['session_id'] = uuid
                df_group = pd.concat([df_group, a])  
        except:
            pass
            continue
    
    df_group = df_group.drop(columns=['summary_plural'])
    df_group = df_group.reset_index(drop=True)

    return df_group

#session_id_list=result_dataframe['uuid'].tolist()
#suggest=group_suggestions (session_id_list)
#suggest

## suggestions in notes analysis

In [246]:

def find_eleos_text_in_paragraph(paragraph, eleos_text, ac=ELEOS_TEXT_AC):
    """
    :param paragraph: string
    :param eleos_text: list
    :param ac: float (0-1)
    :return: list of string of eleos generated sentences from text
    """
    ours = []
    for sentence in paragraph:
        if sentence in eleos_text:
            ours.append(sentence)
        else:
            score = check_similarity_from_list(eleos_text, sentence, ac=ac)
            if score:
                ours.append(sentence)
    return ours


In [247]:

def calculate_eleos_text_percentage_in_note(note, eleos_suggestions):
    """
    :param note: str
    :param section: note section name (key of note dict)
    :param eleos_suggestions: list of strings
    :return: dict
    """
    try:
        note = [s.strip() for s in note.split('.') if s.strip() != '']
        eleos_sentences = find_eleos_text_in_paragraph(paragraph=note, eleos_text=eleos_suggestions)
        note_length = len(note)
        if note_length==0:
            return 0, []
        number_of_eleos_sentences = len(eleos_sentences)
        added_sentences = [x for x in note if x not in eleos_sentences]
        percentage = number_of_eleos_sentences / note_length * 100
        
        return percentage, added_sentences
    except Exception as e:
        print(f"Error on merge:{str(e)}")
        return None, None

In [248]:
def calculating_suggestions_and_group_notes (suggestions,notes):
    try:
        merged_df_group=pd.merge(notes,suggestions,on='session_id')
        merged_df_group[['session_id','eleos_group_notes']]=notes[['session_id','eleos_group_notes']]
        merged_df_group[['session_id','eleos_group_suggestion_notes']]=suggestions[['session_id','summary']]
        merged_df_group[['eleos_group_suggestion_rate', 'added_group_sentences']]=merged_df_group.apply(lambda row: calculate_eleos_text_percentage_in_note(''.join(row['eleos_group_notes']),
                                                                                       row['eleos_group_suggestion_notes'] 
                                                                                      ),
                            axis=1,
                            result_type='expand')
                                                           
        
        
        return merged_df_group

    except Exception as e:
        print(f"Error on merge:{str(e)}")
        return None
 
  
#df_combined_group=calculating_suggestions_and_group_notes(suggest,group_sessions_group_notes)
#df_combined_group

In [249]:

def calculating_suggestions_and_notes (suggestions,notes):
    try:
        merged_df=pd.merge(notes,suggestions,on='uuid')
        merged_df[['group_session_id','individual_session_notes','num_individual_notes']]=notes[['individual_session_id','eleos_notes','num_notes']]
        merged_df[['uuid','eleos_individual_suggestions','num_individual_suggestions']]=suggestions[['uuid','eleos_individual_suggestions','num_individual_suggestions']]
        merged_df[['eleos_individual_suggestion_rate','added_group_sentences']]=merged_df.apply(lambda row: calculate_eleos_text_percentage_in_note(''.join(row['individual_session_notes']),
                                                                                       row['eleos_individual_suggestions'] 
                                                                                      ),
                            axis=1,
                            result_type='expand')
                                                           
        
        
        return merged_df

    except Exception as e:
        print(f"Error on merge:{str(e)}")
        return None
#df_combined_individual=calculating_suggestions_and_notes(group_individual_suggestions,result_dataframe)
#df_combined_individual
    
    

### speakers analysis

In [250]:
def get_individual_session_speakers_data(notes_suggestions_analysis):
    
    sessions_for_suggestions=notes_suggestions_analysis['session_id'].unique()
    speakers_analysis = pd.DataFrame()

    for uuid in tqdm (sessions_for_suggestions):
        try:
            uuid = uuid
            path = f's3://eleos-ahus/dev.eleos.health/{uuid}/streaming/audio/diarization/model.json'
            b, p = split_s3_path(path)  # Assuming split_s3_path is defined
            a = json_to_df(b, p)  # Assuming json_to_df is defined

            if len(a) > 0:
                a['uuid'] = uuid
                speakers_analysis = pd.concat([speakers_analysis, a])
        
        except :
            pass
            
            continue
    summary_data = speakers_analysis.groupby('uuid').agg(
        total_speakers=('total_speakers','mean'),
        total_talk_time=('total_talk_time','mean'),
        speakers_less_than_threshold=('speakers_less_than_threshold','mean'),
        confidence_therapist_level=('confidence_therapist_level','mean')
    ).reset_index()
    
    
    return summary_data

#speak=get_individual_session_speakers_data(notes_suggestions_analysis)
#speak

## final 

In [259]:
# 1 - extracting group sessions
def group_sessions_analysis (start_date, end_date):
    
    result_dataframe=group_sessions_individual_notes (start_date, end_date)   
    group_individual_suggestions=extract_individual_suggestions (result_dataframe)
    group_sessions_group_notes=group_notes_group_sessions(start_date,end_date)
    session_id_list=result_dataframe['uuid'].tolist()
    suggest=group_suggestions (session_id_list)
    individual_notes_and_suggestions= pd.merge(group_individual_suggestions, result_dataframe,on='uuid', how='right')
    group_notes_and_suggestions= pd.merge(group_sessions_group_notes,suggest,on='session_id',how='left')

    df_combined_individual=calculating_suggestions_and_notes(group_individual_suggestions,result_dataframe)
    df_combined_group=calculating_suggestions_and_group_notes(suggest,group_sessions_group_notes)
    columns_to_erase_from_merge=['model_name','text_temp','summary','summary_id']
    notes_suggestions_analysis=pd.merge(df_combined_group,df_combined_individual,left_on='session_id',right_on='individual_session_id',how='left')
    
    notes_suggestions_analysis = notes_suggestions_analysis.drop(columns=columns_to_erase_from_merge)
    speak=get_individual_session_speakers_data(notes_suggestions_analysis)
    sessions_with_report=pd.merge(notes_suggestions_analysis,speak,left_on='session_id',right_on='uuid',how='left')
    sessions_total=group_sessions_individual_notes_basics (start_date, end_date) 
    final_analysis=pd.merge(sessions_total,sessions_with_report,left_on='individual_session_id',right_on='session_id',how='left')

    final_analysis=final_analysis.rename(columns={'individual_session_x':'individual_session','group_x':'group','session_start_x':'session_start','session_end_x':'session_end',
                                                  'session_duration_x':'session_duration_seconds','technology_x':'technology used','is_note_confirmed_x':'is note confirmed',
                                                  'group_therapy_id_x':'group_id','ehr_x':'ehr','therapists_x':'therapists','patients_x':'patients','number of patients_x':'num patients',
                                                  'group_valid_x':'group_valid','eleos_notes_x':'eleos_notes','num_notes_x':'number of notes','therapist_mail_x':'therapist mail'})
                                                  
    final_analysis= final_analysis.drop(['uuid','session_id','individual_session_id_y','group_y','session_start_y','session_end_y',
                                         'is note confirmed_y',	'therapists_y',	'patients_y','number of patients_y','group_valid_y','uuid_x','eleos_notes_y','num_notes_y'], axis=1)
    
    
    
    return final_analysis

#start_date=datetime(2023,11, 21)
#end_date=datetime(2023,12, 21)
#result_dataframe = group_sessions_analysis(start_date, end_date)    
#result_dataframe





In [260]:
start_date=datetime(2023, 1, 21)
end_date=datetime(2023,12, 21)
result_dataframe = group_sessions_analysis(start_date, end_date)    
result_dataframe 


100%|██████████| 40/40 [00:00<00:00, 6847.00it/s]
100%|██████████| 40/40 [00:06<00:00,  6.11it/s]
100%|██████████| 23/23 [00:00<00:00, 3134.45it/s]
100%|██████████| 87/87 [00:00<00:00, 9950.76it/s]
100%|██████████| 40/40 [00:06<00:00,  6.27it/s]
100%|██████████| 27/27 [00:03<00:00,  6.80it/s]
100%|██████████| 40/40 [00:00<00:00, 8487.06it/s]


Unnamed: 0,individual_session_id_x,group,session_start,session_end,session_duration_seconds,technology used,is note confirmed,group_id,is scribe_x,ehr,...,group_session_id,individual_session_notes,num_individual_notes,eleos_individual_suggestion_rate,added_group_sentences_y,uuid_y,total_speakers,total_talk_time,speakers_less_than_threshold,confidence_therapist_level
0,00db3229-1cfb-46fd-a0b3-c767111050d6,Gaudenzia-Dresher-WG Evening Group - 9ec10cf7-...,2023-09-12 19:56:13+00:00,2023-09-12 21:18:34+00:00,4941.0,zoom_vendors_live,False,9ec10cf7-b2e8-4ea4-bb27-31f1000772bb,False,,...,,,,,,,,,,
1,01d854dd-7302-47d0-8bed-070967cd10a3,Gaudenzia-Montgomery-Wednesday OP - 28de2a23-1...,2023-07-20 21:07:38+00:00,2023-07-22 03:07:37+00:00,107999.0,zoom_vendors_live,False,28de2a23-1972-4e66-bacc-9d733c27421d,False,,...,,,,,,,,,,
2,02d06daf-0643-41a5-a1c9-47c22395a26b,Gaudenzia - Coatesville - Mindfulness Mondays ...,2023-11-20 16:37:23+00:00,2023-11-20 16:39:32+00:00,129.0,zoom_vendors_live,False,923762e8-4907-4913-a6e5-657cc114d6d0,True,,...,,,,,,,,,,
3,02eccf40-d16f-4bc6-a630-918645d6b161,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-11-08 13:59:09+00:00,2023-11-08 17:13:44+00:00,11675.0,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,...,,,,,,,,,,
4,03e21e29-4c59-48b8-b971-710ff76eb47f,Gaudenzia West Shore PM IOP Group 2940 - 1fa30...,2023-12-11 21:59:43+00:00,2023-12-11 22:47:14+00:00,2851.0,zoom_vendors_live,False,1fa3049d-7e1f-4277-899e-8c6cd49a3c58,False,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,f8f03006-f16a-45cd-aa04-a1a20379b8f4,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-11-01 20:50:10+00:00,2023-11-01 23:31:27+00:00,9677.0,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,...,,,,,,,,,,
264,fa43cecb-e9ae-4ae7-b0a8-f52a44120b89,Gaudenzia - Coatesville - Stages of Change - 4...,2023-10-24 13:57:49+00:00,2023-10-24 15:53:32+00:00,6943.0,zoom_vendors_live,False,41ed0901-304d-4070-bd9d-c0cfdf30d2d2,True,,...,,,,,,,,,,
265,fb041f1f-68d8-4f65-9a07-0f4492f0994c,Gaudenzia West Shore PM IOP Group 2940 - 1fa30...,2023-10-25 20:35:05+00:00,2023-10-25 23:32:13+00:00,10628.0,zoom_vendors_live,False,1fa3049d-7e1f-4277-899e-8c6cd49a3c58,False,,...,,,,,,,,,,
266,fc535bbe-920f-44ea-8044-5f01ab63d3e8,Gaudenzia-Montgomery-Wednesday OP - 28de2a23-1...,2023-10-02 21:02:26+00:00,2023-10-04 03:02:25+00:00,107999.0,zoom_vendors_live,False,28de2a23-1972-4e66-bacc-9d733c27421d,False,,...,,,,,,,,,,


In [222]:
df_sin_duplicados = result_dataframe.drop_duplicates(subset='session_id')
df_sin_duplicados

Unnamed: 0,individual_session_id_x,group_x,session_start_x,session_end_x,session duration_x,technology_x,is_note_confirmed_x,group_therapy_id_x,is scribe_x,ehr_x,...,group_session_id,individual_session_notes,num_individual_notes,eleos_individual_suggestion_rate,added_group_sentences_y,uuid_y,total_speakers,total_talk_time,speakers_less_than_threshold,confidence_therapist_level
0,00db3229-1cfb-46fd-a0b3-c767111050d6,Gaudenzia-Dresher-WG Evening Group - 9ec10cf7-...,2023-09-12 19:56:13+00:00,2023-09-12 21:18:34+00:00,0 days 01:22:21,zoom_vendors_live,False,9ec10cf7-b2e8-4ea4-bb27-31f1000772bb,False,,...,,,,,,,,,,
6,05728748-b7f3-4ccf-85b3-818de807ff55,Gaudenzia West Shore PM IOP Group 2940 - 1fa30...,2023-10-23 20:59:49+00:00,2023-10-23 23:28:48+00:00,0 days 02:28:59,zoom_vendors_live,False,1fa3049d-7e1f-4277-899e-8c6cd49a3c58,False,,...,05728748-b7f3-4ccf-85b3-818de807ff55,[The group therapy session discussed the topic...,1.0,0.0,[The group therapy session discussed the topic...,,,,,
9,09198d04-1948-422f-b9f2-7afb1fb63b9b,APN Currant Process Group - d1437136-15de-40d3...,2023-12-13 16:57:52+00:00,2023-12-13 18:49:20+00:00,0 days 01:51:28,zoom_vendors_live,False,d1437136-15de-40d3-9186-490a3d1777a9,False,,...,09198d04-1948-422f-b9f2-7afb1fb63b9b,[The client reflected on how emotional struggl...,2.0,0.0,[The client reflected on how emotional struggl...,09198d04-1948-422f-b9f2-7afb1fb63b9b,15.0,101.93,10.0,0.745183
14,0ec5edda-0349-4aac-bffa-992a8361b606,Gaudenzia West Shore OP Group 4711 - 2d7a2217-...,2023-12-07 17:03:35+00:00,2023-12-07 18:37:45+00:00,0 days 01:34:10,zoom_vendors_live,False,2d7a2217-0beb-4930-b84c-b186b64999da,False,,...,0ec5edda-0349-4aac-bffa-992a8361b606,[The 12 Noon OP Group is a hybrid group that m...,1.0,0.0,[The 12 Noon OP Group is a hybrid group that m...,0ec5edda-0349-4aac-bffa-992a8361b606,7.0,81.76,1.0,0.880418
33,22803474-d6d6-4598-a0a2-87a7608b773b,Gaudenzia West Shore OP Group 4711 - 2d7a2217-...,2023-12-14 16:50:35+00:00,2023-12-14 18:43:17+00:00,0 days 01:52:42,zoom_vendors_live,False,2d7a2217-0beb-4930-b84c-b186b64999da,False,,...,22803474-d6d6-4598-a0a2-87a7608b773b,[The 12 noon OP group is a hybrid group that m...,3.0,0.0,[The 12 noon OP group is a hybrid group that m...,22803474-d6d6-4598-a0a2-87a7608b773b,9.0,92.4,2.0,0.7073
36,2624919d-cdd2-4616-aa41-704b22bc9402,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-07-31 13:51:59+00:00,2023-07-31 16:11:04+00:00,0 days 02:19:05,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,...,,,,,,,,,,
38,2748cd17-792d-4873-b43a-468eade30db4,Gaudenzia West Shore OP Group 4711 - 2d7a2217-...,2023-11-30 17:01:12+00:00,2023-11-30 18:28:30+00:00,0 days 01:27:18,zoom_vendors_live,False,2d7a2217-0beb-4930-b84c-b186b64999da,False,,...,,,,,,2748cd17-792d-4873-b43a-468eade30db4,5.0,34.0,1.0,0.832041
40,2aa515ca-f6cf-47bb-b69a-5458b14d2eac,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-10-23 12:56:53+00:00,2023-10-23 16:07:58+00:00,0 days 03:11:05,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,...,2624919d-cdd2-4616-aa41-704b22bc9402,[The AM IOP Group is a hybrid group that meets...,3.0,0.0,[The AM IOP Group is a hybrid group that meets...,,,,,
54,3d443229-0fe1-460b-a135-2e43254aea73,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-07-21 13:00:54+00:00,2023-07-21 16:16:21+00:00,0 days 03:15:27,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,...,,,,,,,,,,
65,4b675d94-4ff3-4312-a9e5-2525d211e23a,Gaudenzia West Shore PM IOP Group 2940 - 1fa30...,2023-09-14 20:56:49+00:00,2023-09-15 00:01:52+00:00,0 days 03:05:03,zoom_vendors_live,False,1fa3049d-7e1f-4277-899e-8c6cd49a3c58,False,,...,2748cd17-792d-4873-b43a-468eade30db4,[The 12 Noon OP Group is a hybrid group that m...,1.0,0.0,[The 12 Noon OP Group is a hybrid group that m...,,,,,


In [26]:
result_dataframe.total_speakers.mean()

9.8

In [27]:
def calculate_eleos_text_percentage_in_note_TEST(note, eleos_suggestions):
    """
    :param note: str
    :param section: note section name (key of note dict)
    :param eleos_suggestions: list of strings
    :return: dict
    """
    try:
        note = [s.strip() for s in note.split('.') if s.strip() != '']
        eleos_sentences = find_eleos_text_in_paragraph(paragraph=note, eleos_text=eleos_suggestions)
        note_length = len(note)
        if note_length==0:
            return 0, []
        number_of_eleos_sentences = len(eleos_sentences)
        added_sentences = [x for x in note if x not in eleos_sentences]
        percentage = number_of_eleos_sentences / note_length * 100
        
        return percentage, added_sentences
    except Exception as e:
        print(f"Error on merge:{str(e)}")
        return None, None

In [28]:
test=result_dataframe.apply(lambda row: calculate_eleos_text_percentage_in_note_TEST(''.join(row['eleos_group_notes']),
                                                                                       row['eleos_group_suggestion_notes'] 
                                                                                      ),
                            axis=1,
                            result_type='expand')
test

Unnamed: 0,0,1
0,0.0,[The focus of the group therapy session was on...
1,0.0,[]
2,0.0,[The group therapy session discussed the topic...
3,0.0,[Clinician utilized a psychodynamic approach t...
4,0.0,[The focus of the group therapy session was on...
5,0.0,[The 12 Noon OP Group is a hybrid group that m...
6,0.0,[]
7,0.0,[]
8,0.0,[The focus of the group therapy session was on...
9,0.0,[The 12 noon OP group is a hybrid group that m...


In [220]:
result_dataframe['uuid'].nunique()

40

In [30]:
chen_list = pd.read_csv('/home/gabrielkws/Documents/chen group sessions.csv')
chen_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   #               50 non-null     int64 
 1   id              50 non-null     object
 2   therapist_mail  50 non-null     object
dtypes: int64(1), object(2)
memory usage: 1.3+ KB


In [223]:
#60aacfbf-efdf-4aa1-bcc1-365291e4f7f6
#05728748-b7f3-4ccf-85b3-818de807ff55
def format_code(code):
    
    code_str = str(code)
    return f'{code_str[:8]}-{code_str[8:12]}-{code_str[12:16]}-{code_str[16:20]}-{code_str[20:]}'

chen_list['uuid']=chen_list['id'].apply(format_code)
chen_list



Unnamed: 0,#,id,therapist_mail,uuid
0,1,05728748b7f34ccf85b3818de807ff55,mirwin@gaudenzia.org,05728748-b7f3-4ccf-85b3-818de807ff55
1,2,09198d041948422fb9f27afb1fb63b9b,sdaniel@apn.com,09198d04-1948-422f-b9f2-7afb1fb63b9b
2,3,2aa515caf6cf47bbb69a5458b14d2eac,lknoche@gaudenzia.org,2aa515ca-f6cf-47bb-b69a-5458b14d2eac
3,4,4398946ecc7c4662ae85d3b0cd3d0c44,ntomczak@gaudenzia.org,4398946e-cc7c-4662-ae85-d3b0cd3d0c44
4,5,6b1316c59ddd4b6d8249229cdef1e481,jbabil@gaudenzia.org,6b1316c5-9ddd-4b6d-8249-229cdef1e481
5,6,77258d09cf714284a025c4579b54c3a0,mirwin@gaudenzia.org,77258d09-cf71-4284-a025-c4579b54c3a0
6,7,81183f6506574a009922e7d44efd70c5,ntomczak@gaudenzia.org,81183f65-0657-4a00-9922-e7d44efd70c5
7,8,9ef6fb0beb764ea58fc1c3454f2dddf1,ntomczak@gaudenzia.org,9ef6fb0b-eb76-4ea5-8fc1-c3454f2dddf1
8,9,a0b95acdaed8425fa3fa4e1cedb20ffb,ntomczak@gaudenzia.org,a0b95acd-aed8-425f-a3fa-4e1cedb20ffb
9,10,afc6960160c149c09fa320f161fd55d5,lknoche@gaudenzia.org,afc69601-60c1-49c0-9fa3-20f161fd55d5


In [224]:
missing_codes = set(chen_list['uuid']) - set(result_dataframe['session_id'])
missing_codes

{'05728748-b7f3-4ccf-85b3-818de807ff55',
 '09198d04-1948-422f-b9f2-7afb1fb63b9b',
 '0ec5edda-0349-4aac-bffa-992a8361b606',
 '22803474-d6d6-4598-a0a2-87a7608b773b',
 '2624919d-cdd2-4616-aa41-704b22bc9402',
 '2748cd17-792d-4873-b43a-468eade30db4',
 '29464b20-95e9-4563-bdcb-131047d6f3a7',
 '2aa515ca-f6cf-47bb-b69a-5458b14d2eac',
 '3d443229-0fe1-460b-a135-2e43254aea73',
 '4301b70a-8905-4861-a01a-7c5e70a27ae3',
 '4398946e-cc7c-4662-ae85-d3b0cd3d0c44',
 '4b675d94-4ff3-4312-a9e5-2525d211e23a',
 '4ccdc434-e568-4965-b309-eb71c3d69082',
 '50d8a1a4-7f56-4c09-b728-7b72305c46be',
 '53573ee9-002c-47f1-bf8b-9db4d1ab606b',
 '5d3368d8-ccb3-4e7b-aa5f-12d6ad040532',
 '60aacfbf-efdf-4aa1-bcc1-365291e4f7f6',
 '653d7fb4-3e78-4073-9785-6af548647ab9',
 '6544ec8c-01a6-4a68-8c07-29d46c9e18df',
 '684538db-71ea-48e5-a6b4-8d1a31d82487',
 '6b1316c5-9ddd-4b6d-8249-229cdef1e481',
 '6c4ebf77-de4d-4124-922d-6036cc0fa768',
 '6cf9dedf-fb1f-4689-a1c2-00b17d6f07e8',
 '71353e6a-81ab-4530-adcd-c41b8dffcaf4',
 '77258d09-cf71-

In [39]:
res=group_sessions_individual_notes (start_date, end_date)
res

100%|██████████| 40/40 [00:00<00:00, 9635.43it/s]


Unnamed: 0,individual_session_id,group,session_start,session_end,session duration,technology,is_note_confirmed,group_therapy_id,is scribe,ehr,saved and signed,is note confirmed,therapists,patients,number of patients,uuid,eleos_notes,num_notes,therapist_mail
0,05728748-b7f3-4ccf-85b3-818de807ff55,Gaudenzia West Shore PM IOP Group 2940 - 1fa30...,2023-10-23 20:59:49+00:00,2023-10-23 23:28:48+00:00,0 days 02:28:59,zoom_vendors_live,False,1fa3049d-7e1f-4277-899e-8c6cd49a3c58,False,,,False,"[Elise Narkiewicz, Keri Wintersteen, Laila Eng...","[Angela Kline, Bradley Biller, William Nalley]",3,05728748-b7f3-4ccf-85b3-818de807ff55,[The group therapy session discussed the topic...,1,
1,09198d04-1948-422f-b9f2-7afb1fb63b9b,APN Currant Process Group - d1437136-15de-40d3...,2023-12-13 16:57:52+00:00,2023-12-13 18:49:20+00:00,0 days 01:51:28,zoom_vendors_live,False,d1437136-15de-40d3-9186-490a3d1777a9,False,,,False,[Scott Daniel],"[Brandon C Burns, Brandon Yeubanks, Fitz Patri...",3,09198d04-1948-422f-b9f2-7afb1fb63b9b,[The client reflected on how emotional struggl...,2,
2,0ec5edda-0349-4aac-bffa-992a8361b606,Gaudenzia West Shore OP Group 4711 - 2d7a2217-...,2023-12-07 17:03:35+00:00,2023-12-07 18:37:45+00:00,0 days 01:34:10,zoom_vendors_live,False,2d7a2217-0beb-4930-b84c-b186b64999da,False,,,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[Adriana Rivera-Colon, Allazzar Cato, Brian Ad...",10,0ec5edda-0349-4aac-bffa-992a8361b606,[The 12 Noon OP Group is a hybrid group that m...,1,
3,22803474-d6d6-4598-a0a2-87a7608b773b,Gaudenzia West Shore OP Group 4711 - 2d7a2217-...,2023-12-14 16:50:35+00:00,2023-12-14 18:43:17+00:00,0 days 01:52:42,zoom_vendors_live,False,2d7a2217-0beb-4930-b84c-b186b64999da,False,,,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[Adriana Rivera-Colon, Allazzar Cato, Brian Ad...",10,22803474-d6d6-4598-a0a2-87a7608b773b,[The 12 noon OP group is a hybrid group that m...,3,
4,2624919d-cdd2-4616-aa41-704b22bc9402,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-07-31 13:51:59+00:00,2023-07-31 16:11:04+00:00,0 days 02:19:05,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[000000000, Adriana Rivera-Colon, Billy Mentze...",18,2624919d-cdd2-4616-aa41-704b22bc9402,[The AM IOP Group is a hybrid group that meets...,3,
5,2748cd17-792d-4873-b43a-468eade30db4,Gaudenzia West Shore OP Group 4711 - 2d7a2217-...,2023-11-30 17:01:12+00:00,2023-11-30 18:28:30+00:00,0 days 01:27:18,zoom_vendors_live,False,2d7a2217-0beb-4930-b84c-b186b64999da,False,,,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[Adriana Rivera-Colon, Allazzar Cato, Brian Ad...",10,2748cd17-792d-4873-b43a-468eade30db4,[The 12 Noon OP Group is a hybrid group that m...,1,
6,2aa515ca-f6cf-47bb-b69a-5458b14d2eac,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-10-23 12:56:53+00:00,2023-10-23 16:07:58+00:00,0 days 03:11:05,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[000000000, Adriana Rivera-Colon, Billy Mentze...",18,2aa515ca-f6cf-47bb-b69a-5458b14d2eac,[In today's group all members checked in. Toda...,3,
7,3d443229-0fe1-460b-a135-2e43254aea73,"Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",2023-07-21 13:00:54+00:00,2023-07-21 16:16:21+00:00,0 days 03:15:27,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,,,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[000000000, Adriana Rivera-Colon, Billy Mentze...",18,3d443229-0fe1-460b-a135-2e43254aea73,[The group therapy session discussed the topic...,2,
8,4398946e-cc7c-4662-ae85-d3b0cd3d0c44,Gaudenzia - Coatesville - Steps to Recovery - ...,2023-10-27 13:56:49+00:00,2023-10-27 15:18:01+00:00,0 days 01:21:12,zoom_vendors_live,False,c6f8350e-388e-460f-acf2-295c3654b7dc,True,,,False,"[Angel Sargent, Nicholas Tomczak, Barry Trout,...","[Bryan Lyons, Dyisha Jackson, Hassan Dandridge...",6,4398946e-cc7c-4662-ae85-d3b0cd3d0c44,[],1,
9,4b675d94-4ff3-4312-a9e5-2525d211e23a,Gaudenzia West Shore PM IOP Group 2940 - 1fa30...,2023-09-14 20:56:49+00:00,2023-09-15 00:01:52+00:00,0 days 03:05:03,zoom_vendors_live,False,1fa3049d-7e1f-4277-899e-8c6cd49a3c58,False,,,False,"[Elise Narkiewicz, Keri Wintersteen, Laila Eng...","[Angela Kline, Bradley Biller, William Nalley]",3,4b675d94-4ff3-4312-a9e5-2525d211e23a,[There were 5 clients in group tonight all in-...,2,


In [43]:
raw=GroupTherapySession.objects.filter()
raw

<QuerySet [<GroupTherapySession: {"id": "004ba0b9-3d37-4562-8e2f-e2ab087b118b", "group_therapy_group": "Group Test - 556e4a23-8f70-418d-bdfb-cdd6f0dec564", "index": "22", "start_ts": "2023-06-04 13:17:13+00:00"}>, <GroupTherapySession: {"id": "00db3229-1cfb-46fd-a0b3-c767111050d6", "group_therapy_group": "Gaudenzia-Dresher-WG Evening Group - 9ec10cf7-b2e8-4ea4-bb27-31f1000772bb", "index": "10", "start_ts": "2023-09-12 19:56:13+00:00"}>, <GroupTherapySession: {"id": "01d854dd-7302-47d0-8bed-070967cd10a3", "group_therapy_group": "Gaudenzia-Montgomery-Wednesday OP - 28de2a23-1972-4e66-bacc-9d733c27421d", "index": "2", "start_ts": "2023-07-20 21:07:38+00:00"}>, <GroupTherapySession: {"id": "02d06daf-0643-41a5-a1c9-47c22395a26b", "group_therapy_group": "Gaudenzia - Coatesville - Mindfulness Mondays - 923762e8-4907-4913-a6e5-657cc114d6d0", "index": "0", "start_ts": "2023-11-20 16:37:23+00:00"}>, <GroupTherapySession: {"id": "02eccf40-d16f-4bc6-a630-918645d6b161", "group_therapy_group": "Gaud

In [49]:
df_raw=pd.DataFrame(raw)
df_raw

Unnamed: 0,0
0,"{""id"": ""004ba0b9-3d37-4562-8e2f-e2ab087b118b"",..."
1,"{""id"": ""00db3229-1cfb-46fd-a0b3-c767111050d6"",..."
2,"{""id"": ""01d854dd-7302-47d0-8bed-070967cd10a3"",..."
3,"{""id"": ""02d06daf-0643-41a5-a1c9-47c22395a26b"",..."
4,"{""id"": ""02eccf40-d16f-4bc6-a630-918645d6b161"",..."
...,...
345,"{""id"": ""fb041f1f-68d8-4f65-9a07-0f4492f0994c"",..."
346,"{""id"": ""fb676117-59e5-49e1-9f63-82b29e2f78fc"",..."
347,"{""id"": ""fc535bbe-920f-44ea-8044-5f01ab63d3e8"",..."
348,"{""id"": ""fde7b2f6-8f9d-44ac-bb8e-6e2a6538035d"",..."


In [50]:
def get_group_sessions_chen(start_date, end_date):
    sessions = GroupTherapySession.objects.filter(
        actual_start_ts__gte=start_date,
        actual_start_ts__lt=end_date
    )

    
    g_dict = {}
    for g in sessions:
            patients_list = [patient.name for patient in g.group_therapy_group.patients.all()]
            
            g_dict[g.id] = {
                'individual_session_id': missing_codes,
                'group': g.group_therapy_group,
                'organization':g.group_therapy_group.organization,
                'organization_id':g.group_therapy_group.organization_id,
                'session_start': g.actual_start_ts,
                'session_end': g.actual_end_ts,
                'session duration': (g.actual_end_ts - g.actual_start_ts) if (g.actual_start_ts and g.actual_end_ts) else None,
                'technology': g.technology_used,
                'is_note_confirmed': g.is_note_confirmed,
                'group_therapy_id': g.group_therapy_group_id,
                'is scribe':g.group_therapy_group.is_scribe,
                'ehr':g.group_therapy_group.ehr_group_identifier,
                'saved and signed':g.saved_and_signed,
                'is note confirmed':g.is_note_confirmed,
                'therapists':[therapist.name for therapist in g.group_therapy_group.therapists.all()],
                
                
                'patients': patients_list,
                'number of patients':len(patients_list)
                
             
            }

    group_sessions = pd.DataFrame(g_dict).T
    return group_sessions

df3=get_group_sessions_chen(start_date, end_date)
df3



Unnamed: 0,individual_session_id,group,organization,organization_id,session_start,session_end,session duration,technology,is_note_confirmed,group_therapy_id,is scribe,ehr,saved and signed,is note confirmed,therapists,patients,number of patients
004ba0b9-3d37-4562-8e2f-e2ab087b118b,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Group Test - 556e4a23-8f70-418d-bdfb-cdd6f0dec564,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-06-04 13:17:13+00:00,2023-06-04 14:05:05+00:00,0 days 00:47:52,zoom_vendors_live,False,556e4a23-8f70-418d-bdfb-cdd6f0dec564,False,"[11, 22]",,False,"[Therapist, eilam, Gaudenzia Test User2, Dana ...","[000000000, Aaron Test, Admission Test, Alice ...",18
00db3229-1cfb-46fd-a0b3-c767111050d6,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Gaudenzia-Dresher-WG Evening Group - 9ec10cf7-...,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-09-12 19:56:13+00:00,2023-09-12 21:18:34+00:00,0 days 01:22:21,zoom_vendors_live,False,9ec10cf7-b2e8-4ea4-bb27-31f1000772bb,False,[000088],,False,"[Gerry Lynch, Theresa Santiago]",[],0
01d854dd-7302-47d0-8bed-070967cd10a3,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Gaudenzia-Montgomery-Wednesday OP - 28de2a23-1...,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-07-20 21:07:38+00:00,2023-07-22 03:07:37+00:00,1 days 05:59:59,zoom_vendors_live,False,28de2a23-1972-4e66-bacc-9d733c27421d,False,[102],,False,"[Lafeeq Bowman, Sherene Phinizy]","[Erica Swint, Madison Smith, Rashawn Washington]",3
02d06daf-0643-41a5-a1c9-47c22395a26b,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Gaudenzia - Coatesville - Mindfulness Mondays ...,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-11-20 16:37:23+00:00,2023-11-20 16:39:32+00:00,0 days 00:02:09,zoom_vendors_live,False,923762e8-4907-4913-a6e5-657cc114d6d0,True,[5],,False,"[Nicholas Tomczak, Barry Trout, Todd Buckwalte...",[],0
02eccf40-d16f-4bc6-a630-918645d6b161,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...","Gaudenzia West Shore AM IOP 1890,1891,1892 - 5...",Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-11-08 13:59:09+00:00,2023-11-08 17:13:44+00:00,0 days 03:14:35,zoom_vendors_live,False,564115d4-bb6e-4f4e-9e58-3876381ffab1,False,"[1890, 1891, 1892]",,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[000000000, Adriana Rivera-Colon, Billy Mentze...",18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fa43cecb-e9ae-4ae7-b0a8-f52a44120b89,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Gaudenzia - Coatesville - Stages of Change - 4...,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-10-24 13:57:49+00:00,2023-10-24 15:53:32+00:00,0 days 01:55:43,zoom_vendors_live,False,41ed0901-304d-4070-bd9d-c0cfdf30d2d2,True,[55],,False,"[Nicholas Tomczak, Barry Trout, Todd Buckwalte...","[Bryan Lyons, Charles Gorman, Noel Lourido, Se...",4
fb041f1f-68d8-4f65-9a07-0f4492f0994c,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Gaudenzia West Shore PM IOP Group 2940 - 1fa30...,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-10-25 20:35:05+00:00,2023-10-25 23:32:13+00:00,0 days 02:57:08,zoom_vendors_live,False,1fa3049d-7e1f-4277-899e-8c6cd49a3c58,False,[2940],,False,"[Elise Narkiewicz, Keri Wintersteen, Laila Eng...","[Angela Kline, Bradley Biller, William Nalley]",3
fc535bbe-920f-44ea-8044-5f01ab63d3e8,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Gaudenzia-Montgomery-Wednesday OP - 28de2a23-1...,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-10-02 21:02:26+00:00,2023-10-04 03:02:25+00:00,1 days 05:59:59,zoom_vendors_live,False,28de2a23-1972-4e66-bacc-9d733c27421d,False,[102],,False,"[Lafeeq Bowman, Sherene Phinizy]","[Erica Swint, Madison Smith, Rashawn Washington]",3
fde7b2f6-8f9d-44ac-bb8e-6e2a6538035d,"{2aa515ca-f6cf-47bb-b69a-5458b14d2eac, 9ef6fb0...",Gaudenzia West Shore OP Group 4711 - 2d7a2217-...,Gaudenzia2,45950913-6095-426d-aff3-b6eea322a111,2023-11-16 16:55:46+00:00,NaT,NaT,zoom_vendors_live,False,2d7a2217-0beb-4930-b84c-b186b64999da,False,[4711],,False,"[Lucy Knoche, Melanie Irwin, Nikki Weir, Joy B...","[Adriana Rivera-Colon, Allazzar Cato, Brian Ad...",10


In [52]:
df3['group_therapy_id'].nunique()

14

In [59]:
def total_info_individual_notes_chen (sessions):

    ind_notes = GenericGroupIndividualReport.objects.filter(group_session_id__in=sessions['group_therapy_id'].unique())


    combined_data=[]
    for s in ind_notes:
        
            session_id=s.group_session_id
            note=get_ind_notes_optimized(s)
            combined_data.append({
                'uuid':session_id,
                'individual_session_notes':note,
                'group_session_id': s.group_session.id,
                #'therapist_mail':s.therapist,
                'therapist_id':s.therapist_id
            })

    combined_df= pd.DataFrame(combined_data)
    #combined_df_filtered = filter_valid_emails(combined_df)
    return combined_df
#combined_dataframe = total_info_individual_notes(sessions_selected)    
#combined_dataframe
df4 = total_info_individual_notes_chen(df3)    
df4

In [63]:
ind_notes4 = GenericGroupIndividualReport.objects.filter(group_session_id__in=df3['group_therapy_id'].unique())
ind_notes4

<QuerySet []>

In [225]:
result_dataframe.to_csv('/home/gabrielkws/Downloads/result_dataframeII.csv')