# BACP & UKCP Modality Count
## Part 1: Joining the dataframes

In [4]:
import pandas as pd # data analytics
import re # regular expressions

In [5]:
# Reading in UKCP data
ukcp_df = pd.read_csv('C:\\Users\\laimi\\Desktop\\Studies\\Data Analytics\\Portfolio\\Types of Psychotherapy\\Code\\main\\saved csv files\\ukcp_df.csv')
ukcp_df.drop(columns='Unnamed: 0', inplace=True)

In [6]:
# Reading in BACP data
bacp_df = pd.read_csv('C:\\Users\\laimi\\Desktop\\Studies\\Data Analytics\\Portfolio\\Types of Psychotherapy\\Code\\main\\saved csv files\\bacp_df.csv')
bacp_df.drop(columns='Unnamed: 0', inplace=True)

In [26]:
bacp_df.head()

Unnamed: 0,name,address,Availability,About me and my therapy practice,Practice description,My first session,What I can help with,Types of therapy,Clients I work with,How I deliver therapy
0,Susie Jamieson,Templand,My Practice is online only.I offer appointment...,"When the unexpected arises, or the past just w...",You can choose to work with me short-term or l...,I offer a free no obligation initial 30 minute...,"Abuse, Anger management, Anxiety, Bereavement,...","CBT, Creative therapy, Eclectic, Humanistic, I...","Adults, EAP, Older adults","Online counselling, Telephone counselling"
1,Clare Elliot,Biggar,I am offering telephone and video sessions only.,I provide online (zoom) and telephone counsell...,,I offer an initial 30 minute free no obligatio...,"Abuse, Anger management, Anxiety, Bereavement,...","Humanistic, Integrative, Person centred","Adults, Children, Older adults, Organisations,...","Online counselling, Telephone counselling"
2,Janet Yasities,Dumfries,I am available between the hours of 09:00 and ...,I AM CURRENTLY OFFERING ON-LINE SESSIONS ONLYA...,,,"Abuse, Anger management, Anxiety, Depression, ...","Behavioural, Brief therapy, CBT, Cognitive, Co...","Adults, EAP, Groups, Older adults, Trainee, Yo...","Long-term face-to-face work, Online counsellin..."
3,Jennifer Collins,"Peebles, Scottish Borders",I don’t provide sessions at weekends.Fees are ...,"Hi, I'm Jen and I'm a counsellor/CBT Therapist...",Are things just feeling out of kilter? Know so...,After our initial consultation I will send you...,"Anxiety, Bereavement, Career coaching, Depress...","Behavioural, CBT, Cognitive, Eclectic, Humanis...","Adults, Older adults","Long-term face-to-face work, Online counsellin..."
4,Clair Higgon,DUMFRIES,Thank you for looking at my profile. I am curr...,I am a person centred therapist and in practic...,I work from a consulting room in my home. I of...,The initial session is about 45 minutes in len...,"Abuse, Anxiety, Bereavement, Depression, Healt...",Person centred,"Adults, Children, Couples, EAP, Groups, Organi...","Long-term face-to-face work, Online counsellin..."


In [27]:
ukcp_df.head()

Unnamed: 0,name,address,Types of Therapies Offered,I work with,What I can help with,My Approach,About Me,Special Interests
0,Naomi Landau,London NW6,Integrative Psychotherapist,,,,,
1,Siobhan Tinker,SEVENOAKS TN13,Psychosynthesis Psychotherapist,"Families, Individuals, Private healthcare refe...","Abuse, Adoption, Anger Management, Anxiety, Be...",,,
2,Rehma Said,Hayes UB4,"Family and Systemic Psychotherapist, Family Th...",,,,,
3,Yvonne Rose,Sheffield S10,"Family and Systemic Psychotherapist, Family Th...",,,,,
4,Suzanna Brown,,Transpersonal Psychotherapist,Individuals,Abuse,,,


I will join the dataframes as they share the main columns

In [28]:
# Dropping unneeded bacp columns
bacp_df2 = bacp_df.copy() # A copy to keep the original
bacp_df2.drop(columns=['Availability', 'My first session', 'How I deliver therapy'], inplace=True)

# Renaming columns for a merge
bacp_df2.columns=['name', 'address', 'About Me', 'My Approach', 'What I can help with', 'Types of Therapies Offered',
                 'I work with']

In [29]:
# Dropping unneeded ukcp columns
ukcp_df2 = ukcp_df.copy() # A copy to keep the original
ukcp_df2.drop(columns=['Special Interests'], inplace=True)

In [30]:
# Adding a column that identifies where the profile was listed
bacp_df2['data from'] = 'BACP'
ukcp_df2['data from'] = 'UKCP'

In [31]:
bacp_df2.head()

Unnamed: 0,name,address,About Me,My Approach,What I can help with,Types of Therapies Offered,I work with,data from
0,Susie Jamieson,Templand,"When the unexpected arises, or the past just w...",You can choose to work with me short-term or l...,"Abuse, Anger management, Anxiety, Bereavement,...","CBT, Creative therapy, Eclectic, Humanistic, I...","Adults, EAP, Older adults",BACP
1,Clare Elliot,Biggar,I provide online (zoom) and telephone counsell...,,"Abuse, Anger management, Anxiety, Bereavement,...","Humanistic, Integrative, Person centred","Adults, Children, Older adults, Organisations,...",BACP
2,Janet Yasities,Dumfries,I AM CURRENTLY OFFERING ON-LINE SESSIONS ONLYA...,,"Abuse, Anger management, Anxiety, Depression, ...","Behavioural, Brief therapy, CBT, Cognitive, Co...","Adults, EAP, Groups, Older adults, Trainee, Yo...",BACP
3,Jennifer Collins,"Peebles, Scottish Borders","Hi, I'm Jen and I'm a counsellor/CBT Therapist...",Are things just feeling out of kilter? Know so...,"Anxiety, Bereavement, Career coaching, Depress...","Behavioural, CBT, Cognitive, Eclectic, Humanis...","Adults, Older adults",BACP
4,Clair Higgon,DUMFRIES,I am a person centred therapist and in practic...,I work from a consulting room in my home. I of...,"Abuse, Anxiety, Bereavement, Depression, Healt...",Person centred,"Adults, Children, Couples, EAP, Groups, Organi...",BACP


In [32]:
# Joining the dataframes
joined_df = pd.concat([bacp_df2, ukcp_df2])
joined_df = joined_df.reset_index()
joined_df.drop(columns='index', inplace=True)

In [33]:
joined_df

Unnamed: 0,name,address,About Me,My Approach,What I can help with,Types of Therapies Offered,I work with,data from
0,Susie Jamieson,Templand,"When the unexpected arises, or the past just w...",You can choose to work with me short-term or l...,"Abuse, Anger management, Anxiety, Bereavement,...","CBT, Creative therapy, Eclectic, Humanistic, I...","Adults, EAP, Older adults",BACP
1,Clare Elliot,Biggar,I provide online (zoom) and telephone counsell...,,"Abuse, Anger management, Anxiety, Bereavement,...","Humanistic, Integrative, Person centred","Adults, Children, Older adults, Organisations,...",BACP
2,Janet Yasities,Dumfries,I AM CURRENTLY OFFERING ON-LINE SESSIONS ONLYA...,,"Abuse, Anger management, Anxiety, Depression, ...","Behavioural, Brief therapy, CBT, Cognitive, Co...","Adults, EAP, Groups, Older adults, Trainee, Yo...",BACP
3,Jennifer Collins,"Peebles, Scottish Borders","Hi, I'm Jen and I'm a counsellor/CBT Therapist...",Are things just feeling out of kilter? Know so...,"Anxiety, Bereavement, Career coaching, Depress...","Behavioural, CBT, Cognitive, Eclectic, Humanis...","Adults, Older adults",BACP
4,Clair Higgon,DUMFRIES,I am a person centred therapist and in practic...,I work from a consulting room in my home. I of...,"Abuse, Anxiety, Bereavement, Depression, Healt...",Person centred,"Adults, Children, Couples, EAP, Groups, Organi...",BACP
...,...,...,...,...,...,...,...,...
19902,Stella Ridley,GOSPORT PO12,,,,"Lacanian Analyst, Psychoanalytic Psychotherapist",,UKCP
19903,Lisa Tedeschini,,,,,Analytical Psychologist - Jungian Analyst,"Individuals, Private healthcare referrals",UKCP
19904,Claire Barber,,I work with people experiencing emotional diff...,,,"Family and Systemic Psychotherapist, Family Th...","Children and young people, Couples, Families, ...",UKCP
19905,Catherine Collins,,I work for the NHS in forensic services. I hav...,I am a group analytic psychotherapist and work...,"Mental Health Issues, Online Counselling, Priv...",Group Analyst,"Companies, Couples, Families, Groups, Individu...",UKCP


In [34]:
joined_df['name'].duplicated().sum()

956

Although there are some duplicated names, and some duplicate profiles (e.g. same person registered on BACP and UKCP), a closer look at the profiles shows that they are not identical, i.e. the same person can be offering one type of therapy on BACP register, and another on UKCP - probably due to different registration criteria. It means keeping these records won't affect my analysis.

## Part 2: Cleaning and defining therapy types

During previous EDA I have found that therapists describe their type of therapies offered inconsistently. While the majorty might be specified in 'Types of Therapies Offered' section, some are not, and it might underrepresent some of the modalities. For example, 'Integrative Psychotherapy' might be listed under one section, but a more detailed description would be provided in another section, explaining that the therapist combines psychodynamic and relational modelities. I will therefore also scan other sections of the profiles for keywords that will tell me whether some of the modalities were mentioned there instead.

The challenge here is to not overboost the numbers as well. For example, many therapists are able to work with a wide range problems, and would list that in the 'What I can help with' section. However, for example, just because they can help with sex issues does not mean they are a psychosexual therapist. For this reason I will only scan this specific section for 'emdr' - for some reason, it is mainly listed there.

I will approach this by creating a function that scans the profiles for specific therapy types using defined names as keywords. If a match is identified, it will add 1 to the counter.

In [36]:
# Splitting combined listings - e.g. 'Child and Adolescent' into 'Child' and 'Adolescent'. This is to make listings a little
# more consistent and convenient to work with.

for i in range(0, len(joined_df)): 
    lst = str(joined_df.iloc[i, 5]).split(', ') # creating a list of modalities for each profile / row of df
    new_lst = [] # an empty list I will add new modality names to
    for item in lst:
        if item == 'Child and Adolescent Psychotherapeutic Counsellor':
            new_item = 'Child Psychotherapist, Adolescent Psychotherapist'
        elif item == 'Child and Adolescent Psychotherapist':
            new_item = 'Child Psychotherapist, Adolescent Psychotherapist'
        elif item == 'Family and Systemic Psychotherapist':
            new_item = 'Family Psychotherapist, Systemic Psychotherapist'
        elif item == 'Humanistic and Integrative Psychotherapist':
            new_item = 'Humanistic Psychotherapist, Integrative Psychotherapist'
        elif item == 'Systemic Family and Couple Psychotherapist':
            new_item = 'Systemic Psychotherapist, Family Psychotherapist, Couple Psychotherapist'
        else:
            new_item = item
        new_lst.append(new_item)
    
    new_str = ', '.join(new_lst) # joining the new list into a string
    new_lst = new_str.split(', ') # splitting again into a list. Due to renaming above, this will give me a different list
    new_set = set(new_lst) # this will get rid of duplicate listings
    new_str = ', '.join(new_set) # joining again into a string
    joined_df.iloc[i, 5] = new_str # resetting the cell's value into a new one

In [37]:
joined_df.head()

Unnamed: 0,name,address,About Me,My Approach,What I can help with,Types of Therapies Offered,I work with,data from
0,Susie Jamieson,Templand,"When the unexpected arises, or the past just w...",You can choose to work with me short-term or l...,"Abuse, Anger management, Anxiety, Bereavement,...","Eclectic, Humanistic, Integrative, Transaction...","Adults, EAP, Older adults",BACP
1,Clare Elliot,Biggar,I provide online (zoom) and telephone counsell...,,"Abuse, Anger management, Anxiety, Bereavement,...","Person centred, Humanistic, Integrative","Adults, Children, Older adults, Organisations,...",BACP
2,Janet Yasities,Dumfries,I AM CURRENTLY OFFERING ON-LINE SESSIONS ONLYA...,,"Abuse, Anger management, Anxiety, Depression, ...","CBT, Behavioural, Cognitive, Cognitive analyti...","Adults, EAP, Groups, Older adults, Trainee, Yo...",BACP
3,Jennifer Collins,"Peebles, Scottish Borders","Hi, I'm Jen and I'm a counsellor/CBT Therapist...",Are things just feeling out of kilter? Know so...,"Anxiety, Bereavement, Career coaching, Depress...","Eclectic, Humanistic, Integrative, CBT, Behavi...","Adults, Older adults",BACP
4,Clair Higgon,DUMFRIES,I am a person centred therapist and in practic...,I work from a consulting room in my home. I of...,"Abuse, Anxiety, Bereavement, Depression, Healt...",Person centred,"Adults, Children, Couples, EAP, Groups, Organi...",BACP


## Grouping Modalities

Some therapies are listed under different names, so I will group them together.

In [74]:
# Creating lists of therapies that group same modalities together. Differences between psychotherpist, counsellor, etc
# are beyond the scope of this project.

adlerian_list = ['adlerian']

adolescent_list = ['Adolescent Psychotherapeutic Counsellor', 'Adolescent Counsellor', 'Adolescent Psychotherapist']  

animal_list = ['animal assisted therapy']

behavioural_list = ['behavioural', 'behavioral', 'Cognitive and Behavioural Psychotherapist', 'cbt']

biodynamic_list = ['Biodynamic Psychotherapist']

# body psychotherapy is often used as umbrella term, therefore this list also includes biodynamic therapy
body_list = ['Body Psychotherapist', 'Biodynamic Psychotherapist']

brief_list = ['brief therapy', 'solution focused brief therapy', 'solution focused therapy',]

cbt_list = ['Cognitive and Behavioural Psychotherapist', 'cbt']

child_list = ['Child Psychotherapeutic Counsellor', 'Child Counsellor', 'Integrative Child Psychotherapist',
              'Child Psychotherapist', 'play therapy']

cognitive_list = ['cognitive', 'Cognitive Analytic Therapist', 'cognitive analytic therapy',
                  'Cognitive and Behavioural Psychotherapist', 'cbt']

cognitive_analytic_list = ['Cognitive Analytic Therapist', 'cognitive analytic therapy']

core_process_list = ['Core Process Psychotherapist', 'Mindfulness Based Psychotherapist']

couple_list = ['Couple Psychotherapist']

creative_arts_list = ['Integrative Arts Psychotherapist', 'Dance Movement Psychotherapist', 'Psychodrama Psychotherapist',
                      'creative therapy', 'art therapy']

educational_list = ['Educational Psychotherapist']

emdr_list = ['emdr']

existential_list = ['Existential Psychotherapist', 'Existential-Analytic Psychotherapist', 'existential']

eft_list = ['emotionally focused therapy']

family_list = ['Family Psychotherapist', 'family therapy']

gestalt_list = ['Gestalt Group Psychotherapist', 'Gestalt Psychotherapeutic Counsellor', 'Gestalt Psychotherapist', 'gestalt']

group_list = ['Gestalt Group Psychotherapist', 'Group Analyst', 'Group Analytic Psychotherapist']

humanistic_list = ['Humanistic Psychotherapeutic Counsellor', 'Humanistic Psychotherapist', 'humanistic']

hypno_list = ['Hypno -Psychotherapist']

integrative_list = ['Integrative Psychotherapeutic Counsellor', 'Contemporary Psychotherapist','Psychotherapeutic Counsellor',
                   'Integrative Psychotherapist', 'Integrative Arts Psychotherapist', 'Integrative Child Psychotherapist', 
                    'Integrative Psychosynthesis Psychotherapist', 'Integrative Psychotherapeutic Counsellor', 
                    'Integrative Psychotherapist', 'Integrative Transpersonal Psychotherapeutic Counsellor', 
                    'Integrative Transpersonal Psychotherapist', 'integrative', 'eclectic']

interpersonal_list = ['interpersonal']

jungian_list = ['Analytical Psychologist - Jungian Analyst', 'Analytical Psychotherapist (Jungian)', 'Analytical Psychologist',
                'Analytical Psychotherapist', 'Jungian Analytical Psychotherapist', 'Jungian Psychotherapist', 'jungian']

nlp_list = ['Neuro-linguistic Psychotherapist', 'neuro linguistic programming']

parent_infant_list = ['Parent Infant Psychoanalytic Psychotherapist']

person_centred_list = ['Person Centred Psychotherapist', 'person centred']

personal_construct_list = ['Personal Construct Psychotherapist']

phenomenological_list = ['phenomenological']

primal_list = ['primal therapy']

psychoanalysis_list = ['Psychoanalyst', 'Contemporary Psychoanalyst', 'Lacanian Analyst', 'psychoanalytic']

psychodynamic_list = ['Psychoanalytic Psychotherapist', 'Intercultural Psychoanalytical Psychotherapist',
                      'Attachment-based Psychoanalytic Psychotherapist', 'Psychodynamic Psychotherapist', 'psychodynamic']

psychosexual_list = ['Sexual and Relationship Psychotherapist', 'Psychosexual Psychotherapist']

psychosynthesis_list = ['Psychosynthesis Psychotherapeutic Counsellor', 'Integrative Psychosynthesis Psychotherapist',
                       'Psychosynthesis Psychotherapist', 'psychosynthesis']

relational_list = ['relational']

systemic_list = ['Systemic Psychotherapist', 'systemic']

transactional_list = ['Transactional Analysis Psychotherapist', 'transactional']

transpersonal_list = ['Integrative Transpersonal Psychotherapeutic Counsellor', 'Integrative Transpersonal Psychotherapist',
                     'Transpersonal Psychotherapist', 'transpersonal']

## Creating keyword lists

I will scant the profiles for these keywords in case some modalities are mentioned in other sections of the profile.

In [75]:
# Creating lists of keywords that I will search for when scanning profiles. I have lemmatized some words to reduce
# number of variations

adlerian_kwords = ['adlerian', 'individual psycho']

adolescent_kwords = ['adolescent', 'young people']

animal_kwords = ['animal assisted', 'canine assisted', 'therapy dog']

behavioural_kwords = ['behavio', 'cbt']

biodynamic_kwords = ['biodynamic']

# Body psychotherapy is also an umbrella term, therefore this list also includes biodynamic keyword
body_kwords = ['body psychotherap', 'body oriented psychotherap', 'somatic', 'organismic psychotherap', 'radix', 
                'biosynthesis', 'body mind', 'rubenfeld synergy', 'biodynamic']

brief_kwords = ['brief therap', 'solution focused']

cbt_kwords = ['cbt', 'cognitive behavio', 'cognitive and behavio']

child_kwords = ['child', 'sandtray', 'play therap']

cognitive_kwords = ['cognitive', 'cbt']

cognitive_analytic_kwords = ['cognitive analytic']

core_process_kwords = ['core process', 'mindfulness based', 'karuna']

couple_kwords = ['couple']

creative_arts_kwords = ['dance', 'drama', 'music', 'visual art', 'creative art', 'art psychotherap', 'the arts', 'art therap']

educational_kwords = ['educational therap', 'learning difficult']

existential_kwords = ['existential']

eft_kwords = ['emotionally focused']

family_kwords = ['family']

gestalt_kwords = ['gestalt']

group_kwords = ['group']

humanistic_kwords = ['humanistic']

hypno_kwords = ['hypno']

integrative_kwords = ['integrative', 'eclectic', 'multimodal']

interpersonal_kwords = ['interpersonal']

jungian_kwords = ['jung']

nlp_kwords = ['nlp', 'neuro linguistic', 'neurolinguistic']

parent_infant_kwords = ['infant', 'baby', 'babies']

person_centred_kwords = ['person cent', 'client cent', 'rogerian']

personal_construct_kwords = ['personal construct']

phenomenological_kwords = ['phenomenolog']

primal_kwords = ['primal']

relational_kwords = ['relational therap', 'relational psycho']

psychoanalysis_kwords = ['freudian', 'psychoanalyst']

psychodynamic_kwords = ['psychodynamic', 'psychoanalytic']

psychosexual_kwords = ['psychosexual', 'sexual', 'sex problem']

psychosynthesis_kwords = ['psychosynthesis']

systemic_kwords = ['systemic']

transactional_kwords = ['transactional']

transpersonal_kwords = ['transpersonal']

emdr_kwords = ['emdr', 'eye movement desensitization']

In [83]:
therapies_list = [adlerian_list, animal_list, behavioural_list, biodynamic_list, brief_list, cbt_list,
                  cognitive_list, cognitive_analytic_list, core_process_list, creative_arts_list,
                  educational_list, existential_list, eft_list, gestalt_list, group_list, 
                  hypno_list, integrative_list, interpersonal_list, jungian_list, nlp_list,
                  person_centred_list, personal_construct_list, phenomenological_list, primal_list, psychoanalysis_list,
                  psychodynamic_list, psychosexual_list, psychosynthesis_list, relational_list, systemic_list,
                  transactional_list, transpersonal_list, emdr_list]

In [84]:
keywords_list = [adlerian_kwords, animal_kwords, behavioural_kwords, biodynamic_kwords,
                 brief_kwords, cbt_kwords, cognitive_kwords, cognitive_analytic_kwords,
                 core_process_kwords, creative_arts_kwords, educational_kwords, existential_kwords, 
                 eft_kwords, gestalt_kwords, group_kwords, hypno_kwords, integrative_kwords,
                 interpersonal_kwords, jungian_kwords, nlp_kwords, person_centred_kwords, 
                 personal_construct_kwords, phenomenological_kwords, primal_kwords, 
                 psychoanalysis_kwords, psychodynamic_kwords, psychosexual_kwords, psychosynthesis_kwords, 
                 relational_kwords, systemic_kwords, transactional_kwords, transpersonal_kwords, emdr_kwords]

In [85]:
name_list = ['adlerian', 'animal assisted', 'behavioural', 'biodynamic', 'solution focused brief', 'CBT',
             'cognitive', 'cognitive analytic', 'core process', 'creative arts', 'educational',
             'existential', 'EFT', 'gestalt', 'group', 'hypnotherapy', 'integrative', 'interpersonal',
             'jungian', 'NLP', 'person centred', 'personal construct', 'phenomenological', 'primal', 
             'psychoanalysis','psychodynamic', 'psychosexual', 'psychosynthesis', 'relational', 'systemic', 'transactional',
             'transpersonal', 'EMDR']

In [86]:
print(len(name_list))
print(len(keywords_list))
print(len(name_list))

33
33
33


## Creating profile-scanning funcions

In [59]:
# A mini-function that checks if any of keywords provided are found in a specific therapist's profile.
# I will use it in a larger function for better readability.

# Args: name = name of the modality, index = row of df (a specific profile), kword_list = words to look for in a profile
def kword_scan(name, index, kword_list):
    # EMDR is often listed in a different section from all other therapies
    if name == 'EMDR': 
        # combining text from other sections and making all lower-case
        text = str(joined_df.iloc[index, [2, 3, 4, 6]].values).lower() 
    else:
        text = str(joined_df.iloc[index, [2, 3, 6]].values).lower()
    kword_match = 0
    for keyword in kword_list: # searching for keywords in the combined text
        if keyword in text:
            kword_match += 1
    return kword_match

In [87]:
# A function that will scan the profiles one by one and count the number of times each therapy is mentioned.
# It first looks for specific therapies as they are listed under 'Types of Therapies Offered' section,
# and then, if no match found, also checks other sections of the profile.

count_dict = {} # empty dictionary where I will add results

# Args: name = name of the modality, therapies_list = modality names to search for, keyword_list = keywords to search for
def profile_scan(name, therapies_list, keyword_list):
    
    count = 0

    for i in range(0, len(joined_df)):
        
        # Transforming lists to sets
        listed_types = set(str(joined_df.loc[i, 'Types of Therapies Offered']).lower().split(', '))
        target_types = set([x.lower() for x in therapies_list])
        
        # Combining the sets to identify any matches. If there are no matches, its length will be 0
        match_set = listed_types & target_types
        
        # Searching for a match - first, in the "Types of Therapies Offered" section, and then in main text body
        if len(match_set) > 0:
            count += 1
        else:
            if kword_scan(name, i, keyword_list) > 0:
                count += 1
     
    # count and name added to the dictionary
    count_dict[name] = count

## Part 3: Scanning profiles and counting therapy types

In [88]:
for name, t_list, k_list in zip(name_list, therapies_list, keywords_list):
    profile_scan( name, t_list, k_list)
    print(len(count_dict))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33


In [7]:
# A function to transform dictionary to dataframe, clean up indexing, rename columns, and calculate percentage
def dict_to_df(dict_):
    df = pd.DataFrame.from_dict(dict_, orient="index", columns=['count']).sort_values('count', ascending=False)
    df = df.reset_index()
    df.columns = ['type', 'count']
    df['availability'] = round(df['count'].apply(lambda x: x/19907*100), 1)
    return df

In [90]:
therapy_count_df = dict_to_df(count_dict)

In [91]:
therapy_count_df

Unnamed: 0,type,count,availability
0,integrative,11912,59.8
1,person centred,9268,46.6
2,psychodynamic,7752,38.9
3,behavioural,7542,37.9
4,cognitive,6172,31.0
5,solution focused brief,5494,27.6
6,CBT,5440,27.3
7,relational,4996,25.1
8,group,4296,21.6
9,creative arts,3860,19.4


In [92]:
#  therapy_count_df.to_csv('therapy_count.csv')

## Part 4: Counting Categories

I have counted the individual listings of therapies and I will group them in the following categories:
- Humanistic
- Cognitive Behavioural
- Relationship Focused
- Unconscious Focused
- Body Oriented
- Spiritual/Philisophical
- Expressive
- Other

Since each therapist can offer more than one type of therapy, the total number of therapies listed the way I counted them is much larger than the number of therapists. Therefore, to accurately calculate the availability of certain groups, I cannot just aggregate numbers I have just got, and need to do a similar scan using different parameters.


In [93]:
# Initiating lists #1 - modality names
human_list = humanistic_list + person_centred_list + gestalt_list + core_process_list + existential_list + transpersonal_list + phenomenological_list + brief_list
cog_beh_list = cbt_list + cognitive_list + behavioural_list + cognitive_analytic_list + nlp_list + hypno_list
relationships_list = relational_list + interpersonal_list + systemic_list + eft_list + psychosexual_list + transactional_list + group_list
unconscious_list = psychodynamic_list + psychoanalysis_list + jungian_list + personal_construct_list
body_oriented_list = biodynamic_list + emdr_list + body_list
spiritual_list = phenomenological_list + existential_list + psychosynthesis_list + transpersonal_list
expressive_list = creative_arts_list + primal_list

In [96]:
# Initiating lists #2 - keywords
human_keywords = humanistic_kwords + person_centred_kwords + gestalt_kwords + core_process_kwords + existential_kwords + transpersonal_kwords + phenomenological_kwords + brief_kwords
cog_beh_keywords = cbt_kwords + cognitive_kwords + behavioural_kwords + cognitive_analytic_kwords + nlp_kwords + hypno_kwords
relationships_keywords = relational_kwords + interpersonal_kwords + systemic_kwords + eft_kwords + psychosexual_kwords + transactional_kwords + group_kwords
unconscious_keywords = psychodynamic_kwords + psychoanalysis_kwords + jungian_kwords + personal_construct_kwords
body_oriented_keywords = biodynamic_kwords + emdr_kwords + body_kwords
spiritual_keywords = phenomenological_kwords + existential_kwords + psychosynthesis_kwords + transpersonal_kwords
expressive_keywords = creative_arts_kwords + primal_kwords

In [97]:
# Initiating lists #3 - combining modality names and keywords
category_list = [human_list, cog_beh_list, relationships_list, unconscious_list, body_oriented_list, spiritual_list,
              expressive_list]

category_keywords = [human_keywords, cog_beh_keywords, relationships_keywords, unconscious_keywords, body_oriented_keywords,
                  spiritual_keywords, expressive_keywords]

category_names = ['Humanistic', 'Cognitive Behavioural', 'Relationships Focused', 'Unconscious Focused', 'Body Oriented',
               'Philosophical/Spiritual', 'Expressive']

In [100]:
# resetting the dictionary
count_dict = {}

In [101]:
for name, c_list, c_keyword in zip(category_names, category_list, category_keywords):
    profile_scan(name, c_list, c_keyword)
    print(len(count_dict))

1
2
3
4
5
6
7


In [102]:
category_count_df = dict_to_df(count_dict)

In [103]:
category_count_df

Unnamed: 0,type,count,availability
0,Humanistic,13206,66.3
1,Relationships Focused,11250,56.5
2,Cognitive Behavioural,8209,41.2
3,Unconscious Focused,8175,41.1
4,Philosophical/Spiritual,5192,26.1
5,Expressive,3869,19.4
6,Body Oriented,1484,7.5


In [104]:
category_count_df.to_csv('category_count_df.csv')

# Part 5 - Type of Client
Some therapies are defined by the type of clients they attend to - child, adolescent, couple, etc. I will separate these from modalities based on a philosophy.

In [110]:
# Resetting dictionary
count_dict = {}

# Initiating lists
client_names = ['parent-infant', 'child', 'adolescent', 'couple', 'family']
client_list = [parent_infant_list, child_list, adolescent_list, couple_list, family_list]
client_keywords = [parent_infant_kwords, child_kwords, adolescent_kwords, couple_kwords, family_kwords]

In [112]:
for name, c_list, c_keyword in zip(client_names, client_list, client_keywords):
    profile_scan(name, c_list, c_keyword)
    print(len(count_dict))

1
2
3
4
5


I want to add 'adult' to this section, as while most therapists work with adults, some might be specialising if, e.g., child therapy.

In [None]:
# A smaller scan function which I can use to focus on specific columns
def small_scan(df, kword_lst, col_lst):
    count = 0
    for i in range(0,len(df)):
        text = str(df.iloc[i, col_lst].values).lower()
        text = re.sub('-', ' ', text)
        for word in kword_lst:
            if word in text:
                count += 1
    return count

In [138]:
# I will count the profiles that indicate working with individuals and adults, as well as those that have left the field blank, 
# as adult personal therapy is usually the default
adult_count = small_scan(joined_df, ['individuals', 'adults'], [-2])
unindicated = joined_df['I work with'].isna().sum()
adult_count = adult_count + unindicated
adult_perc = round(adult_count*100/19907, 2)
print('adult count:', adult_count)
print('adult perc:', adult_perc)

adult count: 19537
adult perc: 98.14


In [143]:
# Adding value to the dictionary
count_dict['adult'] = 19537

In [144]:
count_dict

{'parent-infant': 205,
 'child': 5307,
 'adolescent': 8127,
 'couple': 6900,
 'family': 4495,
 'adult': 19537}

In [145]:
client_count_df = dict_to_df(count_dict)

In [146]:
client_count_df

Unnamed: 0,type,count,availability
0,adult,19537,98.1
1,adolescent,8127,40.8
2,couple,6900,34.7
3,child,5307,26.7
4,family,4495,22.6
5,parent-infant,205,1.0


# Part 6 - Delivery type (online vs in person)

I would also like to know the availability for online therapy. I did not scrape this data from UKCP, but it is easily available on the website. For BACP, I will need to count it.

### UKCP - from website

- telephone 1592
- online 3083
- home visits 400
- face to face 4250

In [147]:
# Outdoor therapy is missing. Scanning the profiles for keywords
outdoor_count = small_scan(ukcp_df, ['outdoor', 'nature', 'wild therapy', 'walk and talk'], slice(2,8))
outdoor_count

209

In [148]:
# Same with sms/email therapy
sms_email_count = small_scan(ukcp_df, ['sms therapy', 'email therapy'], slice(2,8))
sms_email_count

1

### BACP
For BACP, I will need to perform value counts on the dataframe to get the data I need.

In [149]:
# A function that will calculate value-counts when there are multiple listings in each cell
def column_split_count(df, column):
    # Splitting the values
    new_df = pd.DataFrame(df[column].str.split(', ', expand=True))
    
    # Creating empty dataframe that I will add data to
    value_count = pd.DataFrame(columns = ['type', 'count'])
    
    # Creating a temporary dataframe with value counts for each column and concatenating them all to one
    for column in new_df:
        temp_df = pd.DataFrame(new_df[column].value_counts().reset_index())
        temp_df.columns = ['type', 'count']
        value_count = pd.concat([value_count, temp_df], axis=0, ignore_index=True)
        
    # Aggregating count values from all columns
    value_count = value_count.groupby('type').sum()
    
    # Dropping values with count=1 (corrupted data when scarping)
    value_count.drop(index=value_count[value_count['count']==1].index, inplace=True)
    
    # Sorting values
    value_count = value_count.sort_values(by='count', ascending=False)
    
    # Calculating the percentage
    value_count['perc'] = round(value_count['count'].apply(lambda x: x*100/12208), 1)
    
    value_count = value_count.reset_index()
    
    return value_count

In [150]:
# Calling the above function
bacp_delivery = column_split_count(bacp_df, 'How I deliver therapy')

In [151]:
bacp_delivery

Unnamed: 0,type,count,perc
0,Online counselling,10832,88.7
1,Long-term face-to-face work,10450,85.6
2,Short-term face-to-face work,10280,84.2
3,Telephone counselling,8241,67.5
4,Time-limited,7273,59.6
5,Long term sessions,2403,19.7
6,Short term sessions,2364,19.4
7,Home visits,1432,11.7
8,Outdoor therapy,568,4.7
9,Email therapy,206,1.7


In [152]:
# I want to join sms and email into one
sms_email_bacp = small_scan(bacp_df, ['sms therapy', 'email therapy'], slice(2,10))
sms_email_bacp

211

In [153]:
# I want to know total 'face-to-face' number, regardless of the lenght
face_count = small_scan(bacp_df, ['face to face'], [-1])
face_count

10580

In [1]:
# Creating a dataframe with joined BACP and UKCP data
joined_del_types = {'telephone':1592+8241,
                    'online':3083+10832,
                    'home visits':400+1432,
                    'face to face':4250+10580,
                    'outdoor':209+568,
                    'sms/email':211+1}

In [2]:
joined_del_types

{'telephone': 9833,
 'online': 13915,
 'home visits': 1832,
 'face to face': 14830,
 'outdoor': 777,
 'sms/email': 212}

In [8]:
joined_del_types = dict_to_df(joined_del_types)

In [9]:
joined_del_types

Unnamed: 0,type,count,availability
0,face to face,14830,74.5
1,online,13915,69.9
2,telephone,9833,49.4
3,home visits,1832,9.2
4,outdoor,777,3.9
5,sms/email,212,1.1
