# Read an Join Datasets

This notebook containes all the operations done to read and clean datasets:



In [40]:
import pandas as pd
import json

## Hugging Face Dataset

*   https://huggingface.co/datasets/Amod/mental_health_counseling_conversations

In [50]:
# Read the file line by line
file = open( "combined_dataset.json" , "r" )

In [51]:
# Load JSON data
with open('combined_dataset.json') as file:
    data = [line for line in file]

In [52]:
# Initialize lists to hold contexts and responses
contexts = []
responses = []

# Parse each JSON string and extract the relevant data
for json_str in data:
    data_row = json.loads(json_str)
    contexts.append(data_row['Context'])
    responses.append(data_row['Response'])

# Create the DataFrame
df1 = pd.DataFrame({
    'Context': contexts,
    'Response': responses
})


In [53]:
df1.head() # first rows

Unnamed: 0,Context,Response
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb..."
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see..."
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...


In [54]:
df1.shape # number of rows and columns

(3512, 2)

## Second Dataset

-
CounselChat: Mental health answers from counselors
CounselChat
https://counselchat.com

In [56]:
df2 = pd.read_csv("20200325_counsel_chat.csv")

In [57]:
df2.head() # first rows

Unnamed: 0.1,Unnamed: 0,questionID,questionTitle,questionText,questionLink,topic,therapistInfo,therapistURL,answerText,upvotes,views,split
0,0,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Sherry Katz, LCSWCouples and Family Therapist,...",https://counselchat.com/therapists/sherry-katz...,"If everyone thinks you're worthless, then mayb...",1,2899,train
1,1,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Robin Landwehr, DBH, LPCC, NCCMental Health in...",https://counselchat.com/therapists/robin-landw...,"Hello, and thank you for your question and see...",1,3514,train
2,2,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,Lee KingI use an integrative approach to treat...,https://counselchat.com/therapists/lee-king,First thing I'd suggest is getting the sleep y...,0,5,train
3,3,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Shauntai Davis-YearginPersonalized, private on...",https://counselchat.com/therapists/shauntai-da...,Therapy is essential for those that are feelin...,0,31,train
4,4,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,Jordan WhiteLicensed Social Worker at Oak Root...,https://counselchat.com/therapists/jordan-white,I first want to let you know that you are not ...,0,620,train


In [58]:
df2.shape # number of rows and columns

(2129, 12)

In [61]:
print(f"The dataset has: {len(df2['topic'].unique())} different topics")

The dataset has: 31 different topics


In [62]:
# Number of rows per topic
df2["topic"].value_counts() # The topic with more rows is "depression". However, the numbers of rows would be low to use just this topic for model fine-tuning

topic
depression                  330
anxiety                     249
counseling-fundamentals     240
intimacy                    205
relationships               174
parenting                   145
family-conflict             116
self-esteem                  83
relationship-dissolution     76
trauma                       66
behavioral-change            51
marriage                     46
lgbtq                        38
anger-management             38
substance-abuse              35
spirituality                 34
professional-ethics          32
workplace-relationships      26
diagnosis                    22
domestic-violence            21
social-relationships         20
grief-and-loss               18
self-harm                    11
sleep-improvement            10
eating-disorders             10
legal-regulatory              8
stress                        7
children-adolescents          6
addiction                     5
human-sexuality               4
military-issues               3
Na

## Merge Datasets

In [63]:
# Create an empty DataFrame df2
df2_reduced = pd.DataFrame()

In [64]:
df2_reduced["Context"] = df2["questionText"]

In [65]:
df2_reduced["Response"] = df2["answerText"]

In [66]:
df2_reduced

Unnamed: 0,Context,Response
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb..."
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see..."
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...
...,...,...
2124,"After first meeting the client, what is the pr...",There are probably no two therapists alike bec...
2125,"After first meeting the client, what is the pr...","Each counselor may have a different process, s..."
2126,"After first meeting the client, what is the pr...","After meeting a client, many Counselors will a..."
2127,"After first meeting the client, what is the pr...",A good therapist will discuss what brought you...


In [67]:
result = pd.concat([df1,df2_reduced] , ignore_index=True)

In [68]:
result

Unnamed: 0,Context,Response
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb..."
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see..."
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...
...,...,...
5636,"After first meeting the client, what is the pr...",There are probably no two therapists alike bec...
5637,"After first meeting the client, what is the pr...","Each counselor may have a different process, s..."
5638,"After first meeting the client, what is the pr...","After meeting a client, many Counselors will a..."
5639,"After first meeting the client, what is the pr...",A good therapist will discuss what brought you...


In [69]:
result.to_csv('combined_data.csv', index=False) # Storing the combined dataset

In [70]:
df = pd.read_csv("combined_data.csv") # Reading the combined dataset to confim it stored well

In [71]:
df

Unnamed: 0,Context,Response
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb..."
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see..."
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...
...,...,...
5636,"After first meeting the client, what is the pr...",There are probably no two therapists alike bec...
5637,"After first meeting the client, what is the pr...","Each counselor may have a different process, s..."
5638,"After first meeting the client, what is the pr...","After meeting a client, many Counselors will a..."
5639,"After first meeting the client, what is the pr...",A good therapist will discuss what brought you...
