In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from seaborn import set_style
# Select seaborn's "whitegrid" style for any plots drawn later on.
set_style("whitegrid")

In [10]:
def clean_comments_csv(input_filepath, output_filepath):
    """De-duplicate a comments CSV and write the result to a new CSV.

    Rows sharing the same ('Post', 'Comment') pair are collapsed to their
    last occurrence and the number of rows removed that way is printed.
    Any remaining row whose 'Comment' value is the literal string 'Comment'
    is then discarded before the frame is saved.

    Parameters
    ----------
    input_filepath : path of the CSV to read; it must contain 'Post' and
        'Comment' columns.
    output_filepath : path the cleaned CSV is written to (the DataFrame
        index is not written).

    Returns
    -------
    None. The function's results are the printed count and the written file.
    """
    raw_df = pd.read_csv(input_filepath)

    # Flag every row that has a later twin with the same post id and comment
    # text, so that only the final occurrence of each pair survives.
    has_later_twin = raw_df.duplicated(subset=['Post', 'Comment'], keep='last')
    deduped_df = raw_df.loc[~has_later_twin].reset_index(drop=True)

    # The reported count covers duplicates only, not the rows filtered below.
    dropped_count = len(raw_df) - len(deduped_df)
    print(dropped_count, 'duplicates dropped from', input_filepath)

    # Discard rows whose comment text is just the word 'Comment'
    # (presumably stray header lines inside the data -- verify against the source).
    is_real_comment = deduped_df['Comment'].ne('Comment')
    deduped_df.loc[is_real_comment].to_csv(output_filepath, index=False)

In [11]:
# CSV to clean, and the file name for the cleaned copy. The output name is a
# relative path, so the file is written to the current working directory.
input_filepath = (
    'C:/Users/ESK/Documents/DataScience/working/datafiles/cleaned/'
    'cleaned_BorderlinePDisorder_coded.csv'
)
output_filepath = 'cleaned_coded_vset.csv'

In [12]:
# Clean the coded-comments CSV and write the de-duplicated copy to disk.
clean_comments_csv(
    input_filepath=input_filepath,
    output_filepath=output_filepath,
)

1 duplicates dropped from C:/Users/ESK/Documents/DataScience/working/datafiles/cleaned/cleaned_BorderlinePDisorder_coded.csv


In [17]:
# Load the cleaned CSV that clean_comments_csv wrote in the previous step.
# Reading through output_filepath (rather than a second hard-coded absolute
# path) guarantees this is the file that was just produced, regardless of the
# directory the notebook is run from.
coded_df = pd.read_csv(output_filepath)


In [18]:
# Preview the freshly loaded frame.
coded_df

Unnamed: 0.1,Unnamed: 0,Comment,Author,Post,relevance
0,49047,"adding to your question, did anyone have a the...",grassycroissant,1bmk9m2,
1,49048,Like 10 years. I’ve had good experiences and b...,charlottewonder,1bmk9m2,
2,49049,"One session and I dipped , because I literally...",Right_now78,1bmk9m2,
3,49050,"I’m 28f go every two weeks now, used to go wee...",ComplaintRepulsive52,1bmk9m2,
4,49051,I’ve been seeing a therapist since the age of ...,ChocCoveredSarcasm,1bmk9m2,1
...,...,...,...,...,...
27671,98069,[deleted],,w5q5qv,
27672,98070,I was on zoloft for several years in my early ...,KronikHaze,w5q5qv,1
27673,98071,I took Lamictal and Lithium and I hated both.,KronikHaze,w5q5qv,1
27674,98072,What was your dosage for lamictal? (I never tr...,aluap_mia,w5q5qv,


In [19]:
# Remove the "Unnamed: 0" column (presumably a row index that was saved into
# the CSV by an earlier export -- confirm against the source file).
# Rebinding with errors="ignore" instead of inplace=True keeps this cell safe
# to re-run: a second run no longer raises KeyError for the missing column.
coded_df = coded_df.drop(columns=["Unnamed: 0"], errors="ignore")

In [20]:
# Plain-text view of the frame after the "Unnamed: 0" column was removed.
print(coded_df)

                                                 Comment  \
0      adding to your question, did anyone have a the...   
1      Like 10 years. I’ve had good experiences and b...   
2      One session and I dipped , because I literally...   
3      I’m 28f go every two weeks now, used to go wee...   
4      I’ve been seeing a therapist since the age of ...   
...                                                  ...   
27671                                          [deleted]   
27672  I was on zoloft for several years in my early ...   
27673      I took Lamictal and Lithium and I hated both.   
27674  What was your dosage for lamictal? (I never tr...   
27675  You could also be experiencing a mixed episode...   

                     Author     Post relevance  
0           grassycroissant  1bmk9m2       NaN  
1           charlottewonder  1bmk9m2       NaN  
2               Right_now78  1bmk9m2       NaN  
3      ComplaintRepulsive52  1bmk9m2       NaN  
4        ChocCoveredSarcasm  1bmk9

In [21]:
# Show the frame as it is before the relevance codes are touched.
print("Before replacement:", coded_df, sep="\n")

# Treat a 'q' in 'relevance' as the code '1' (presumably a typing slip while
# coding -- confirm). The replacement is the string '1', not the integer,
# because the column is later compared against the string '1'.
coded_df['relevance'] = coded_df['relevance'].replace(to_replace='q', value='1')

# Show the frame again once the substitution has been applied.
print("\nAfter replacement:", coded_df, sep="\n")

Before replacement:
                                                 Comment  \
0      adding to your question, did anyone have a the...   
1      Like 10 years. I’ve had good experiences and b...   
2      One session and I dipped , because I literally...   
3      I’m 28f go every two weeks now, used to go wee...   
4      I’ve been seeing a therapist since the age of ...   
...                                                  ...   
27671                                          [deleted]   
27672  I was on zoloft for several years in my early ...   
27673      I took Lamictal and Lithium and I hated both.   
27674  What was your dosage for lamictal? (I never tr...   
27675  You could also be experiencing a mixed episode...   

                     Author     Post relevance  
0           grassycroissant  1bmk9m2       NaN  
1           charlottewonder  1bmk9m2       NaN  
2               Right_now78  1bmk9m2       NaN  
3      ComplaintRepulsive52  1bmk9m2       NaN  
4        ChocC

In [22]:
# Keep only rows that have a value in 'Comment' (rows with NaN there are dropped).
coded_df = coded_df.dropna(subset=['Comment'])
coded_df

Unnamed: 0,Comment,Author,Post,relevance
0,"adding to your question, did anyone have a the...",grassycroissant,1bmk9m2,
1,Like 10 years. I’ve had good experiences and b...,charlottewonder,1bmk9m2,
2,"One session and I dipped , because I literally...",Right_now78,1bmk9m2,
3,"I’m 28f go every two weeks now, used to go wee...",ComplaintRepulsive52,1bmk9m2,
4,I’ve been seeing a therapist since the age of ...,ChocCoveredSarcasm,1bmk9m2,1
...,...,...,...,...
27671,[deleted],,w5q5qv,
27672,I was on zoloft for several years in my early ...,KronikHaze,w5q5qv,1
27673,I took Lamictal and Lithium and I hated both.,KronikHaze,w5q5qv,1
27674,What was your dosage for lamictal? (I never tr...,aluap_mia,w5q5qv,


In [23]:
# Keep only rows that have a value in 'Author' (rows with NaN there are dropped).
coded_df = coded_df.dropna(subset=['Author'])
coded_df

Unnamed: 0,Comment,Author,Post,relevance
0,"adding to your question, did anyone have a the...",grassycroissant,1bmk9m2,
1,Like 10 years. I’ve had good experiences and b...,charlottewonder,1bmk9m2,
2,"One session and I dipped , because I literally...",Right_now78,1bmk9m2,
3,"I’m 28f go every two weeks now, used to go wee...",ComplaintRepulsive52,1bmk9m2,
4,I’ve been seeing a therapist since the age of ...,ChocCoveredSarcasm,1bmk9m2,1
...,...,...,...,...
27670,I didn't have problems switching from one to a...,Doanya,w5q5qv,
27672,I was on zoloft for several years in my early ...,KronikHaze,w5q5qv,1
27673,I took Lamictal and Lithium and I hated both.,KronikHaze,w5q5qv,1
27674,What was your dosage for lamictal? (I never tr...,aluap_mia,w5q5qv,


In [25]:
# Discard rows whose comment text is exactly the '[deleted]' placeholder.
coded_df = coded_df.loc[coded_df['Comment'].ne('[deleted]')]
coded_df

Unnamed: 0,Comment,Author,Post,relevance
0,"adding to your question, did anyone have a the...",grassycroissant,1bmk9m2,
1,Like 10 years. I’ve had good experiences and b...,charlottewonder,1bmk9m2,
2,"One session and I dipped , because I literally...",Right_now78,1bmk9m2,
3,"I’m 28f go every two weeks now, used to go wee...",ComplaintRepulsive52,1bmk9m2,
4,I’ve been seeing a therapist since the age of ...,ChocCoveredSarcasm,1bmk9m2,1
...,...,...,...,...
27670,I didn't have problems switching from one to a...,Doanya,w5q5qv,
27672,I was on zoloft for several years in my early ...,KronikHaze,w5q5qv,1
27673,I took Lamictal and Lithium and I hated both.,KronikHaze,w5q5qv,1
27674,What was your dosage for lamictal? (I never tr...,aluap_mia,w5q5qv,


In [28]:
# Select the rows whose 'relevance' equals the string '1'; rows with a missing
# or different code are left out.
val_df = coded_df.loc[coded_df['relevance'].eq('1')]

# Print the resulting subset under a heading.
print("\nNew DataFrame where 'relevance' is 1:", val_df, sep="\n")


New DataFrame where 'relevance' is 1:
                                                 Comment               Author  \
4      I’ve been seeing a therapist since the age of ...   ChocCoveredSarcasm   
5      i’ve been in and out of therapy (mostly in) fo...          oddthing757   
6      Been in regular (twice a week) therapy for ove...         bedrock_BEWD   
9      In therapy since I was 14. It's been over 20 y...  Own_Collection_8916   
11     Tried CBT for years, and never got anywhere. \...       sky-amethyst23   
...                                                  ...                  ...   
27659  I'm on 20mg abilify and 400mg epilim. I've bee...            formobymo   
27660  I was prescribed Lamictal once and had a sever...               Doanya   
27661  300mg quetiapine and 40mg fluoxetine.\n\nI use...             Ddog1909   
27672  I was on zoloft for several years in my early ...           KronikHaze   
27673      I took Lamictal and Lithium and I hated both.           Kro

In [29]:
# Save the relevance == '1' subset to the working directory, without the index.
val_df.to_csv(path_or_buf='val_df.csv', index=False)