In [7]:
import pandas as pd

# Locations of the two Excel workbooks (relative to the notebook's directory)
sirs_file_path = '../data/Dartmouth Data Set- SIRS .xlsx'
feis_file_path = '../data/Dartmouth FEIS Data.xlsx'

# No sheet_name is given, so pandas reads the first sheet of each workbook.
# NOTE(review): the topic-modelling cell reads sheet 'Cohort' from the FEIS
# workbook -- confirm the first sheet is the one intended here.
sirs_data = pd.read_excel(sirs_file_path)

# Read FEIS and give its respondent-ID column the name used in SIRS so both
# frames share a join key. rename() returns a new frame instead of mutating
# in place.
feis_data = pd.read_excel(feis_file_path).rename(
    columns={'Respondent ID #  (SIRS Local ID)': 'Local ID'}
)

# Inner join: keep only the 'Local ID' values present in both datasets
merged_data = pd.merge(
    sirs_data,
    feis_data,
    on='Local ID',
    how='inner',
)

In [8]:
# Show the merged SIRS + FEIS frame as the cell output (pandas truncates the
# rendering of large frames with '...' rows/columns).
merged_data

Unnamed: 0,Local ID,Date Enrolled in START,Status,Status Date,Time Enrolled in START,Source of referral to START,Suitability of enrollment in START,Individual/Caregiver reliable access to technology,Presenting problems at time of enrollment,Services at Enrollment,...,"In\nthe past year, did your family member use in-patient psychiatric services?","If\nyes, were the inpatient services that your family member received helpful to\nhim/her in your opinion? ?",How\nmuch help was available to you at night or on weekends if your family member\nhad a crisis?,Are\nthere options outside of the hospital for individuals experiencing a crisis to\ngo for help (i.e. crisis/hospital diversion beds)?,Who\nwas the primary source of information about your family memberâ€™s mental health\nservices?,"If other, please describe..2","During the past year, how much involvement\ndid you want to have in your family memberâ€™s treatment plan?",Was there any particular service that your\nfamily member needed that was not available?,"If yes, please describe the service.",What\nadvice would you give to service planners regarding the mental health service\nneeds of persons with IDD and their families?
0,321686,2015-09-01,Active,2015-09-30,Business Hours (Monday - Friday 8am - 5pm),Case Manager/Service Coordinator,Appropriate,,"Aggression (physical, verbal, property destruc...","Behavioral support services, Case management/s...",...,No,,Very little,None at all,Your family member him/herself,,A lot,No,,
1,379503C,2016-01-01,Active,2016-01-28,Business Hours (Monday - Friday 8am - 5pm),Other: Managed Care Organization,Appropriate,,"Aggression (physical, verbal, property destruc...",,...,Yes,None at all,None at all,None at all,Other,Hospital psychiatrist,A lot,Yes,Therapeutic/ABA/In home help/ Male Mentorship ...,To understnad the frustration of families who ...
2,401268C,2016-03-01,Active,2016-03-03,Business Hours (Monday - Friday 8am - 5pm),Case Manager/Service Coordinator,Appropriate,,Self-injurious,Enhanced staffing (1:1 or 2:1 staff),...,Yes,,,,,,,,,
3,382885,2016-07-01,Active,2016-07-11,Business Hours (Monday - Friday 8am - 5pm),Case Manager/Service Coordinator,Appropriate,,"Aggression (physical, verbal, property destruc...","Case management/service coordination, Mental h...",...,No,Did not know/answer,None at all,"Some, but not as much as was needed/wanted",His/her psychiatrist,,A lot,Yes,Crisis services,
4,322773,2016-09-01,Active,2016-09-09,Business Hours (Monday - Friday 8am - 5pm),Other: OPWDD,Appropriate,,"Aggression (physical, verbal, property destruc...","Case management/service coordination, Mental h...",...,No,None at all,None at all,None at all,His/her psychiatrist,,A lot,Yes,Ivan has no insurance and no services. He nee...,It is hard. No one helps you. You have to ke...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,8093984,2020-12-01,Inactive (stable functioning),2021-12-08,Business Hours (Monday - Friday 8am - 5pm),Case Manager/Service Coordinator,Appropriate,"Laptop/desktop, Telephone landline","Aggression (physical, verbal, property destruc...","Behavioral support services, Case management/s...",...,Yes,"Some, but not as much as was needed/wanted",Very little,Very little,Your family member him/herself,,A lot,Yes,Not sure what is missing,
1093,6264999,2020-08-01,Inactive (no longer requesting services),2021-12-09,Business Hours (Monday - Friday 8am - 5pm),Case Manager/Service Coordinator,Appropriate,"Cell phone service, Smartphone","Aggression (physical, verbal, property destruc...","Behavioral support services, Case management/s...",...,Yes,"Some, but not as much as was needed/wanted",Very little,None at all,Your family member him/herself,,Very little,No,,
1094,8191483,2020-02-01,Inactive (no longer requesting services),2021-12-09,Business Hours (Monday - Friday 8am - 5pm),Family member,Appropriate,,"Aggression (physical, verbal, property destruc...",Case management/service coordination,...,,None at all,"Some, but not as much as was needed/wanted",None at all,No one,,Very little,No,,
1095,427769,2020-05-01,Inactive (stable functioning),2021-12-14,Business Hours (Monday - Friday 8am - 5pm),Case Manager/Service Coordinator,Appropriate,,"Aggression (physical, verbal, property destruc...",Case management/service coordination,...,No,None at all,None at all,None at all,No one,,A lot,Yes,CRISIS SUPPORT,


In [6]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import nltk
from nltk.corpus import stopwords

# Read the 'Cohort' sheet of the FEIS workbook
file_path = '../data/Dartmouth FEIS Data.xlsx'
df = pd.read_excel(file_path, sheet_name='Cohort')

# Free-text survey columns that feed the topic model
text_columns = [
    'If yes, please describe the service.',
    'What\nadvice would you give to service planners regarding the mental health service\nneeds of persons with IDD and their families?'
]

# Column used to place each response before or after the COVID cut-off
date_column = 'End Date'

# Parse the date column; values that cannot be parsed become NaT (errors='coerce')
df[date_column] = pd.to_datetime(df[date_column], errors='coerce')

# Cut-off date separating the two subsets
covid_start_date = pd.Timestamp('2020-03-13')

# Boolean masks for each side of the cut-off. Rows whose date is NaT satisfy
# neither comparison, so they are absent from both subsets.
is_before_covid = df[date_column] < covid_start_date
is_on_or_after_covid = df[date_column] >= covid_start_date

before_covid_df = df[is_before_covid]
after_covid_df = df[is_on_or_after_covid]

# Function to preprocess text data and apply LDA
def process_lda(text_data, n_topics=10, n_words=3, extra_stopwords=('services',), random_state=69):
    """Fit an LDA topic model on free-text columns and summarise each topic.

    Each row of ``text_data`` becomes one document: the values of all columns
    joined by single spaces. Rows that are entirely blank are kept (as empty
    documents); they are not filtered out.

    Parameters
    ----------
    text_data : pandas.DataFrame
        Free-text columns, one row per document. Missing values are treated as
        empty strings and every value is converted to ``str``.
    n_topics : int, default 10
        Number of LDA components to fit.
    n_words : int, default 3
        Number of top-weighted terms reported per topic.
    extra_stopwords : str or iterable of str, default ('services',)
        Terms removed in addition to NLTK's English stop-word list.
    random_state : int, default 69
        Seed passed to ``LatentDirichletAllocation`` for reproducible topics.

    Returns
    -------
    list of str
        One string per topic: its ``n_words`` highest-weighted terms, strongest
        first, separated by single spaces.

    Raises
    ------
    ValueError
        If ``text_data`` has no rows. ``CountVectorizer`` may also raise
        ``ValueError`` when no usable terms remain after stop-word removal.
    """
    # Normalise here so the function does not depend on callers having cleaned
    # the frame first; this is a no-op for frames that already hold strings.
    cleaned = text_data.fillna('').astype(str)

    # Combine the text columns into a single text corpus (one document per row)
    documents = [' '.join(row) for row in cleaned.itertuples(index=False, name=None)]

    # Fail early with a clear message: with no rows there is nothing to model.
    if not documents:
        raise ValueError("process_lda received no rows; cannot fit a topic model on an empty corpus")

    # Fetch the stop-word corpus only when it is missing, rather than calling
    # the downloader (network access + log output) on every invocation.
    try:
        english_stopwords = stopwords.words('english')
    except LookupError:
        nltk.download('stopwords')
        english_stopwords = stopwords.words('english')

    # A bare string would otherwise be split into single characters by set()
    if isinstance(extra_stopwords, str):
        extra_stopwords = [extra_stopwords]
    stop_words = sorted(set(english_stopwords) | set(extra_stopwords))

    # Bag-of-words document-term matrix
    vectorizer = CountVectorizer(stop_words=stop_words)
    text_matrix = vectorizer.fit_transform(documents)

    # Apply LDA for topic modeling
    lda = LatentDirichletAllocation(n_components=n_topics, random_state=random_state)
    lda.fit(text_matrix)

    feature_names = vectorizer.get_feature_names_out()

    # For each topic, argsort is ascending, so the reversed slice walks the
    # n_words largest weights from strongest to weakest.
    return [
        ' '.join(feature_names[i] for i in weights.argsort()[:-n_words - 1:-1])
        for weights in lda.components_
    ]

# Pull the free-text columns for each period, replacing missing answers with
# empty strings and forcing every value to str
before_covid_text_data = before_covid_df[text_columns].fillna('').astype(str)
after_covid_text_data = after_covid_df[text_columns].fillna('').astype(str)

# Fit one topic model per period (both are fitted before anything is printed)
before_covid_topics = process_lda(before_covid_text_data)
after_covid_topics = process_lda(after_covid_text_data)

# Print each period's topics under its heading, numbering topics from 1
for heading, period_topics in (
    ("Topics before COVID-19:", before_covid_topics),
    ("\nTopics after COVID-19:", after_covid_topics),
):
    print(heading)
    for number, topic in enumerate(period_topics, start=1):
        print(f"Topic {number}: {topic}")

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/orenpoleshuckkinel/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Topics before COVID-19:
Topic 1: need school child
Topic 2: health mental help
Topic 3: family psychiatrist need
Topic 4: respite available need
Topic 5: none home respite
Topic 6: respite help family
Topic 7: answer crisis help
Topic 8: respite home help
Topic 9: therapy crisis behavior
Topic 10: respite community habilitation

Topics after COVID-19:
Topic 1: families know providers
Topic 2: help speech someone
Topic 3: health family access
Topic 4: program day help
Topic 5: family aba crisis
Topic 6: need help inpatient
Topic 7: medication hospital therapy
Topic 8: respite support home
Topic 9: ask skills social
Topic 10: support therapy mental


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/orenpoleshuckkinel/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
