In [None]:
# Creating Question Dimension
#
# Lookup table from question ID (e.g. 'Q01') to the full question text.
# It is applied further down with Series.map, so any ID that is not a key
# here ends up with a NaN description. Note that the keys are not
# contiguous: Q06, Q23-Q26, Q28 and Q29 have no entry.

# Mapping dictionary
question_mapping = {
    'Q01': 'Percentage of older adults who are eating 2 or more fruits daily',
    'Q02': 'Percentage of older adults who are eating 3 or more vegetables daily',
    'Q03': 'Percentage of older adults who are experiencing frequent mental distress',
    'Q04': 'Percentage of older adults who have been told they have high blood pressure who report currently taking medication for their high blood pressure',
    'Q05': 'Percentage of older adults who have fallen and sustained an injury within last year',
    'Q07': 'Percentage of older adults who report having lost 5 or fewer teeth due to decay or gum disease',
    'Q08': 'Physically unhealthy days (mean number of days in past month)',
    'Q09': 'Percentage of at risk adults (have diabetes, asthma, cardiovascular disease or currently smoke) who ever had a pneumococcal vaccine',
    'Q10': 'Percentage of older adult men who are up to date with select clinical preventive services',
    'Q11': 'Percentage of older adult women who are up to date with select clinical preventive services',
    'Q12': 'Percentage of older adult women who have received a mammogram within the past 2 years',
    'Q13': 'Percentage of older adults who are currently obese, with a body mass index (BMI) of 30 or more',
    'Q14': 'Percentage of older adults who had a cholesterol screening within the past 5 years',
    'Q15': 'Percentage of older adults who had either a home blood stool test within the past year or a sigmoidoscopy or colonoscopy within the past 10 years',
    'Q16': 'Percentage of older adults who have not had any leisure time physical activity in the past month',
    'Q17': 'Percentage of older adults who have smoked at least 100 cigarettes in their entire life and still smoke every day or some days',
    'Q18': 'Percentage of older adults who reported influenza vaccine within the past year',
    'Q19': 'Percentage of older adults without diabetes who reported a blood sugar or diabetes test within 3 years',
    'Q20': 'Percentage of older adult women with an intact cervix who had a Pap test within the past 3 years',
    'Q21': 'Percentage of older adults who reported binge drinking within the past 30 days',
    'Q22': 'Percentage of older adults who have ever been told by a health professional that they have high blood pressure',
    'Q27': 'Percentage of older adults with a lifetime diagnosis of depression',
    'Q30': 'Percentage of older adults who reported subjective cognitive decline or memory loss that is happening more often or is getting worse in the preceding 12 months',
    'Q31': 'Percentage of older adults who reported subjective cognitive decline or memory loss that interferes with their ability to engage in social activities or household chores',
    'Q32': 'Percentage of older adults who self-reported that their health is "fair" or "poor"',
    'Q33': 'Percentage of older adults who self-reported that their health is "good", "very good", or "excellent"',
    'Q34': 'Percentage of older adults getting sufficient sleep (>6 hours)',
    'Q35': 'Mean number of days with activity limitations in the past month',
    'Q36': 'Percentage of older adults who provided care for a friend or family member within the past month',
    'Q37': 'Percentage of older adults currently not providing care who expect to provide care for someone with health problems in the next two years',
    'Q38': 'Percentage of older adults who provided care to a friend or family member for six months or more',
    'Q39': 'Average of 20 or more hours of care per week provided to a friend or family member',
    'Q40': 'Percentage of older adults who provided care for someone with dementia or other cognitive impairment within the past month',
    'Q41': 'Percentage of older adults who reported that as a result of subjective cognitive decline or memory loss that they need assistance with day-to-day activities',
    'Q42': 'Percentage of older adults with subjective cognitive decline or memory loss who reported talking with a health care professional about it',
    'Q43': 'Percentage of older adults ever told they have arthritis',
    'Q44': 'Severe joint pain due to arthritis among older adults with doctor-diagnosed arthritis',
    'Q45': 'Fair or poor health among older adults with doctor-diagnosed arthritis',
    'Q46': 'Percentage of older adults who report having a disability (includes limitations related to sensory or mobility impairments or a physical, mental, or emotional condition)'
}

# Build the question dimension: one row per distinct QuestionID found in
# df_cleaned, paired with its description from question_mapping.
unique_question_ids = df_cleaned['QuestionID'].unique()
# Converting the array of unique values into a DataFrame
unique_question_df = pd.DataFrame(unique_question_ids, columns=['question_id'])

# Applying the mapping to create a new column with descriptions
# (IDs that have no entry in question_mapping get NaN here).
unique_question_df['question_desc'] = unique_question_df['question_id'].map(question_mapping)

# Drop rows without a usable key. Comparing against the literal string '<NA>'
# only catches IDs that were stringified earlier; genuinely missing values
# (NaN / None / pd.NA) never equal that string, so exclude them explicitly.
has_question_id = (
    unique_question_df['question_id'].notna()
    & (unique_question_df['question_id'] != '<NA>')
)
unique_question_df = unique_question_df[has_question_id]
unique_question_df

In [None]:
# Creating Class Dimension
#
# Lookup table from class ID ('C01'..'C07') to the class name.
# It is applied further down with Series.map, so any ID that is not a key
# here ends up with a NaN description.

# Mapping dictionary
class_mapping = {
    'C01': 'Overall Health',
    'C02': 'Nutrition/Physical Activity/Obesity',
    'C03': 'Screenings and Vaccines',
    'C04': 'Smoking and Alcohol Use',
    'C05': 'Mental Health',
    'C06': 'Cognitive Decline',
    'C07': 'Caregiving'
}

# Build the class dimension: one row per distinct ClassID found in
# df_cleaned, paired with its description from class_mapping.
unique_class_ids = df_cleaned['ClassID'].unique()
# Converting the array of unique values into a DataFrame
unique_class_df = pd.DataFrame(unique_class_ids, columns=['class_id'])

# Applying the mapping to create a new column with descriptions
# (IDs that have no entry in class_mapping get NaN here).
unique_class_df['class_desc'] = unique_class_df['class_id'].map(class_mapping)

# Drop rows without a usable key. Comparing against the literal string '<NA>'
# only catches IDs that were stringified earlier; genuinely missing values
# (NaN / None / pd.NA) never equal that string, so exclude them explicitly.
has_class_id = (
    unique_class_df['class_id'].notna()
    & (unique_class_df['class_id'] != '<NA>')
)
unique_class_df = unique_class_df[has_class_id]
unique_class_df

In [None]:
# Creating Topic Dimension
#
# Lookup table from topic ID (e.g. 'TOC11') to the topic name. Entries are
# listed alphabetically by description rather than by ID.
# It is applied further down with Series.map, so any ID that is not a key
# here ends up with a NaN description.

# Mapping dictionary
topic_mapping = {
    'TOC11': 'Arthritis among older adults',
    'TAC03': 'Binge drinking within past 30 days',
    'TSC06': 'Cholesterol checked in past 5 years',
    'TSC02': 'Colorectal cancer screening',
    'TAC01': 'Current smoking',
    'TSC04': 'Diabetes screening within past 3 years',
    'TOC10': 'Disability status, including sensory or mobility limitations',
    'TGC03': 'Duration of caregiving among older adults',
    'TNC01': 'Eating 2 or more fruits daily',
    'TNC02': 'Eating 3 or more vegetables daily',
    'TSC09': 'Ever had pneumococcal vaccine',
    'TGC02': 'Expect to provide care for someone in the next two years',
    'TOC13': 'Fair or poor health among older adults with arthritis',
    'TOC06': 'Fall with injury within last year',
    'TMC01': 'Frequent mental distress',
    'TCC02': 'Functional difficulties associated with subjective cognitive decline or memory loss among older adults',
    'TSC07': 'High blood pressure ever',
    'TSC08': 'Influenza vaccine within past year',
    'TGC04': 'Intensity of caregiving among older adults',
    'TMC03': 'Lifetime diagnosis of depression',
    'TSC01': 'Mammogram within past 2 years',
    'TCC03': 'Need assistance with day-to-day activities because of subjective cognitive decline or memory loss',
    'TNC03': 'No leisure-time physical activity within past month',
    'TNC04': 'Obesity',
    'TOC05': 'Oral health:  tooth retention',
    'TSC03': 'Pap test within past 3 years',
    'TOC01': 'Physically unhealthy days (mean number of days)',
    'TOC09': 'Prevalence of sufficient sleep',
    'TGC01': 'Provide care for a friend or family member in past month',
    'TGC05': 'Provide care for someone with cognitive impairment within the past month',
    'TOC03': 'Recent activity limitations in past month',
    'TOC07': 'Self-rated health (fair to poor health)',
    'TOC08': 'Self-rated health (good to excellent health)',
    'TOC12': 'Severe joint pain among older adults with arthritis',
    'TCC01': 'Subjective cognitive decline or memory loss among older adults',
    'TOC04': 'Taking medication for high blood pressure',
    'TCC04': 'Talked with health care professional about subjective cognitive decline or memory loss',
    'TSC10': 'Up-to-date with recommended vaccines and screenings - Men',
    'TSC11': 'Up-to-date with recommended vaccines and screenings - Women'
}

# Build the topic dimension: one row per distinct TopicID found in
# df_cleaned, paired with its description from topic_mapping.
unique_topic_ids = df_cleaned['TopicID'].unique()
# Converting the array of unique values into a DataFrame
unique_topic_df = pd.DataFrame(unique_topic_ids, columns=['topic_id'])

# Applying the mapping to create a new column with descriptions
# (IDs that have no entry in topic_mapping get NaN here).
unique_topic_df['topic_desc'] = unique_topic_df['topic_id'].map(topic_mapping)

# Drop rows without a usable key. Comparing against the literal string '<NA>'
# only catches IDs that were stringified earlier; genuinely missing values
# (NaN / None / pd.NA) never equal that string, so exclude them explicitly.
has_topic_id = (
    unique_topic_df['topic_id'].notna()
    & (unique_topic_df['topic_id'] != '<NA>')
)
unique_topic_df = unique_topic_df[has_topic_id]
unique_topic_df

In [None]:
import pandas as pd
import numpy as np
import json
import requests
import re
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from io import StringIO
import os
from azure.storage.blob import BlobServiceClient
import pandas as pd
from io import BytesIO, StringIO
import sqlalchemy

# Azure Functions


def azure_upload_blob(connect_str, container_name, blob_name, data):
    """Upload ``data`` to an Azure blob, overwriting any existing blob.

    Args:
        connect_str: Azure Storage connection string.
        container_name: Name of the container holding the blob.
        blob_name: Name of the blob to write.
        data: Payload passed unchanged to ``upload_blob``.
    """
    target = BlobServiceClient.from_connection_string(connect_str).get_blob_client(
        container=container_name,
        blob=blob_name,
    )
    target.upload_blob(data, overwrite=True)
    print(f"Uploaded to Azure Blob: {blob_name}")


def azure_download_blob(connect_str, container_name, blob_name):
    """Download a blob and return its entire content.

    Args:
        connect_str: Azure Storage connection string.
        container_name: Name of the container holding the blob.
        blob_name: Name of the blob to read.

    Returns:
        Whatever ``readall()`` yields for the blob's download stream.
    """
    service = BlobServiceClient.from_connection_string(connect_str)
    source = service.get_blob_client(container=container_name, blob=blob_name)
    return source.download_blob().readall()


# Download the dataset as CSV, drop unused columns and rows without a
# Data_Value, and split the "POINT (<lon> <lat>)" geolocation text into
# numeric Longitude / Latitude columns.
URL = "https://data.cdc.gov/api/views/hfr9-rurv/rows.csv?accessType=DOWNLOAD"

# TLS certificate verification is left at the requests default (enabled) so a
# spoofed or intercepted endpoint cannot feed data into the pipeline; if a
# private CA is required, pass verify="/path/to/ca-bundle.pem" rather than
# verify=False. The (connect, read) timeout keeps a stalled server from
# hanging the notebook indefinitely.
response = requests.get(URL, timeout=(10, 300))
if response.status_code != 200:
    # Stop here with a clear error; continuing would only fail later with a
    # NameError because df_raw was never created.
    raise RuntimeError(f"Failed to download the file. (HTTP {response.status_code})")

# Read the response bytes into a DataFrame
df_raw = pd.read_csv(BytesIO(response.content))
# Print the first few rows to verify if the data has been read successfully
print(df_raw.head())
print(df_raw.columns)
print(df_raw.shape)
df_raw.info()

# drop() already returns a new frame, so df_raw itself is left untouched.
df_cleaned = df_raw.drop(columns=  ['LocationAbbr','Data_Value_Footnote_Symbol','Data_Value_Footnote','Datasource','Data_Value_Unit','Data_Value_Alt','StratificationCategory1','StratificationCategoryID1','StratificationID1','StratificationCategoryID2','StratificationID2'])
df_cleaned = df_cleaned.dropna(subset=['Data_Value'])
df_cleaned.info()

# Vectorised parse of "POINT (<lon> <lat>)". The leading ^ keeps the
# anchored-at-start behaviour of re.match. Rows whose Geolocation is missing
# or does not match the pattern get NaN in both columns (a row-wise
# re.match would raise TypeError on a NaN value). Both columns are float64.
coords = df_cleaned['Geolocation'].str.extract(r'^POINT \(([-0-9.]+) ([-0-9.]+)\)')
df_cleaned['Longitude'] = coords[0].astype(float)
df_cleaned['Latitude'] = coords[1].astype(float)

alzheimer_df = df_cleaned.copy()

alzheimer_df.head(5)