In [None]:
!pip install pywaffle

In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import seaborn as sns
from pywaffle import Waffle

In [None]:
# Global plot styling shared by the figures in this notebook.
mpl.rcParams['figure.dpi'] = 300
# Ordered list of '#'-prefixed hex colours. The original was an unordered set of
# bare hex strings, which matplotlib does not accept as colour specs.
palette = ['#A82331', '#DC5663']
background_color = '#FFF4EE'  # axes face colour used by the line/bar charts
main_color = '#A82331'        # primary series colour

In [None]:
# Load the diagnoses-by-neighborhood/age/race CSV and preview the first rows.
age_csv_path = '../data/HIV_AIDS_Diagnoses_by_Neighborhood__Age_Group__and_Race_Ethnicity_20240129.csv'
nyc_hiv_by_age = pd.read_csv(age_csv_path)
nyc_hiv_by_age.head()

In [None]:
# Keep only the columns used below. The explicit .copy() makes this an
# independent frame so later column assignments do not raise SettingWithCopyWarning.
nyc_hiv_by_age = nyc_hiv_by_age[[
    'YEAR',
    'NEIGHBORHOOD',
    'RACE/ETHNICITY',
    'AGE',
    'TOTAL NUMBER OF HIV DIAGNOSES',
    'HIV DIAGNOSES PER 100,000 POPULATION',
    'Borough',
]].copy()

In [None]:
# Convert both diagnosis columns to float, mapping the '*' placeholder to 0 first.
# NOTE(review): '*' presumably marks suppressed values; treating it as 0 is an
# analysis choice - confirm against the dataset documentation.
for diag_col in ('TOTAL NUMBER OF HIV DIAGNOSES', 'HIV DIAGNOSES PER 100,000 POPULATION'):
    nyc_hiv_by_age[diag_col] = nyc_hiv_by_age[diag_col].replace('*', 0).astype('float')

In [None]:
# Rows aggregated over every race/ethnicity, then the years those rows cover.
race_is_all = nyc_hiv_by_age['RACE/ETHNICITY'].eq('All')
all_races = nyc_hiv_by_age[race_is_all]
all_races['YEAR'].unique()

In [None]:
# Data after 2013 also seems to be grouped by borough, which is not the case for
# earlier years, so accept rows whose Borough is either missing or 'All'.
hood_is_all = all_races['NEIGHBORHOOD'].eq('All')
age_is_all = all_races['AGE'].eq('All')
borough_missing_or_all = all_races['Borough'].isna() | all_races['Borough'].eq('All')
all_hoods = all_races[hood_is_all & age_is_all & borough_missing_or_all]
all_hoods.sort_values(by='YEAR').head(15)

In [None]:
# Load the diagnoses-by-neighborhood/sex/race CSV and preview the first rows.
sex_race_csv_path = '../data/HIV_AIDS_Diagnoses_by_Neighborhood__Sex__and_Race_Ethnicity_20240129.csv'
nyc_hiv_by_sex_race = pd.read_csv(sex_race_csv_path)
nyc_hiv_by_sex_race.head()

In [None]:
# Distinct values of the YEAR column in the sex/race dataset.
nyc_hiv_by_sex_race.YEAR.unique()

In [None]:
# Load the DOHMH HIV/AIDS annual report CSV and preview the first rows.
annual_report_csv_path = '../data/DOHMH_HIV_AIDS_Annual_Report_20240129.csv'
annual_report = pd.read_csv(annual_report_csv_path)
annual_report.head()

In [None]:
# Rows where Borough, Gender, Race and Age are all the 'All' aggregate level.
aggregate_dims = ['Borough', 'Gender', 'Race', 'Age']
is_overall_row = (annual_report[aggregate_dims] == 'All').all(axis=1)
gen_annual_report = annual_report[is_overall_row]
gen_annual_report.info()

In [None]:
# Preview the first 20 rows of the all-aggregate report.
gen_annual_report.head(20)

In [None]:
# 2021 rows split by gender (Men / Women only), aggregated over borough, race and age.
other_dims_all = (annual_report[['Borough', 'Race', 'Age']] == 'All').all(axis=1)
is_2021 = annual_report['Year'].eq(2021)
is_men_or_women = annual_report['Gender'].isin(['Men', 'Women'])
gender_annual_report = annual_report[other_dims_all & is_2021 & is_men_or_women]
gender_annual_report.info()

In [None]:
# Display the 2021 Men/Women rows selected above.
gender_annual_report

In [None]:
# Waffle chart (10 x 10 grid) of 2021 HIV diagnoses, one icon style and colour
# per row of gender_annual_report.
waffle_options = {
    'rows': 10,
    'columns': 10,
    'values': gender_annual_report['HIV diagnoses'],
    'icons': ['person', 'person-dress'],
    'colors': ['#E37882', '#A82331'],
    'font_size': 23,
}
plt.figure(FigureClass=Waffle, **waffle_options)

In [None]:
# 2021 rows split by age group, aggregated over borough, race and gender.
non_age_dims_all = (annual_report[['Borough', 'Race', 'Gender']] == 'All').all(axis=1)
year_is_2021 = annual_report['Year'].eq(2021)
age_is_specific = annual_report['Age'].ne('All')
age_annual_report = annual_report[non_age_dims_all & year_is_2021 & age_is_specific]

age_annual_report.info()

In [None]:
# Preview up to 20 of the per-age-group rows for 2021.
age_annual_report.head(20)

In [None]:
# Pie chart of 2021 HIV diagnoses by age group, drawn clockwise from 12 o'clock.
# NOTE(review): matplotlib cycles `colors` if there are more than five age groups.
colors = ['#A82331', '#D53444', '#DC5663', '#E37882', '#EA99A1']
plt.figure(figsize=(8, 8))
plt.pie(
    age_annual_report['HIV diagnoses'],
    labels=age_annual_report['Age'],
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
    counterclock=False,
    textprops={'fontsize': 14},
)


In [None]:
# Pie chart comparing a New York City count with the remainder of a US total.
# NOTE(review): both figures are hard-coded with no source cited here - confirm
# what they measure and where they come from.
nyc_count = 80961
us_total = 1086806
colors = ['#A82331', '#EA99A1']
labels = ['New York City', 'The Rest of the US']
pc_to_r = [nyc_count, us_total - nyc_count]
plt.figure(figsize=(8, 8))
plt.pie(
    pc_to_r,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
    counterclock=False,
    textprops={'fontsize': 14},
)

In [None]:
# Bar chart of 2021 HIV diagnoses per age group, single colour, no axis titles.
plt.figure(figsize=(12, 8))
# `color=` replaces the one-element `palette=`: seaborn deprecates passing a
# palette without a hue variable, and a single colour is what is intended.
ax = sns.barplot(data=age_annual_report, x='Age', y='HIV diagnoses', color='#A82331', linewidth=0)
ax.set_xlabel(None)
ax.set_ylabel(None)
# Hide every frame line except the bottom axis.
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)

In [None]:
# 2021 rows split by race, aggregated over borough, gender and age, ordered
# from most to fewest HIV diagnoses.
# sort_values() is chained so a new frame is returned; the original sorted the
# filtered slice in place, which raises SettingWithCopyWarning.
race_annual_report = annual_report[
    (annual_report['Borough'] == 'All')
    & (annual_report['Year'] == 2021)
    & (annual_report['Race'] != 'All')
    & (annual_report['Gender'] == 'All')
    & (annual_report['Age'] == 'All')
].sort_values(by='HIV diagnoses', ascending=False)

race_annual_report.head()

In [None]:
# Collapse the smaller race categories into a single 'Other' row for the pie chart.
race_annual_report = race_annual_report[['HIV diagnoses', 'Race']]
# 'Other' is included so that re-running this cell folds the existing merged row
# back in instead of appending a second one.
merged_races = ['Asian/Pacific Islander', 'Other/Unknown', 'Other']
is_merged = race_annual_report['Race'].isin(merged_races)
# The total is computed from the data rather than hard-coded.
# NOTE(review): the original used the literal 82.0 + 17.0; confirm the computed
# sum matches for the 2021 data.
other_row = pd.DataFrame({
    'HIV diagnoses': [race_annual_report.loc[is_merged, 'HIV diagnoses'].sum()],
    'Race': ['Other'],
})
# concat with ignore_index appends safely; the original `.loc[len(index)]` used a
# label on a non-reset index and could overwrite an existing row.
race_annual_report = pd.concat([race_annual_report[~is_merged], other_row], ignore_index=True)
race_annual_report

In [None]:
# Pie chart of 2021 HIV diagnoses by race group, drawn clockwise from 12 o'clock.
colors = ['#A82331', '#D53444', '#DC5663', '#EA99A1']
plt.figure(figsize=(8, 8))
race_pie_style = {
    'startangle': 90,
    'counterclock': False,
    'autopct': '%1.1f%%',
    'textprops': {'fontsize': 14},
    'colors': colors,
}
# Note: plt.pie returns the wedge/text artists, not an Axes, despite the name.
ax = plt.pie(race_annual_report['HIV diagnoses'], labels=race_annual_report['Race'], **race_pie_style)

In [None]:
# Hard-coded shares (fractions summing to 1) per race group, keyed by label.
# NOTE(review): the labels suggest NYC's overall population mix - cite the source.
gen_race_nyc = dict([
    ("Black", 0.238),
    ("Latinx/Hispanic", 0.289),
    ("White", 0.319),
    ("Other", 0.154),
])

In [None]:
# Pie chart of the hard-coded race shares in gen_race_nyc, clockwise from 12 o'clock.
colors = ['#A82331', '#D53444', '#DC5663', '#EA99A1']
plt.figure(figsize=(8, 8))
share_pie_style = {
    'startangle': 90,
    'counterclock': False,
    'autopct': '%1.1f%%',
    'textprops': {'fontsize': 14},
    'colors': colors,
}
# Note: plt.pie returns the wedge/text artists, not an Axes, despite the name.
ax = plt.pie(gen_race_nyc.values(), labels=gen_race_nyc.keys(), **share_pie_style)

### How have the HIV diagnosis rate and new diagnoses changed over time? Overall and by borough.

In [None]:
# Line chart of HIV diagnoses per year from the all-aggregate report.
plt.figure(figsize=(14,6))
ax = sns.lineplot(data=gen_annual_report, y='HIV diagnoses', x='Year', color=main_color)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Start the y-axis at 0 with 500 of headroom above the peak. Series.max() skips
# NaN, whereas the builtin max() gives order-dependent results when NaN is present.
ax.set_ylim(0, gen_annual_report['HIV diagnoses'].max() + 500)

In [None]:
# Line chart of the HIV diagnosis rate per year from the all-aggregate report.
plt.figure(figsize=(14,6))
ax = sns.lineplot(data=gen_annual_report, y='HIV diagnosis rate', x='Year', color=main_color)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Start the y-axis at 0 with 10 units of headroom above the peak. Series.max()
# skips NaN, whereas the builtin max() gives order-dependent results with NaN.
ax.set_ylim(0, gen_annual_report['HIV diagnosis rate'].max() + 10)

In [None]:
# Diagnoses over time by race: every Race value is kept (including 'All'),
# while Borough, Gender, Age and UHF are fixed at their 'All' aggregate.
fixed_dims = ['Borough', 'Gender', 'Age', 'UHF']
fixed_dims_all = (annual_report[fixed_dims] == 'All').all(axis=1)
gen_annual_report_race = annual_report[fixed_dims_all]
gen_annual_report_race.head(40)


In [None]:
# Line chart of the HIV diagnosis rate per year, one line per Race value.
plt.figure(figsize=(14,6))
# No `color=` here: seaborn ignores it when `hue` is set (each hue level gets
# its own palette colour), so the original argument was misleading.
ax = sns.lineplot(data=gen_annual_report_race, y='HIV diagnosis rate', x='Year', hue='Race')
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

In [None]:
# Per-borough rows: a specific Borough, with Gender, Race, Age and UHF all 'All'.
borough_is_specific = annual_report['Borough'].ne('All')
rest_is_all = (annual_report[['Gender', 'Race', 'Age', 'UHF']] == 'All').all(axis=1)
gen_annual_report_borough = annual_report[borough_is_specific & rest_is_all]
gen_annual_report_borough.info()

In [None]:
# Spot-check the per-borough frame by displaying its Bronx rows.
gen_annual_report_borough[gen_annual_report_borough['Borough'] == 'Bronx']

In [None]:
# Line chart of HIV diagnoses per year, one line per borough.
plt.figure(figsize=(14,6))
ax = sns.lineplot(
    data=gen_annual_report_borough,
    x='Year',
    y='HIV diagnoses',
    hue='Borough',
)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
for hidden_side in ('top', 'right'):
    ax.spines[hidden_side].set_visible(False)

In [None]:
# Same per-borough plot as the diagnoses chart, but showing the diagnosis rate.
plt.figure(figsize=(14,6))
ax = sns.lineplot(
    data=gen_annual_report_borough,
    x='Year',
    y='HIV diagnosis rate',
    hue='Borough',
)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
for hidden_side in ('top', 'right'):
    ax.spines[hidden_side].set_visible(False)

### HIV testing over time

In [None]:
# Load the New York State BRFSS extract and list its columns.
brfss_state_csv_path = '../data/BRFSS_nystate.csv'
testing_nys = pd.read_csv(brfss_state_csv_path)
testing_nys.columns

In [None]:
# Keep the survey columns of interest, then only the overall (non-broken-out)
# rows whose response is 'Yes'.
brfss_columns = [
    'Year', 'Question', 'Response', 'Break_Out',
    'Break_Out_Category', 'Sample_Size', 'Data_value',
]
testing_nys = testing_nys[brfss_columns]
is_overall_yes = (
    testing_nys['Break_Out'].eq('Overall')
    & testing_nys['Break_Out_Category'].eq('Overall')
    & testing_nys['Response'].eq('Yes')
)
testing_nys = testing_nys[is_overall_yes]
testing_nys.info()

In [None]:
# Preview the filtered overall 'Yes' rows.
testing_nys.head()

In [None]:
# Percentage of population tested over time: the 'Yes' share per year drawn on
# top of a light-grey bar reaching 100.
plt.figure(figsize=(12, 8))
# One backdrop value per row of the filtered frame; the original hard-coded 10
# entries, which fails whenever the number of rows differs.
hundr = [100] * len(testing_nys)
# `color=` replaces the one-element `palette=` (deprecated without a hue variable).
ax2 = sns.barplot(data=testing_nys, x='Year', y=hundr, color="#E8EBED", linewidth=0)
ax = sns.barplot(data=testing_nys, x='Year', y='Data_value', color=main_color, linewidth=0)

ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

In [None]:
# Check whether the statistical metro area shows the same trend: load its BRFSS
# extract, keep the survey columns of interest, and retain only the overall
# rows whose response is 'Yes'.
# (The original also had a bare `testing_sa.columns` expression mid-cell, which
# produced no output and has been dropped.)
testing_sa = pd.read_csv('../data/BRFSS_stat_metroarea.csv')
testing_sa = testing_sa[['Year', 'Question',
       'Response', 'Break_Out', 'Break_Out_Category', 'Sample_Size',
       'Data_value', ]]
testing_sa = testing_sa[(testing_sa.Break_Out == 'Overall')
                        & (testing_sa.Break_Out_Category == 'Overall')
                        & (testing_sa.Response == 'Yes')]
testing_sa.info()

In [None]:
# Bar chart of Data_value per Year for the metro-area extract (default styling).
ax = sns.barplot(data=testing_sa, x='Year', y='Data_value')

## Condom distribution

In [None]:
# Load the active condom-distribution locations CSV and list its columns.
active_sites_csv_path = '../data/NYC_Condom_Availability_Program_-_HIV_condom_distribution_locations_20240129.csv'
active_con_dis = pd.read_csv(active_sites_csv_path)
active_con_dis.columns

In [None]:
# Keep only the identifying and location columns of each active site.
active_site_columns = ['FacilityName', 'Address', 'Borough', 'Zipcode', 'Latitude', 'Longitude']
active_con_dis = active_con_dis[active_site_columns]

In [None]:
# Column dtypes and non-null counts for the active sites.
active_con_dis.info()

In [None]:
# Drop exact duplicate rows, then save the cleaned active-site table.
# drop_duplicates() returns a new frame; the original discarded the result, so
# the saved CSV (and every later cell) still contained the duplicates.
active_con_dis = active_con_dis.drop_duplicates()
active_con_dis.to_csv('../data/clean_active_cond_dist.csv')

In [None]:
# Preview the first rows of the active-site table.
active_con_dis.head()

In [None]:
# Count of non-null FacilityName entries per borough for the active sites.
active_name_counts = active_con_dis.groupby('Borough')['FacilityName'].count()
acd_by_borough = active_name_counts.reset_index()
acd_by_borough.head()

In [None]:
# Bar chart of active distribution sites per borough, largest first, with the
# count printed on each bar.
plt.figure(figsize=(12, 8))
ax = sns.barplot(data=acd_by_borough,
                 x='Borough',
                 y='FacilityName',
                 order=acd_by_borough.sort_values('FacilityName', ascending=False).Borough,
                 # `color=` replaces the one-element `palette=`, which seaborn
                 # deprecates when no hue variable is given.
                 color=main_color,
                 linewidth=0)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Annotate every bar with its value.
for bars in ax.containers:
    ax.bar_label(bars)

### Inactive condom distribution

In [None]:
# Load the inactive condom-distribution locations CSV and list its columns.
inactive_sites_csv_path = '../data/NYC_Condom_Availability_Program_-_Inactive_HIV_condom_distribution_locations_20240129.csv'
inactive_con_dis = pd.read_csv(inactive_sites_csv_path)
inactive_con_dis.columns

In [None]:
# Keep only the identifying and location columns of each inactive site
# (no Address column is selected here, unlike the active table).
inactive_site_columns = ['FacilityName', 'Borough', 'Zipcode', 'Latitude', 'Longitude']
inactive_con_dis = inactive_con_dis[inactive_site_columns]

In [None]:
# Column dtypes and non-null counts for the inactive sites.
inactive_con_dis.info()

In [None]:
# Number of distinct values in each column of the inactive-site table.
inactive_con_dis.nunique()

In [None]:
# Remove exact duplicate rows, then save the cleaned inactive-site table.
inactive_con_dis = inactive_con_dis.drop_duplicates(keep='first')
inactive_con_dis.to_csv(path_or_buf='../data/clean_inactive_cond_dist.csv')

In [None]:
# Inactive-site counts per borough, indexed by Borough. The next cell rebuilds
# this frame with Borough as a regular column.
icd_by_borough = inactive_con_dis.groupby('Borough')[['FacilityName']].count()
icd_by_borough.head()

In [None]:
# Count of non-null FacilityName entries per borough for the inactive sites,
# with Borough as a regular column.
inactive_name_counts = inactive_con_dis.groupby('Borough')['FacilityName'].count()
icd_by_borough = inactive_name_counts.reset_index()
icd_by_borough.head()

In [None]:
# Bar chart of inactive distribution sites per borough, largest first, with the
# count printed on each bar.
plt.figure(figsize=(12, 8))
ax = sns.barplot(data=icd_by_borough,
                 x='Borough',
                 y='FacilityName',
                 order=icd_by_borough.sort_values('FacilityName', ascending=False).Borough,
                 # `color=` replaces the one-element `palette=`, which seaborn
                 # deprecates when no hue variable is given.
                 color=main_color,
                 linewidth=0)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Annotate every bar with its value.
for bars in ax.containers:
    ax.bar_label(bars)

In [None]:
# Overlay of inactive (grey) and active (main colour) site counts per borough on
# one set of axes, with the value printed on every bar.
plt.figure(figsize=(12, 8))
# Both layers must share one category order. The original ordered each layer by
# its own counts, so bars and tick labels could refer to different boroughs.
# NOTE(review): ordering follows the active counts; a borough present only in
# the inactive table would be left out - confirm both tables cover the same set.
borough_order = acd_by_borough.sort_values('FacilityName', ascending=False).Borough.tolist()
ax2 = sns.barplot(data=icd_by_borough,
                  x='Borough',
                  y='FacilityName',
                  order=borough_order,
                  # `color=` replaces the deprecated palette-without-hue usage.
                  color='#E8EBED',
                  linewidth=0)
ax = sns.barplot(data=acd_by_borough,
                 x='Borough',
                 y='FacilityName',
                 order=borough_order,
                 color=main_color,
                 linewidth=0)

ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_facecolor(background_color)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# ax and ax2 are the same Axes, so this labels both layers.
for bars in ax.containers:
    ax.bar_label(bars)

### Syringe distribution

In [None]:
# Load the syringe data and keep the rows from 2011 onward.
syringe_all_years = pd.read_csv('../data/syringe.csv')
syringe_df = syringe_all_years[syringe_all_years['Year'].ge(2011)]
syringe_df

In [None]:
# Line chart of the tot_syr_dist column per Year (2011 onward, per the filter above).
sns.lineplot(data=syringe_df, x='Year', y='tot_syr_dist')