# Notebook for running all the regressions using OLS for every group of cause--sex, as this is for age standadized data

## Based on the previous analysis, we use the age groups upto 74 years and logit SDI as the predictor rather than SDI, along with the HAQ indices obtained for GBD round 5

## Loading causes based on highest expenditure data (based on Joe's list and COD methods)

In [1]:
import pandas as pd
cause_list_final = pd.read_excel('/ihme/homes/arjuns13/notebooks/Documents/Data/CandidateCauses_Criteria_withCODEM_andDALYs_latest2ndJune.xlsx')

In [2]:
cause_list_final.head()

Unnamed: 0.1,Unnamed: 0,DEX ranking by total expenditure,GBD cause_id,DEX descriptive label,Preliminary amenable burden analysis conducted,Comment,Number of GBD round 5 global CODEM models in which HAQ index was included,Number of GBD round 5data rich CODEM models in which HAQ index was included,Was HAQ index included in nonfatal model?,Ranking DALYs in US,Priority,"Total DALYs in the United States, GBD round 5, year = 2016","DALYs per 100,000 people in the United States, GBD round 5, year = 2016","Total DALYs globally, GBD round 5, year = 2016","DALYs per 100000 people globally, GBD round 5, year = 2016",Expenditure per person?,Unnamed: 16,Unnamed: 17,Unnamed: 18
0,61,75,298.0,HIV/AIDS,,,,,,53,*,409951.8,126.970345,59047980.0,781.627137,,,,
1,0,1,587.0,Diabetes mellitus,Yes,,694.0,553.0,"Yes, proportion with foot ulcer",6,4,3814559.0,1181.445965,66515210.0,880.471923,,,Priority criteria,
2,1,2,493.0,Ischemic heart disease,,,372.5,608.5,Yes,1,4,7854095.0,2432.571827,168897700.0,2235.72515,,,1,Top 60 spending ranking
3,3,5,697.0,Falls,Yes,,352.5,30.0,"Yes, % treated",18,4,1899736.0,588.386632,35513920.0,470.10322,,,1,Top 60 DALY ranking
4,9,12,509.0,Chronic obstructive pulmonary disease,Yes,,532.5,353.0,Yes,4,4,4613470.0,1428.88471,80593220.0,1066.824645,,,1,CODEM model


In [3]:
cause_list_final.shape

(149, 19)

In [4]:
cause_ids_final = cause_list_final['GBD cause_id']

In [5]:
cause_ids_final = cause_ids_final.dropna()

In [6]:
cause_ids_final = cause_ids_final.astype('int')

### getting the values computed earlier - for HAQ frontiers, Logit_SDIs and older DALYs (without cause IDs)

In [7]:
haqFront_sdi_daly_data = pd.read_csv('/ihme/homes/arjuns13/notebooks/Documents/Data/haq_sdi_frontier_estimates_ageStd_aggregatedCauses_allMostDetLocs.csv')

In [8]:
haqFront_sdi_daly_data[haqFront_sdi_daly_data['location_id']==7]

Unnamed: 0.1,Unnamed: 0,V1,location_id,logit_sdi,age_group_id,ln_haq,fit2
0,1,0,7,0.110348,27,3.792486,3.757823


In [9]:
haqFront_sdi_daly_data.head()

Unnamed: 0.1,Unnamed: 0,V1,location_id,logit_sdi,age_group_id,ln_haq,fit2
0,1,0,7,0.110348,27,3.792486,3.757823
1,2,1,8,1.820419,27,4.367523,4.668259
2,3,2,10,-0.093432,27,3.509703,3.649331
3,4,3,12,0.046388,27,3.245486,3.723771
4,5,4,13,1.13717,27,4.000794,4.304499


In [10]:
final = haqFront_sdi_daly_data

In [11]:
final['haq'] = 10**(final['ln_haq'])

In [12]:
final['exp_fit'] = 10**(final['fit2'])

In [13]:
final.loc[final['exp_fit'] > 100.0, 'exp_fit']=100


In [14]:
final['exp_fit'].describe()

count    823.0
mean     100.0
std        0.0
min      100.0
25%      100.0
50%      100.0
75%      100.0
max      100.0
Name: exp_fit, dtype: float64

In [15]:
final['haq_over_frontier'] = final['haq']/final['exp_fit']

In [16]:
final[final['haq_over_frontier']==1].age_group_id.unique() ## make sure that HAQ over frontier = 1 for age group IDs > 19

array([], dtype=int64)

## End of the copied part from the previously done analysis

In [17]:
final.head()

Unnamed: 0.1,Unnamed: 0,V1,location_id,logit_sdi,age_group_id,ln_haq,fit2,haq,exp_fit,haq_over_frontier
0,1,0,7,0.110348,27,3.792486,3.757823,6201.348166,100.0,62.013482
1,2,1,8,1.820419,27,4.367523,4.668259,23308.955739,100.0,233.089557
2,3,2,10,-0.093432,27,3.509703,3.649331,3233.720992,100.0,32.33721
3,4,3,12,0.046388,27,3.245486,3.723771,1759.89108,100.0,17.598911
4,5,4,13,1.13717,27,4.000794,4.304499,10018.298652,100.0,100.182987


## To be merged (on Location Id and Age Group ID) with the DALY data coming from the central funcs - here I end up keeping LogitSDI, rather than SDI, which is what I used for the 2nd June run

In [18]:
haq_sdi = final[['location_id', 'age_group_id', 'logit_sdi','exp_fit','haq','haq_over_frontier']]

In [19]:
haq_sdi.head()

Unnamed: 0,location_id,age_group_id,logit_sdi,exp_fit,haq,haq_over_frontier
0,7,27,0.110348,100.0,6201.348166,62.013482
1,8,27,1.820419,100.0,23308.955739,233.089557
2,10,27,-0.093432,100.0,3233.720992,32.33721
3,12,27,0.046388,100.0,1759.89108,17.598911
4,13,27,1.13717,100.0,10018.298652,100.182987


In [20]:
haq_sdi.shape

(823, 6)

## Getting all the 823 locations and the 146 causes from above data to enter as input into the get_outputs central function

In [21]:
location_list = list(haq_sdi.location_id.unique())

In [22]:
len(location_list)

823

In [23]:
cause_list = list(cause_ids_final.unique())

In [24]:
len(cause_list)

146

In [25]:
from db_queries import get_outputs as go 
dalys_per_cause_rate = go("cause", cause_id=cause_list, metric_id=[3],\
                     measure_id=2, gbd_round_id=5,year_id=[2016],sex_id=[1,2],location_id=location_list,\
                    age_group_id = [27])
dalys_per_cause_rate.head()

Unnamed: 0,age_group_id,cause_id,location_id,measure_id,metric_id,sex_id,year_id,acause,age_group_name,cause_name,expected,location_name,measure_name,metric_name,sex,val,upper,lower
0,27,297,7,2,3,1,2016,tb,Age-standardized,Tuberculosis,False,North Korea,DALYs (Disability-Adjusted Life Years),Rate,Male,0.004841,0.006399,0.00364
1,27,298,7,2,3,1,2016,hiv,Age-standardized,HIV/AIDS,False,North Korea,DALYs (Disability-Adjusted Life Years),Rate,Male,0.002088,0.011767,9.5e-05
2,27,302,7,2,3,1,2016,diarrhea,Age-standardized,Diarrheal diseases,False,North Korea,DALYs (Disability-Adjusted Life Years),Rate,Male,0.003865,0.005466,0.002768
3,27,321,7,2,3,1,2016,intest_other,Age-standardized,Other intestinal infectious diseases,False,North Korea,DALYs (Disability-Adjusted Life Years),Rate,Male,1e-05,2.2e-05,3e-06
4,27,322,7,2,3,1,2016,lri,Age-standardized,Lower respiratory infections,False,North Korea,DALYs (Disability-Adjusted Life Years),Rate,Male,0.014397,0.018903,0.010861


In [26]:
dalys_per_cause_rate[(dalys_per_cause_rate.location_id == 13) & (dalys_per_cause_rate.age_group_id == 27)\
                    & (dalys_per_cause_rate.cause_id == 493)]

Unnamed: 0,age_group_id,cause_id,location_id,measure_id,metric_id,sex_id,year_id,acause,age_group_name,cause_name,expected,location_name,measure_name,metric_name,sex,val,upper,lower
1235,27,493,13,2,3,1,2016,cvd_ihd,Age-standardized,Ischemic heart disease,False,Malaysia,DALYs (Disability-Adjusted Life Years),Rate,Male,0.038142,0.042143,0.035173
1381,27,493,13,2,3,2,2016,cvd_ihd,Age-standardized,Ischemic heart disease,False,Malaysia,DALYs (Disability-Adjusted Life Years),Rate,Female,0.01931,0.021432,0.017688


### Dropping NaNs, renaming some variables

In [27]:
dalys_per_cause_rate = dalys_per_cause_rate.dropna()

In [28]:
dalys_per_cause_rate.shape

(230440, 18)

In [29]:
dalys_per_cause_rate = dalys_per_cause_rate[['age_group_id','cause_id','location_id','sex_id','sex','age_group_name',\
                                   'cause_name','location_name','val']]

In [30]:
dalys_per_cause_rate = dalys_per_cause_rate.rename(index=str, columns={"val": "DALYs_per_capita"})

In [31]:
dalys_per_cause_rate.head()

Unnamed: 0,age_group_id,cause_id,location_id,sex_id,sex,age_group_name,cause_name,location_name,DALYs_per_capita
0,27,297,7,1,Male,Age-standardized,Tuberculosis,North Korea,0.004841
1,27,298,7,1,Male,Age-standardized,HIV/AIDS,North Korea,0.002088
2,27,302,7,1,Male,Age-standardized,Diarrheal diseases,North Korea,0.003865
3,27,321,7,1,Male,Age-standardized,Other intestinal infectious diseases,North Korea,1e-05
4,27,322,7,1,Male,Age-standardized,Lower respiratory infections,North Korea,0.014397


In [32]:
dalys_per_cause_rate.shape

(230440, 9)

In [33]:
dalys_per_cause = dalys_per_cause_rate

In [34]:
dalys_per_cause.shape

(230440, 9)

## Don't need entries where the DALYs are zero

In [35]:
dalys_per_cause = dalys_per_cause[dalys_per_cause['DALYs_per_capita'] > 0]

## Merging the DALYs data with the HAQ-SDI data

In [36]:
dalys_haq_sdi_withCauses = pd.merge(dalys_per_cause, haq_sdi, on=['location_id', 'age_group_id'])

In [37]:
dalys_haq_sdi_withCauses.shape

(225209, 13)

In [38]:
dalys_haq_sdi_withCauses.head()

Unnamed: 0,age_group_id,cause_id,location_id,sex_id,sex,age_group_name,cause_name,location_name,DALYs_per_capita,logit_sdi,exp_fit,haq,haq_over_frontier
0,27,297,7,1,Male,Age-standardized,Tuberculosis,North Korea,0.004841,0.110348,100.0,6201.348166,62.013482
1,27,298,7,1,Male,Age-standardized,HIV/AIDS,North Korea,0.002088,0.110348,100.0,6201.348166,62.013482
2,27,302,7,1,Male,Age-standardized,Diarrheal diseases,North Korea,0.003865,0.110348,100.0,6201.348166,62.013482
3,27,321,7,1,Male,Age-standardized,Other intestinal infectious diseases,North Korea,1e-05,0.110348,100.0,6201.348166,62.013482
4,27,322,7,1,Male,Age-standardized,Lower respiratory infections,North Korea,0.014397,0.110348,100.0,6201.348166,62.013482


In [39]:
dalys_haq_sdi_withCauses_distinct = dalys_haq_sdi_withCauses[['age_group_id','age_group_name','cause_id','cause_name', \
                                                             'sex_id', 'sex']].drop_duplicates()

### Removing the nominal variables before running the regressions

In [40]:
dalys_haq_sdi_withCauses_forAnalysis = dalys_haq_sdi_withCauses[['age_group_id','cause_id','location_id','sex_id',\
                                                                 'DALYs_per_capita','logit_sdi','exp_fit','haq',\
                                                                 'haq_over_frontier']]

In [41]:
dalys_haq_sdi_withCauses_forAnalysis.head()

Unnamed: 0,age_group_id,cause_id,location_id,sex_id,DALYs_per_capita,logit_sdi,exp_fit,haq,haq_over_frontier
0,27,297,7,1,0.004841,0.110348,100.0,6201.348166,62.013482
1,27,298,7,1,0.002088,0.110348,100.0,6201.348166,62.013482
2,27,302,7,1,0.003865,0.110348,100.0,6201.348166,62.013482
3,27,321,7,1,1e-05,0.110348,100.0,6201.348166,62.013482
4,27,322,7,1,0.014397,0.110348,100.0,6201.348166,62.013482


## Capping exp_fit2 to 100 for the corresponding values of exp_fit

In [42]:
dalys_haq_sdi_withCauses_forAnalysis.loc[dalys_haq_sdi_withCauses_forAnalysis['exp_fit'] > 100.0, 'exp_fit']=100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


### Creating the groups (Cause-Id, Age-Group, Sex-Id)

In [43]:
dalys_haq_sdi_grouped =  \
dict(tuple(dalys_haq_sdi_withCauses_forAnalysis.groupby(['cause_id','age_group_id', 'sex_id'])))

### This is just how the data looks for the cause 530, age group 2, and sex 1, just as an example

In [44]:
dalys_haq_sdi_grouped[297,27,1].head()

Unnamed: 0,age_group_id,cause_id,location_id,sex_id,DALYs_per_capita,logit_sdi,exp_fit,haq,haq_over_frontier
0,27,297,7,1,0.004841,0.110348,100.0,6201.348166,62.013482
272,27,297,8,1,0.000791,1.820419,100.0,23308.955739,233.089557
546,27,297,10,1,0.012335,-0.093432,100.0,3233.720992,32.33721
820,27,297,12,1,0.015318,0.046388,100.0,1759.89108,17.598911
1094,27,297,13,1,0.002087,1.13717,100.0,10018.298652,100.182987


# Set HAQ frontier to 0 for predictions dataframe

In [45]:
# Create df for predictions where haq_frontier is set to 0
df_predictions = dalys_haq_sdi_withCauses_forAnalysis.copy()
df_predictions['haq_over_frontier'] = 0
df_predictions.head()

Unnamed: 0,age_group_id,cause_id,location_id,sex_id,DALYs_per_capita,logit_sdi,exp_fit,haq,haq_over_frontier
0,27,297,7,1,0.004841,0.110348,100.0,6201.348166,0
1,27,298,7,1,0.002088,0.110348,100.0,6201.348166,0
2,27,302,7,1,0.003865,0.110348,100.0,6201.348166,0
3,27,321,7,1,1e-05,0.110348,100.0,6201.348166,0
4,27,322,7,1,0.014397,0.110348,100.0,6201.348166,0


In [46]:
grouped_for_predictions =  dict(tuple(df_predictions.groupby(['cause_id','age_group_id', 'sex_id'])))

## Next we create a DataFrame where all the final regression results would be stored

In [47]:
dalys_haq_sdi_answers = pd.DataFrame(columns=['cause_id','age_group_id', 'sex_id',\
                                              'intercept','intercept_pValue','haq_over_frontier_coef',\
                                              'haq_over_frontier_coef_pValue','logit_sdi_coef',\
                                              'logit_sdi_coef_pValue','r2',])

In [48]:
import statsmodels.formula.api as smf
i=0
for g in dalys_haq_sdi_grouped:
    df=dalys_haq_sdi_grouped[g]
    linear_reg = smf.ols("DALYs_per_capita ~ haq_over_frontier + logit_sdi", \
                      data = df)
    linear_reg_fit = linear_reg.fit()

    
    dalys_haq_sdi_answers.loc[i] = [g[0],g[1],g[2],linear_reg_fit.params[0],linear_reg_fit.pvalues.Intercept,\
                                    linear_reg_fit.params[1],linear_reg_fit.pvalues.haq_over_frontier,\
                                    linear_reg_fit.params[2],linear_reg_fit.pvalues.logit_sdi,\
                                    linear_reg_fit.rsquared]
    i+=1
   # print("Current iteration:" + str(i))

## The next step is taken so that we can look at data using cause names rather than cause IDs and age group names rather than age group IDs

In [49]:
dalys_haq_sdi_answers = dalys_haq_sdi_answers.merge(dalys_haq_sdi_withCauses_distinct, how='left')

In [50]:
dalys_haq_sdi_answers.head()

Unnamed: 0,cause_id,age_group_id,sex_id,intercept,intercept_pValue,haq_over_frontier_coef,haq_over_frontier_coef_pValue,logit_sdi_coef,logit_sdi_coef_pValue,r2,age_group_name,cause_name,sex
0,297.0,27.0,1.0,0.018018,7.450997e-146,-1.1e-05,0.105526,-0.009537,1.007356e-23,0.461474,Age-standardized,Tuberculosis,Male
1,297.0,27.0,2.0,0.010048,9.459788e-139,2e-06,0.617212,-0.006653,6.377185e-33,0.470518,Age-standardized,Tuberculosis,Female
2,298.0,27.0,1.0,0.027086,2.281757e-51,-6.4e-05,0.001716,-0.006637,0.01513699,0.153045,Age-standardized,HIV/AIDS,Male
3,298.0,27.0,2.0,0.027011,6.4392e-50,-4.6e-05,0.025247,-0.00989,0.0003689814,0.16446,Age-standardized,HIV/AIDS,Female
4,302.0,27.0,1.0,0.018031,6.496932e-174,1.7e-05,0.004872,-0.01361,3.4224340000000005e-55,0.560209,Age-standardized,Diarrheal diseases,Male


## Filtering to keep only columns we're interested in, dropping NaNs, and then keeping only those rows where the coefficient for HAQ_over_Frontier as well SDI is statistically significant according to our model above

In [51]:
dalys_haq_sdi_answers_out = dalys_haq_sdi_answers[['cause_name', 'age_group_name', 'sex', \
                                                   'intercept', 'intercept_pValue', \
                                                   'haq_over_frontier_coef', 'haq_over_frontier_coef_pValue',\
                                                   'logit_sdi_coef', 'logit_sdi_coef_pValue', 'r2']]

In [52]:
dalys_haq_sdi_answers_out.head()

Unnamed: 0,cause_name,age_group_name,sex,intercept,intercept_pValue,haq_over_frontier_coef,haq_over_frontier_coef_pValue,logit_sdi_coef,logit_sdi_coef_pValue,r2
0,Tuberculosis,Age-standardized,Male,0.018018,7.450997e-146,-1.1e-05,0.105526,-0.009537,1.007356e-23,0.461474
1,Tuberculosis,Age-standardized,Female,0.010048,9.459788e-139,2e-06,0.617212,-0.006653,6.377185e-33,0.470518
2,HIV/AIDS,Age-standardized,Male,0.027086,2.281757e-51,-6.4e-05,0.001716,-0.006637,0.01513699,0.153045
3,HIV/AIDS,Age-standardized,Female,0.027011,6.4392e-50,-4.6e-05,0.025247,-0.00989,0.0003689814,0.16446
4,Diarrheal diseases,Age-standardized,Male,0.018031,6.496932e-174,1.7e-05,0.004872,-0.01361,3.4224340000000005e-55,0.560209


In [53]:
dalys_haq_sdi_answers_out.shape

(274, 10)

In [54]:
dalys_haq_sdi_answers_out = dalys_haq_sdi_answers_out.dropna()

In [55]:
dalys_haq_sdi_answers_out.shape

(274, 10)

In [56]:
dalys_haq_sdi_answers_out_significant = dalys_haq_sdi_answers[dalys_haq_sdi_answers['haq_over_frontier_coef_pValue'] < 0.05]
dalys_haq_sdi_answers_out_significant = dalys_haq_sdi_answers_out_significant[dalys_haq_sdi_answers_out_significant['logit_sdi_coef_pValue'] < 0.05]
             

In [57]:
dalys_haq_sdi_answers_out_significant.shape

(167, 13)

In [58]:
dalys_haq_sdi_answers_out_significant['cause_id'] = dalys_haq_sdi_answers_out_significant['cause_id'].astype('int64')

In [59]:
dalys_haq_sdi_answers_out_significant['age_group_id'] = dalys_haq_sdi_answers_out_significant['age_group_id'].astype('int64')

In [60]:
dalys_haq_sdi_answers_out_significant['sex_id'] = dalys_haq_sdi_answers_out_significant['sex_id'].astype('int64')

In [61]:
dalys_haq_sdi_answers_out_significant.head()

Unnamed: 0,cause_id,age_group_id,sex_id,intercept,intercept_pValue,haq_over_frontier_coef,haq_over_frontier_coef_pValue,logit_sdi_coef,logit_sdi_coef_pValue,r2,age_group_name,cause_name,sex
2,298,27,1,0.027086,2.281757e-51,-6.358639e-05,0.001715718,-0.006637,0.01513699,0.153045,Age-standardized,HIV/AIDS,Male
3,298,27,2,0.027011,6.4392e-50,-4.595813e-05,0.02524703,-0.00989,0.0003689814,0.16446,Age-standardized,HIV/AIDS,Female
4,302,27,1,0.018031,6.496932e-174,1.685661e-05,0.004871939,-0.01361,3.4224340000000005e-55,0.560209,Age-standardized,Diarrheal diseases,Male
6,321,27,1,1.6e-05,5.048977e-146,4.101624e-08,3.102767e-11,-1.6e-05,1.694059e-67,0.523738,Age-standardized,Other intestinal infectious diseases,Male
7,321,27,2,2e-05,4.841675e-133,6.351323e-08,7.178372e-15,-2.2e-05,9.135697e-73,0.522734,Age-standardized,Other intestinal infectious diseases,Female


## Segregating for men and women

In [62]:
dalys_haq_sdi_answers_out_significant_male = \
dalys_haq_sdi_answers_out_significant[dalys_haq_sdi_answers_out_significant['sex_id']==1]

In [63]:
dalys_haq_sdi_answers_out_significant_male.head()

Unnamed: 0,cause_id,age_group_id,sex_id,intercept,intercept_pValue,haq_over_frontier_coef,haq_over_frontier_coef_pValue,logit_sdi_coef,logit_sdi_coef_pValue,r2,age_group_name,cause_name,sex
2,298,27,1,0.027086,2.281757e-51,-6.358639e-05,0.001715718,-0.006637,0.01513699,0.153045,Age-standardized,HIV/AIDS,Male
4,302,27,1,0.018031,6.496932e-174,1.685661e-05,0.004871939,-0.01361,3.4224340000000005e-55,0.560209,Age-standardized,Diarrheal diseases,Male
6,321,27,1,1.6e-05,5.048977e-146,4.101624e-08,3.102767e-11,-1.6e-05,1.694059e-67,0.523738,Age-standardized,Other intestinal infectious diseases,Male
8,322,27,1,0.0239,3.441097e-247,-1.966858e-05,0.0008312156,-0.009301,1.285314e-29,0.573996,Age-standardized,Lower respiratory infections,Male
10,328,27,1,0.000805,0.0,1.345372e-06,1.754739e-35,-7.4e-05,1.445375e-07,0.313053,Age-standardized,Upper respiratory infections,Male


In [64]:
dalys_haq_sdi_answers_out_significant_female = \
dalys_haq_sdi_answers_out_significant[dalys_haq_sdi_answers_out_significant['sex_id']==2]

In [65]:
dalys_haq_sdi_answers_out_significant_female.shape

(80, 13)

In [66]:
dalys_haq_sdi_answers_out_significant_female.head()

Unnamed: 0,cause_id,age_group_id,sex_id,intercept,intercept_pValue,haq_over_frontier_coef,haq_over_frontier_coef_pValue,logit_sdi_coef,logit_sdi_coef_pValue,r2,age_group_name,cause_name,sex
3,298,27,2,0.027011,6.4392e-50,-4.595813e-05,0.02524703,-0.00989,0.0003689814,0.16446,Age-standardized,HIV/AIDS,Female
7,321,27,2,2e-05,4.841675e-133,6.351323e-08,7.178372e-15,-2.2e-05,9.135697e-73,0.522734,Age-standardized,Other intestinal infectious diseases,Female
11,328,27,2,0.000791,0.0,1.701727e-06,1.494427e-45,-9.3e-05,1.446444e-09,0.379062,Age-standardized,Upper respiratory infections,Female
15,332,27,2,0.003596,9.994309e-154,7.002889e-06,1.156607e-07,-0.003241,2.818979e-63,0.53725,Age-standardized,Meningitis,Female
19,338,27,2,2.8e-05,5.273919e-16,1.379698e-07,0.0007496566,-3.9e-05,2.236708e-12,0.104621,Age-standardized,Diphtheria,Female


In [67]:
dalys_haq_sdi_answers_out_significant_male.shape

(87, 13)

In [68]:
final_table_male = dalys_haq_sdi_answers_out_significant_male[['cause_id', 'cause_name',                                                                           
                                                                            'haq_over_frontier_coef']]                                              

In [69]:
final_table_male.head()

Unnamed: 0,cause_id,cause_name,haq_over_frontier_coef
2,298,HIV/AIDS,-6.358639e-05
4,302,Diarrheal diseases,1.685661e-05
6,321,Other intestinal infectious diseases,4.101624e-08
8,322,Lower respiratory infections,-1.966858e-05
10,328,Upper respiratory infections,1.345372e-06


In [70]:
final_table_male.shape

(87, 3)

In [72]:
final_table_male.shape

(87, 3)

In [73]:
type(final_table_male)

pandas.core.frame.DataFrame

In [74]:
final_table_female = dalys_haq_sdi_answers_out_significant_female[['cause_id', 'cause_name',\
                                                                               'haq_over_frontier_coef']]                                              

In [75]:
final_table_female.head()

Unnamed: 0,cause_id,cause_name,haq_over_frontier_coef
3,298,HIV/AIDS,-4.595813e-05
7,321,Other intestinal infectious diseases,6.351323e-08
11,328,Upper respiratory infections,1.701727e-06
15,332,Meningitis,7.002889e-06
19,338,Diphtheria,1.379698e-07


In [76]:
final_table_female.shape

(80, 3)

In [77]:
type(final_table_female)

pandas.core.frame.DataFrame

## Changed the below to Markdown type rather than code to make sure, nothing gets over-written by mistake

final_table_female.to_csv('/ihme/homes/arjuns13/notebooks/Documents/Data/dalys_haq_sdi_females_final_table_ageStd_7Jun19.csv')

final_table_female.to_csv('J:\\Project\\Cost_Effectiveness\\NPC\\Regression_Analysis\\dalys_haq_sdi_females_final_table_ageStd_7Jun19.csv')

final_table_male.to_csv('/ihme/homes/arjuns13/notebooks/Documents/Data/dalys_haq_sdi_males_final_table_ageStd_7Jun19.csv')

final_table_male.to_csv('J:\\Project\\Cost_Effectiveness\\NPC\\Regression_Analysis\\dalys_haq_sdi_males_final_table_ageStd_7Jun19.csv')