# Worldwide Autism Burden

In [1]:
import pandas as pd



### Importing data

In [2]:
# Columns read from each of the three burden extracts; other columns in the CSVs are ignored.
burden_columns = ['measure_name', 'location_name', 'sex_name', 'age_name', 'year', 'val']

# Each extract is read into its own frame, then the three are stacked row-wise.
# pd.concat keeps each frame's original row labels, so labels repeat in `data`.
data_1 = pd.read_csv('raw_data/data_1.csv', usecols=burden_columns)
data_2 = pd.read_csv('raw_data/data_2.csv', usecols=burden_columns)
data_3 = pd.read_csv('raw_data/data_3.csv', usecols=burden_columns)

data = pd.concat([data_1, data_2, data_3])

# Population table: the first line of the file is skipped and the columns are named
# explicitly. The sixth column ('random') is named only so it can be left out via usecols.
population_columns = ['country', 'year', 'sex', 'age', 'population']
population = pd.read_csv(
    'raw_data/population.csv',
    names=population_columns + ['random'],
    usecols=population_columns,
    skiprows=1,
)

### Cleaning data

In [3]:
# Shorten the verbose DALYs label to its first five characters.
is_dalys = data['measure_name'] == 'DALYs (Disability-Adjusted Life Years)'
data.loc[is_dalys, 'measure_name'] = data.loc[is_dalys, 'measure_name'].str.slice(stop=5)

# Remove the ' years' text from the age labels and round values to two decimals.
data['age_name'] = data['age_name'].str.replace(' years', '')
data['val'] = data['val'].round(2)

# Drop rows that exactly repeat an earlier row (the first occurrence is kept),
# then renumber the index from zero.
is_repeat = data.duplicated(keep='first')
data = data[~is_repeat].reset_index(drop=True)

In [4]:
# Age bands (after whitespace removal) that are kept. Any other label, including
# 'Total' and 'Unknown', is dropped by the whitelist filter below.
AGE_BANDS = [
    '0-4', '5-9', '10-14',
    '15-19', '20-24', '25-29',
    '30-34', '35-39', '40-44',
    '45-49', '50-54', '55-59',
    '60-64', '65-69', '70-74',
    '75-79', '80+',
]

# Five-year bands that are merged into wider bands; bands not listed keep their label.
AGE_BAND_MERGES = {
    '0-4': '0-9', '5-9': '0-9',
    '10-14': '10-19', '15-19': '10-19',
    '60-64': '60-79', '65-69': '60-79', '70-74': '60-79', '75-79': '60-79',
}

# Keep only rows whose year is made of digits, convert it to int, and restrict the
# table to 1990-2019 inclusive. The .copy() calls make each filtered frame independent,
# so the column assignments that follow do not raise SettingWithCopyWarning.
population = population[population['year'].str.isdigit()].copy()
population['year'] = population['year'].astype(int)
population = population[population['year'].between(1990, 2019)]

# Drop the 'Both Sexes' rows with an explicit filter. Overwriting them through an
# index-aligned assignment would instead leave all-NaN placeholder rows and turn
# `year` into a float column.
population = population[population['sex'] != 'Both Sexes'].copy()

# Normalise age labels by removing spaces, keep only the whitelisted bands,
# then merge the narrow bands into the wider ones.
population['age'] = population['age'].str.replace(' ', '', regex=False)
population = population[population['age'].isin(AGE_BANDS)].copy()
population['age'] = population['age'].replace(AGE_BAND_MERGES)

# Sum the population over the rows that now share the same merged age band.
population = population.groupby(['country', 'year', 'sex', 'age'])[['population']].sum()

# Back to a flat frame; dropna() is kept as a guard against missing values.
population = population.reset_index().dropna()
population['year'] = population['year'].astype(int)


In [5]:
# Collapse rows that share the same age, year, location, sex and measure by summing `val`.
data = (
    data
    .groupby(['age_name', 'year', 'location_name', 'sex_name', 'measure_name'])[['val']]
    .sum()
    .reset_index()
)


def _pivot_measure(frame, measure):
    """Return the rows of `frame` for one measure, with `val` moved into a column named after it.

    The result has one row per (location_name, sex_name, age_name, year) and its
    column axis is named 'index'.
    """
    return (
        frame[frame['measure_name'] == measure]
        .pivot(index=['location_name', 'sex_name', 'age_name', 'year'], columns='measure_name', values='val')
        .reset_index()
        .rename_axis('index', axis=1)
    )


data_dalys = _pivot_measure(data, 'DALYs')
data_incidence = _pivot_measure(data, 'Incidence')
data_prevalence = _pivot_measure(data, 'Prevalence')