# Worldwide Autism Burden

In [None]:
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

### Importing data

In [None]:
# Load the three prevalence CSV files. Every file loses the same set of
# columns, and its generic 'val' column is renamed after the table itself.
_unused_columns = ['measure', 'metric', 'cause', 'upper', 'lower']

prevalence_abs = (
    pd.read_csv('prevalence_abs.csv')
    .drop(columns=_unused_columns)
    .rename(columns={'val': 'prevalence_abs'})
)
prevalence_pct = (
    pd.read_csv('prevalence_pct.csv')
    .drop(columns=_unused_columns)
    .rename(columns={'val': 'prevalence_pct'})
)
prevalence_rate = (
    pd.read_csv('prevalence_rate.csv')
    .drop(columns=_unused_columns)
    .rename(columns={'val': 'prevalence_rate'})
)

In [None]:
def _pivot_by_sex(table, value_column):
    """Return `table` reshaped to one row per (location, age, year).

    The distinct values of the 'sex' column become columns holding
    `value_column`; 'Female' and 'Male' are renamed to lower case.
    """
    wide = table.reset_index().pivot(
        index=['location', 'age', 'year'],
        columns=['sex'],
        values=value_column,
    )
    # Bring location/age/year back as ordinary columns before renaming.
    return wide.reset_index().rename(columns={'Female': 'female', 'Male': 'male'})


prevalence_abs = _pivot_by_sex(prevalence_abs, 'prevalence_abs')
prevalence_pct = _pivot_by_sex(prevalence_pct, 'prevalence_pct')
prevalence_rate = _pivot_by_sex(prevalence_rate, 'prevalence_rate')



### Cleaning data

In [None]:
# Strip the ' years' text from the age labels of all three tables.
for _table in (prevalence_abs, prevalence_pct, prevalence_rate):
    _table['age'] = _table['age'].str.replace(' years', '')

### Exploratory Analysis

In [None]:
# Show the column/dtype/non-null summary of each of the three tables.
# DataFrame.info() writes its report itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
prevalence_abs.info()
print('===================')
prevalence_pct.info()
print('===================')
# The third summary is for the rate table (previously the percentage table
# was summarised twice and the rate table never inspected).
prevalence_rate.info()

In [None]:
# Print descriptive statistics for each table, with a separator line
# between consecutive tables.
for _position, _table in enumerate((prevalence_abs, prevalence_pct, prevalence_rate)):
    if _position > 0:
        print('===================')
    print(_table.describe())

In [None]:
# Box plot of the 'female' column of prevalence_abs over all rows.
# showfliers=False hides the outlier points. It replaces the legacy
# matplotlib spelling sym='', which is only understood by Axes.boxplot.
# NOTE(review): seaborn >= 0.13 is understood to reject sym -- confirm
# against the installed version.
sns.boxplot(data=prevalence_abs, y='female', showfliers=False)
plt.title('Distribution of absolute prevalence of ASD for females')
plt.xlabel('female')
plt.ylabel('absolute prevalence')
plt.show()

In [None]:
# Box plot of the 'male' column of prevalence_abs over all rows.
# showfliers=False hides the outlier points. It replaces the legacy
# matplotlib spelling sym='', which is only understood by Axes.boxplot.
# NOTE(review): seaborn >= 0.13 is understood to reject sym -- confirm
# against the installed version.
sns.boxplot(data=prevalence_abs, y='male', showfliers=False)
plt.title('Distribution of absolute prevalence of ASD for males')
plt.xlabel('male')
plt.ylabel('absolute prevalence')
plt.show()

In [None]:
# Box plots of the 'female' column of prevalence_abs, one box per value of
# the 'age' column.
# showfliers=False hides the outlier points. It replaces the legacy
# matplotlib spelling sym='', which is only understood by Axes.boxplot.
# NOTE(review): seaborn >= 0.13 is understood to reject sym -- confirm
# against the installed version.
sns.boxplot(data=prevalence_abs, y='female', x='age', showfliers=False)
plt.title('Distribution of absolute prevalence of ASD per age category for females')
# The x axis carries the age categories, so it is labelled accordingly.
plt.xlabel('age category')
plt.ylabel('absolute prevalence')
plt.show()

In [None]:
# Box plots of the 'male' column of prevalence_abs, one box per value of
# the 'age' column.
# showfliers=False hides the outlier points. It replaces the legacy
# matplotlib spelling sym='', which is only understood by Axes.boxplot.
# NOTE(review): seaborn >= 0.13 is understood to reject sym -- confirm
# against the installed version.
sns.boxplot(data=prevalence_abs, y='male', x='age', showfliers=False)
plt.title('Distribution of absolute prevalence of ASD per age category for males')
# The x axis carries the age categories, so it is labelled accordingly.
plt.xlabel('age category')
plt.ylabel('absolute prevalence')
plt.show()

In [None]:
# Box plots of the 'female' column of prevalence_abs, one box per value of
# the 'year' column.
# showfliers=False hides the outlier points. It replaces the legacy
# matplotlib spelling sym='', which is only understood by Axes.boxplot.
# NOTE(review): seaborn >= 0.13 is understood to reject sym -- confirm
# against the installed version.
sns.boxplot(data=prevalence_abs, y='female', x='year', showfliers=False)
plt.title('Distribution of absolute prevalence of ASD per year for females')
plt.xlabel('year')
plt.xticks(rotation=45)  # tilt the year tick labels
plt.ylabel('absolute prevalence')
plt.show()

In [None]:
# Box plots of the 'male' column of prevalence_abs, one box per value of
# the 'year' column.
# showfliers=False hides the outlier points. It replaces the legacy
# matplotlib spelling sym='', which is only understood by Axes.boxplot.
# NOTE(review): seaborn >= 0.13 is understood to reject sym -- confirm
# against the installed version.
sns.boxplot(data=prevalence_abs, y='male', x='year', showfliers=False)
plt.title('Distribution of absolute prevalence of ASD per year for males')
plt.xlabel('year')
plt.xticks(rotation=45)  # tilt the year tick labels
plt.ylabel('absolute prevalence')
plt.show()

In [None]:
# Sum the 2019 'female' and 'male' values per location.
# reset_index() turns 'location' back into a regular column so that later
# cells can refer to it by name (e.g. x='location' in the bar plots) without
# relying on the plotting library resolving index names. This matches how
# total_abs_age is built.
total_abs = (
    prevalence_abs[prevalence_abs['year'] == 2019]
    .groupby(['location'])[['female', 'male']]
    .sum()
    .reset_index()
)

In [None]:
# First ten rows of total_abs after ordering by each sex column, largest
# value first.
total_abs_females = total_abs.sort_values(by='female', ascending=False).iloc[:10]
total_abs_males = total_abs.sort_values(by='male', ascending=False).iloc[:10]

In [None]:
# Bar chart of the 'female' values in total_abs_females, one bar per location.
_axes = sns.barplot(x='location', y='female', data=total_abs_females)
_axes.set_title('Countries with highest absolute prevalence of ASD in females in 2019')
_axes.set_xlabel('country')
_axes.set_ylabel('absolute prevalence')
plt.xticks(rotation=90)  # vertical tick labels
plt.show()

In [None]:
# Bar chart of the 'male' values in total_abs_males, one bar per location.
_axes = sns.barplot(x='location', y='male', data=total_abs_males)
_axes.set_title('Countries with highest absolute prevalence of ASD in males in 2019')
_axes.set_xlabel('country')
_axes.set_ylabel('absolute prevalence')
plt.xticks(rotation=90)  # vertical tick labels
plt.show()

In [None]:
# Keep only the 2019 rows, then sum the 'female' and 'male' columns within
# each age value; 'age' is returned as a regular column.
_is_2019 = prevalence_abs['year'] == 2019
total_abs_age = (
    prevalence_abs[_is_2019]
    .groupby(['age'])[['female', 'male']]
    .sum()
    .reset_index()
)

In [None]:
# Bar chart of the summed 'female' values in total_abs_age, one bar per age.
_axes = sns.barplot(x='age', y='female', data=total_abs_age)
_axes.set_title('Absolute prevalence of ASD per age category in females in 2019')
_axes.set_ylabel('absolute prevalence')
plt.show()

In [None]:
# Bar chart of the summed 'male' values in total_abs_age, one bar per age.
_axes = sns.barplot(x='age', y='male', data=total_abs_age)
_axes.set_title('Absolute prevalence of ASD per age category in males in 2019')
_axes.set_ylabel('absolute prevalence')
plt.show()

In [None]:
# Sum each sex column over every (age, year) pair. Both results keep
# 'age' and 'year' as index levels.
_by_age_and_year = prevalence_abs.groupby(['age', 'year'])
age_year_male = _by_age_and_year[['male']].sum()
age_year_female = _by_age_and_year[['female']].sum()

In [None]:
# One line per age value showing the summed 'female' values against year.
ax = sns.lineplot(x='year', y='female', hue='age', data=age_year_female)
ax.set_title('Absolute prevalence of ASD for each year and age category in females')
ax.set_ylabel('Absolute prevalence')
# Anchor the legend's upper-left corner at axes coordinates (1, 1).
sns.move_legend(ax, loc="upper left", bbox_to_anchor=(1, 1))
plt.show()

In [None]:
# One line per age value showing the summed 'male' values against year.
ax = sns.lineplot(x='year', y='male', hue='age', data=age_year_male)
ax.set_title('Absolute prevalence of ASD for each year and age category in males')
ax.set_ylabel('Absolute prevalence')
# Anchor the legend's upper-left corner at axes coordinates (1, 1).
sns.move_legend(ax, loc="upper left", bbox_to_anchor=(1, 1))
plt.show()

In [None]:
# Male-to-female ratio of the per-location totals, stored as a new column.
total_abs['sex_ratio'] = total_abs['male'].div(total_abs['female'])

# First ten rows after ordering by ratio, smallest first.
total_abs_lowest = total_abs.sort_values(by='sex_ratio').iloc[:10]


In [None]:
# Bar chart of the 'sex_ratio' values in total_abs_lowest, one bar per
# location. The title typo "ration" is corrected to "ratio".
sns.barplot(data=total_abs_lowest, x='location', y='sex_ratio')
plt.title('Countries with lowest sex ratio (male:female) of ASD in 2019')
plt.xlabel('country')
plt.xticks(rotation=90)  # vertical tick labels
plt.ylabel('sex ratio')
plt.show()

In [None]:
# First ten rows of total_abs after ordering by 'sex_ratio', largest first.
total_abs_highest = total_abs.sort_values(by='sex_ratio', ascending=False).iloc[:10]

In [None]:
# Bar chart of the 'sex_ratio' values in total_abs_highest, one bar per
# location. The title typo "ration" is corrected to "ratio".
sns.barplot(data=total_abs_highest, x='location', y='sex_ratio')
plt.title('Countries with highest sex ratio (male:female) of ASD in 2019')
plt.xlabel('country')
plt.xticks(rotation=90)  # vertical tick labels
plt.ylabel('sex ratio')
plt.show()