# Import Dependencies

In [1]:
import pandas as pd
from sqlalchemy import create_engine
import psycopg2
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
import sqlalchemy
from sqlalchemy import Column, Integer, String, Float, Date
from sqlalchemy import Table
from sqlalchemy.ext.declarative import declarative_base
from secret import password

In [2]:
# Load the four raw prevalence datasets, one CSV per diagnosis.
adhd_df, asperger_df, autism_df, idd_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'raw-data/adhd_raw.csv',
        'raw-data/asperger_raw.csv',
        'raw-data/autism_raw.csv',
        'raw-data/idd_raw.csv',
    )
)

# Cleaning ADHD Data

In [3]:
# Preview the first five rows of the raw ADHD data.
adhd_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Prevalence - Attention-deficit/hyperactivity disorder - Sex: Male - Age: Age-standardized (Percent),Prevalence - Attention-deficit/hyperactivity disorder - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates),Continent
0,Abkhazia,OWID_ABK,2015,,,,Asia
1,Afghanistan,AFG,1990,1.559695,0.683613,12412311.0,
2,Afghanistan,AFG,1991,1.560023,0.683845,13299016.0,
3,Afghanistan,AFG,1992,1.560413,0.684081,14485543.0,
4,Afghanistan,AFG,1993,1.560768,0.68433,15816601.0,


In [4]:
# Keep only rows from 1990 onward, then drop the 'Continent' column, which is
# the source of many of the NaN values in this dataset.
adhd_df_filtered = (
    adhd_df
    .loc[adhd_df['Year'] >= 1990]
    .drop(columns=['Continent'])
)
adhd_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalence - Attention-deficit/hyperactivity disorder - Sex: Male - Age: Age-standardized (Percent),Prevalence - Attention-deficit/hyperactivity disorder - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
0,Abkhazia,OWID_ABK,2015,,,
1,Afghanistan,AFG,1990,1.559695,0.683613,12412311.0
2,Afghanistan,AFG,1991,1.560023,0.683845,13299016.0
3,Afghanistan,AFG,1992,1.560413,0.684081,14485543.0
4,Afghanistan,AFG,1993,1.560768,0.684330,15816601.0
...,...,...,...,...,...,...
56624,Zimbabwe,ZWE,2018,,,14438812.0
56625,Zimbabwe,ZWE,2019,,,14645473.0
56626,Zimbabwe,ZWE,2020,,,14862927.0
56627,Zimbabwe,ZWE,2021,,,15092171.0


In [5]:
# Remove every row that still has at least one missing value.
adhd_df_filtered = adhd_df_filtered.dropna(axis=0, how='any')

In [6]:
# Preview the first five ADHD rows that remain after dropping missing values.
adhd_df_filtered.head(n=5)

Unnamed: 0,Entity,Code,Year,Prevalence - Attention-deficit/hyperactivity disorder - Sex: Male - Age: Age-standardized (Percent),Prevalence - Attention-deficit/hyperactivity disorder - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
1,Afghanistan,AFG,1990,1.559695,0.683613,12412311.0
2,Afghanistan,AFG,1991,1.560023,0.683845,13299016.0
3,Afghanistan,AFG,1992,1.560413,0.684081,14485543.0
4,Afghanistan,AFG,1993,1.560768,0.68433,15816601.0
5,Afghanistan,AFG,1994,1.561178,0.684579,17075728.0


In [7]:
# Inspect the dtype of each remaining column before renaming and export.
adhd_df_filtered.dtypes

Entity                                                                                                    object
Code                                                                                                      object
Year                                                                                                       int64
Prevalence - Attention-deficit/hyperactivity disorder - Sex: Male - Age: Age-standardized (Percent)      float64
Prevalence - Attention-deficit/hyperactivity disorder - Sex: Female - Age: Age-standardized (Percent)    float64
Population (historical estimates)                                                                        float64
dtype: object

In [8]:
# Replace the verbose source headers with short labels shared by all four datasets.
# NOTE(review): 'Prevalance' is misspelled, but the same labels are used by every
# dataset that is later concatenated, so it must be changed everywhere or nowhere.
adhd_column_labels = {
    'Prevalence - Attention-deficit/hyperactivity disorder - Sex: Male - Age: Age-standardized (Percent)': 'Prevalance_In_Males',
    'Prevalence - Attention-deficit/hyperactivity disorder - Sex: Female - Age: Age-standardized (Percent)': 'Prevalance_In_Females',
    'Population (historical estimates)': 'Population',
}
adhd_df_filtered = adhd_df_filtered.rename(columns=adhd_column_labels)

In [9]:
# Tag every row with its diagnosis so rows stay identifiable once the
# per-diagnosis frames are combined.
adhd_df_filtered = adhd_df_filtered.assign(Diagnosis='ADHD')
adhd_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalance_In_Males,Prevalance_In_Females,Population,Diagnosis
1,Afghanistan,AFG,1990,1.559695,0.683613,12412311.0,ADHD
2,Afghanistan,AFG,1991,1.560023,0.683845,13299016.0,ADHD
3,Afghanistan,AFG,1992,1.560413,0.684081,14485543.0,ADHD
4,Afghanistan,AFG,1993,1.560768,0.684330,15816601.0,ADHD
5,Afghanistan,AFG,1994,1.561178,0.684579,17075728.0,ADHD
...,...,...,...,...,...,...,...
56392,Zimbabwe,ZWE,2013,1.929572,0.836387,13350378.0,ADHD
56393,Zimbabwe,ZWE,2014,1.932201,0.836935,13586710.0,ADHD
56394,Zimbabwe,ZWE,2015,1.935371,0.837626,13814642.0,ADHD
56395,Zimbabwe,ZWE,2016,1.939094,0.838419,14030338.0,ADHD


# Cleaning Asperger's Data

In [10]:
# Preview the first five rows of the raw Asperger's data.
asperger_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Male - Age: Age-standardized (Percent),Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates),Continent
0,Abkhazia,OWID_ABK,2015,,,,Asia
1,Afghanistan,AFG,1990,1.045411,0.236704,12412311.0,
2,Afghanistan,AFG,1991,1.045548,0.236713,13299016.0,
3,Afghanistan,AFG,1992,1.045715,0.236731,14485543.0,
4,Afghanistan,AFG,1993,1.04577,0.236749,15816601.0,


In [11]:
# Keep only rows from 1990 onward, then drop the 'Continent' column, which is
# the source of many of the NaN values in this dataset.
asperger_df_filtered = (
    asperger_df
    .loc[asperger_df['Year'] >= 1990]
    .drop(columns=['Continent'])
)
asperger_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Male - Age: Age-standardized (Percent),Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
0,Abkhazia,OWID_ABK,2015,,,
1,Afghanistan,AFG,1990,1.045411,0.236704,12412311.0
2,Afghanistan,AFG,1991,1.045548,0.236713,13299016.0
3,Afghanistan,AFG,1992,1.045715,0.236731,14485543.0
4,Afghanistan,AFG,1993,1.045770,0.236749,15816601.0
...,...,...,...,...,...,...
56510,Zimbabwe,ZWE,2018,,,14438812.0
56511,Zimbabwe,ZWE,2019,,,14645473.0
56512,Zimbabwe,ZWE,2020,,,14862927.0
56513,Zimbabwe,ZWE,2021,,,15092171.0


In [12]:
# Remove every row that still has at least one missing value.
asperger_df_filtered = asperger_df_filtered.dropna(axis=0, how='any')

In [13]:
# Show the Asperger's rows that remain after dropping missing values.
asperger_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Male - Age: Age-standardized (Percent),Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
1,Afghanistan,AFG,1990,1.045411,0.236704,12412311.0
2,Afghanistan,AFG,1991,1.045548,0.236713,13299016.0
3,Afghanistan,AFG,1992,1.045715,0.236731,14485543.0
4,Afghanistan,AFG,1993,1.045770,0.236749,15816601.0
5,Afghanistan,AFG,1994,1.045838,0.236751,17075728.0
...,...,...,...,...,...,...
56277,Zimbabwe,ZWE,2012,0.972759,0.217398,13115149.0
56278,Zimbabwe,ZWE,2013,0.973844,0.217514,13350378.0
56279,Zimbabwe,ZWE,2014,0.974740,0.217633,13586710.0
56280,Zimbabwe,ZWE,2015,0.975532,0.217741,13814642.0


In [14]:
# Inspect the dtype of each remaining column before renaming and export.
asperger_df_filtered.dtypes

Entity                                                                                                                   object
Code                                                                                                                     object
Year                                                                                                                      int64
Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Male - Age: Age-standardized (Percent)      float64
Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Female - Age: Age-standardized (Percent)    float64
Population (historical estimates)                                                                                       float64
dtype: object

In [15]:
# Replace the verbose source headers with short labels shared by all four datasets.
# NOTE(review): 'Prevalance' is misspelled, but the same labels are used by every
# dataset that is later concatenated, so it must be changed everywhere or nowhere.
asperger_column_labels = {
    'Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Male - Age: Age-standardized (Percent)': 'Prevalance_In_Males',
    'Prevalence - Asperger syndrome and other autistic spectrum disorders - Sex: Female - Age: Age-standardized (Percent)': 'Prevalance_In_Females',
    'Population (historical estimates)': 'Population',
}
asperger_df_filtered = asperger_df_filtered.rename(columns=asperger_column_labels)

In [16]:
# Display the frame to confirm the shortened column names.
asperger_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalance_In_Males,Prevalance_In_Females,Population
1,Afghanistan,AFG,1990,1.045411,0.236704,12412311.0
2,Afghanistan,AFG,1991,1.045548,0.236713,13299016.0
3,Afghanistan,AFG,1992,1.045715,0.236731,14485543.0
4,Afghanistan,AFG,1993,1.045770,0.236749,15816601.0
5,Afghanistan,AFG,1994,1.045838,0.236751,17075728.0
...,...,...,...,...,...,...
56277,Zimbabwe,ZWE,2012,0.972759,0.217398,13115149.0
56278,Zimbabwe,ZWE,2013,0.973844,0.217514,13350378.0
56279,Zimbabwe,ZWE,2014,0.974740,0.217633,13586710.0
56280,Zimbabwe,ZWE,2015,0.975532,0.217741,13814642.0


In [17]:
# Tag every row with its diagnosis so rows stay identifiable once the
# per-diagnosis frames are combined.
asperger_df_filtered = asperger_df_filtered.assign(Diagnosis='Asperger')
asperger_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalance_In_Males,Prevalance_In_Females,Population,Diagnosis
1,Afghanistan,AFG,1990,1.045411,0.236704,12412311.0,Asperger
2,Afghanistan,AFG,1991,1.045548,0.236713,13299016.0,Asperger
3,Afghanistan,AFG,1992,1.045715,0.236731,14485543.0,Asperger
4,Afghanistan,AFG,1993,1.045770,0.236749,15816601.0,Asperger
5,Afghanistan,AFG,1994,1.045838,0.236751,17075728.0,Asperger
...,...,...,...,...,...,...,...
56277,Zimbabwe,ZWE,2012,0.972759,0.217398,13115149.0,Asperger
56278,Zimbabwe,ZWE,2013,0.973844,0.217514,13350378.0,Asperger
56279,Zimbabwe,ZWE,2014,0.974740,0.217633,13586710.0,Asperger
56280,Zimbabwe,ZWE,2015,0.975532,0.217741,13814642.0,Asperger


# Cleaning Autism Data

In [18]:
# Preview the first five rows of the raw autism data.
autism_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Prevalence - Autism - Sex: Male - Age: Age-standardized (Percent),Prevalence - Autism - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates),Continent
0,Abkhazia,OWID_ABK,2015,,,,Asia
1,Afghanistan,AFG,1990,0.313685,0.129473,12412311.0,
2,Afghanistan,AFG,1991,0.312901,0.128978,13299016.0,
3,Afghanistan,AFG,1992,0.312296,0.128577,14485543.0,
4,Afghanistan,AFG,1993,0.311659,0.12819,15816601.0,


In [19]:
# Keep only rows from 1990 onward, then drop the 'Continent' column, which is
# the source of many of the NaN values in this dataset.
autism_df_filtered = (
    autism_df
    .loc[autism_df['Year'] >= 1990]
    .drop(columns=['Continent'])
)
autism_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalence - Autism - Sex: Male - Age: Age-standardized (Percent),Prevalence - Autism - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
0,Abkhazia,OWID_ABK,2015,,,
1,Afghanistan,AFG,1990,0.313685,0.129473,12412311.0
2,Afghanistan,AFG,1991,0.312901,0.128978,13299016.0
3,Afghanistan,AFG,1992,0.312296,0.128577,14485543.0
4,Afghanistan,AFG,1993,0.311659,0.128190,15816601.0
...,...,...,...,...,...,...
56510,Zimbabwe,ZWE,2018,,,14438812.0
56511,Zimbabwe,ZWE,2019,,,14645473.0
56512,Zimbabwe,ZWE,2020,,,14862927.0
56513,Zimbabwe,ZWE,2021,,,15092171.0


In [20]:
# Remove every row that still has at least one missing value.
autism_df_filtered = autism_df_filtered.dropna(axis=0, how='any')

In [21]:
# Show the autism rows that remain after dropping missing values.
autism_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalence - Autism - Sex: Male - Age: Age-standardized (Percent),Prevalence - Autism - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
1,Afghanistan,AFG,1990,0.313685,0.129473,12412311.0
2,Afghanistan,AFG,1991,0.312901,0.128978,13299016.0
3,Afghanistan,AFG,1992,0.312296,0.128577,14485543.0
4,Afghanistan,AFG,1993,0.311659,0.128190,15816601.0
5,Afghanistan,AFG,1994,0.311027,0.127810,17075728.0
...,...,...,...,...,...,...
56277,Zimbabwe,ZWE,2012,0.304222,0.138132,13115149.0
56278,Zimbabwe,ZWE,2013,0.304556,0.138163,13350378.0
56279,Zimbabwe,ZWE,2014,0.304831,0.138197,13586710.0
56280,Zimbabwe,ZWE,2015,0.305075,0.138223,13814642.0


In [22]:
# Inspect the dtype of each remaining column before renaming and export.
autism_df_filtered.dtypes

Entity                                                                  object
Code                                                                    object
Year                                                                     int64
Prevalence - Autism - Sex: Male - Age: Age-standardized (Percent)      float64
Prevalence - Autism - Sex: Female - Age: Age-standardized (Percent)    float64
Population (historical estimates)                                      float64
dtype: object

In [23]:
# Replace the verbose source headers with short labels shared by all four datasets.
# NOTE(review): 'Prevalance' is misspelled, but the same labels are used by every
# dataset that is later concatenated, so it must be changed everywhere or nowhere.
autism_column_labels = {
    'Prevalence - Autism - Sex: Male - Age: Age-standardized (Percent)': 'Prevalance_In_Males',
    'Prevalence - Autism - Sex: Female - Age: Age-standardized (Percent)': 'Prevalance_In_Females',
    'Population (historical estimates)': 'Population',
}
autism_df_filtered = autism_df_filtered.rename(columns=autism_column_labels)

In [24]:
# Display the frame to confirm the shortened column names.
autism_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalance_In_Males,Prevalance_In_Females,Population
1,Afghanistan,AFG,1990,0.313685,0.129473,12412311.0
2,Afghanistan,AFG,1991,0.312901,0.128978,13299016.0
3,Afghanistan,AFG,1992,0.312296,0.128577,14485543.0
4,Afghanistan,AFG,1993,0.311659,0.128190,15816601.0
5,Afghanistan,AFG,1994,0.311027,0.127810,17075728.0
...,...,...,...,...,...,...
56277,Zimbabwe,ZWE,2012,0.304222,0.138132,13115149.0
56278,Zimbabwe,ZWE,2013,0.304556,0.138163,13350378.0
56279,Zimbabwe,ZWE,2014,0.304831,0.138197,13586710.0
56280,Zimbabwe,ZWE,2015,0.305075,0.138223,13814642.0


In [25]:
# Tag every row with its diagnosis so rows stay identifiable once the
# per-diagnosis frames are combined.
autism_df_filtered = autism_df_filtered.assign(Diagnosis='Autism')
autism_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalance_In_Males,Prevalance_In_Females,Population,Diagnosis
1,Afghanistan,AFG,1990,0.313685,0.129473,12412311.0,Autism
2,Afghanistan,AFG,1991,0.312901,0.128978,13299016.0,Autism
3,Afghanistan,AFG,1992,0.312296,0.128577,14485543.0,Autism
4,Afghanistan,AFG,1993,0.311659,0.128190,15816601.0,Autism
5,Afghanistan,AFG,1994,0.311027,0.127810,17075728.0,Autism
...,...,...,...,...,...,...,...
56277,Zimbabwe,ZWE,2012,0.304222,0.138132,13115149.0,Autism
56278,Zimbabwe,ZWE,2013,0.304556,0.138163,13350378.0,Autism
56279,Zimbabwe,ZWE,2014,0.304831,0.138197,13586710.0,Autism
56280,Zimbabwe,ZWE,2015,0.305075,0.138223,13814642.0,Autism


# Cleaning Intellectual Disability Data

In [26]:
# Preview the first five rows of the raw intellectual-disability data.
idd_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Prevalence - Idiopathic developmental intellectual disability - Sex: Male - Age: Age-standardized (Percent),Prevalence - Idiopathic developmental intellectual disability - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates),Continent
0,Abkhazia,OWID_ABK,2015,,,,Asia
1,Afghanistan,AFG,1990,5.716282,4.635575,12412311.0,
2,Afghanistan,AFG,1991,6.082564,4.917924,13299016.0,
3,Afghanistan,AFG,1992,6.409605,5.170105,14485543.0,
4,Afghanistan,AFG,1993,6.673168,5.373784,15816601.0,


In [27]:
# Keep only rows from 1990 onward, then drop the 'Continent' column, which is
# the source of many of the NaN values in this dataset.
idd_df_filtered = (
    idd_df
    .loc[idd_df['Year'] >= 1990]
    .drop(columns=['Continent'])
)
idd_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalence - Idiopathic developmental intellectual disability - Sex: Male - Age: Age-standardized (Percent),Prevalence - Idiopathic developmental intellectual disability - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
0,Abkhazia,OWID_ABK,2015,,,
1,Afghanistan,AFG,1990,5.716282,4.635575,12412311.0
2,Afghanistan,AFG,1991,6.082564,4.917924,13299016.0
3,Afghanistan,AFG,1992,6.409605,5.170105,14485543.0
4,Afghanistan,AFG,1993,6.673168,5.373784,15816601.0
...,...,...,...,...,...,...
56624,Zimbabwe,ZWE,2018,,,14438812.0
56625,Zimbabwe,ZWE,2019,,,14645473.0
56626,Zimbabwe,ZWE,2020,,,14862927.0
56627,Zimbabwe,ZWE,2021,,,15092171.0


In [28]:
# Remove every row that still has at least one missing value.
idd_df_filtered = idd_df_filtered.dropna(axis=0, how='any')

In [29]:
# Show the intellectual-disability rows that remain after dropping missing values.
idd_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalence - Idiopathic developmental intellectual disability - Sex: Male - Age: Age-standardized (Percent),Prevalence - Idiopathic developmental intellectual disability - Sex: Female - Age: Age-standardized (Percent),Population (historical estimates)
1,Afghanistan,AFG,1990,5.716282,4.635575,12412311.0
2,Afghanistan,AFG,1991,6.082564,4.917924,13299016.0
3,Afghanistan,AFG,1992,6.409605,5.170105,14485543.0
4,Afghanistan,AFG,1993,6.673168,5.373784,15816601.0
5,Afghanistan,AFG,1994,6.849740,5.510409,17075728.0
...,...,...,...,...,...,...
56392,Zimbabwe,ZWE,2013,0.676245,0.635314,13350378.0
56393,Zimbabwe,ZWE,2014,0.668882,0.627092,13586710.0
56394,Zimbabwe,ZWE,2015,0.657862,0.615192,13814642.0
56395,Zimbabwe,ZWE,2016,0.642503,0.598988,14030338.0


In [30]:
# Inspect the dtype of each remaining column before renaming and export.
idd_df_filtered.dtypes

Entity                                                                                                            object
Code                                                                                                              object
Year                                                                                                               int64
Prevalence - Idiopathic developmental intellectual disability - Sex: Male - Age: Age-standardized (Percent)      float64
Prevalence - Idiopathic developmental intellectual disability - Sex: Female - Age: Age-standardized (Percent)    float64
Population (historical estimates)                                                                                float64
dtype: object

In [31]:
# Replace the verbose source headers with short labels shared by all four datasets.
# NOTE(review): 'Prevalance' is misspelled, but the same labels are used by every
# dataset that is later concatenated, so it must be changed everywhere or nowhere.
idd_column_labels = {
    'Prevalence - Idiopathic developmental intellectual disability - Sex: Male - Age: Age-standardized (Percent)': 'Prevalance_In_Males',
    'Prevalence - Idiopathic developmental intellectual disability - Sex: Female - Age: Age-standardized (Percent)': 'Prevalance_In_Females',
    'Population (historical estimates)': 'Population',
}
idd_df_filtered = idd_df_filtered.rename(columns=idd_column_labels)

In [32]:
# Display the frame to confirm the shortened column names.
idd_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalance_In_Males,Prevalance_In_Females,Population
1,Afghanistan,AFG,1990,5.716282,4.635575,12412311.0
2,Afghanistan,AFG,1991,6.082564,4.917924,13299016.0
3,Afghanistan,AFG,1992,6.409605,5.170105,14485543.0
4,Afghanistan,AFG,1993,6.673168,5.373784,15816601.0
5,Afghanistan,AFG,1994,6.849740,5.510409,17075728.0
...,...,...,...,...,...,...
56392,Zimbabwe,ZWE,2013,0.676245,0.635314,13350378.0
56393,Zimbabwe,ZWE,2014,0.668882,0.627092,13586710.0
56394,Zimbabwe,ZWE,2015,0.657862,0.615192,13814642.0
56395,Zimbabwe,ZWE,2016,0.642503,0.598988,14030338.0


In [33]:
# Tag every row with its diagnosis so rows stay identifiable once the
# per-diagnosis frames are combined.
idd_df_filtered = idd_df_filtered.assign(Diagnosis='IDD')
idd_df_filtered

Unnamed: 0,Entity,Code,Year,Prevalance_In_Males,Prevalance_In_Females,Population,Diagnosis
1,Afghanistan,AFG,1990,5.716282,4.635575,12412311.0,IDD
2,Afghanistan,AFG,1991,6.082564,4.917924,13299016.0,IDD
3,Afghanistan,AFG,1992,6.409605,5.170105,14485543.0,IDD
4,Afghanistan,AFG,1993,6.673168,5.373784,15816601.0,IDD
5,Afghanistan,AFG,1994,6.849740,5.510409,17075728.0,IDD
...,...,...,...,...,...,...,...
56392,Zimbabwe,ZWE,2013,0.676245,0.635314,13350378.0,IDD
56393,Zimbabwe,ZWE,2014,0.668882,0.627092,13586710.0,IDD
56394,Zimbabwe,ZWE,2015,0.657862,0.615192,13814642.0,IDD
56395,Zimbabwe,ZWE,2016,0.642503,0.598988,14030338.0,IDD


In [34]:
# Stack the four cleaned per-diagnosis frames into one table for export to CSV
# and Postgres. sort=True orders the columns alphabetically; ignore_index=True
# renumbers the rows 0..n-1. reset_index() then materialises that row number as
# an 'index' column, which serves as the primary key in Postgres.
tables = [adhd_df_filtered, asperger_df_filtered, autism_df_filtered, idd_df_filtered]
final_table = pd.concat(tables, ignore_index=True, sort=True).reset_index()
final_table

Unnamed: 0,index,Code,Diagnosis,Entity,Population,Prevalance_In_Females,Prevalance_In_Males,Year
0,0,AFG,ADHD,Afghanistan,12412311.0,0.683613,1.559695,1990
1,1,AFG,ADHD,Afghanistan,13299016.0,0.683845,1.560023,1991
2,2,AFG,ADHD,Afghanistan,14485543.0,0.684081,1.560413,1992
3,3,AFG,ADHD,Afghanistan,15816601.0,0.684330,1.560768,1993
4,4,AFG,ADHD,Afghanistan,17075728.0,0.684579,1.561178,1994
...,...,...,...,...,...,...,...,...
21555,21555,ZWE,IDD,Zimbabwe,13350378.0,0.635314,0.676245,2013
21556,21556,ZWE,IDD,Zimbabwe,13586710.0,0.627092,0.668882,2014
21557,21557,ZWE,IDD,Zimbabwe,13814642.0,0.615192,0.657862,2015
21558,21558,ZWE,IDD,Zimbabwe,14030338.0,0.598988,0.642503,2016


In [35]:
# Declarative base that collects the table metadata for the model below.
Base = declarative_base()


class neuroDiagnosis(Base):
    """ORM model for the Postgres table ``disorders``.

    Declares one column per column of ``final_table``, in the same order,
    with ``index`` as the primary key.

    NOTE(review): the column names here are lowercase and spelled
    "prevalence", whereas the DataFrame/CSV headers are "Prevalance_In_*";
    presumably the CSV is loaded by column position -- confirm.
    """

    __tablename__ = 'disorders'

    index = Column(Integer, primary_key=True)
    code = Column(String)
    diagnosis = Column(String)
    entity = Column(String)
    population = Column(Float)
    prevalence_in_females = Column(Float)
    prevalence_in_males = Column(Float)
    year = Column(Integer)

In [36]:
# The Postgres password is read from secret.py (imported with the other
# dependencies at the top of the notebook). That file is listed in .gitignore
# so the credential is never committed.

# Build the connection URL from its parts instead of an f-string: URL.create
# escapes special characters in the password (e.g. "@", ":" or "/") that would
# otherwise corrupt a hand-assembled "postgresql://user:password@host" string.
path = sqlalchemy.engine.URL.create(
    drivername="postgresql",
    username="postgres",
    password=password,
    host="localhost",
    port=5432,
    database="neuro_disorder_db",
)

# Create the schema in Postgres: this issues CREATE TABLE for the "disorders"
# table (declared by neuroDiagnosis) inside the "neuro_disorder_db" database.
# echo=True logs every SQL statement the engine emits.
engine = create_engine(path, echo=True)
Base.metadata.create_all(engine)

2022-01-14 22:55:46,450 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2022-01-14 22:55:46,451 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-01-14 22:55:46,457 INFO sqlalchemy.engine.Engine select current_schema()
2022-01-14 22:55:46,458 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-01-14 22:55:46,481 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2022-01-14 22:55:46,485 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-01-14 22:55:46,503 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-01-14 22:55:46,506 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2022-01-14 22:55:46,507 INFO sqlalchemy.engine.Engine [generated in 0.00153s] {'name': 'disorders'}
2022-01-14 22:55:46,527 INFO sqlalchemy.engine.Engine 
CREATE TABLE disorders (
	index SERIAL NOT NULL, 
	code VARCHAR, 
	diagnosis VARCHAR, 
	entity VARCHAR, 
	population FLOAT, 
	preval

In [37]:
# Write the combined table to CSV (without the pandas row index). Once the
# "disorders" table has been created by the cell above, the user only needs to
# import this file into it to load all of the data.
final_table.to_csv(path_or_buf='final_table_clean.csv', index=False)