In [1]:
import json
import pandas as pd
import geopandas as gpd
import numpy as np
from sqlalchemy import create_engine

# Change in working age population 1999 - 2019 

— Change in working age population (20-64 years) 1999-2019 (Lorraine: 1999-2017)

— Territorial entities: districts (Wallonia), employment areas (Lorraine), Grand Duchy (Luxembourg), Kreise (Saarland, Rheinland-Pfalz)

— Statistical data sources: Destatis, INSEE, Statbel, STATEC. Calculations: OIE/IBA 2020

— Geodata sources: Act Luxembourg 2017, IGN France 2017, GeoBasis-DE/BKG 2017, NGI-Belgium 2017. Harmonisation: SIG-GR/GIS-GR 2020

In [2]:
workchange = gpd.read_file('/Users/szabonikolett/Downloads/change-in-working-age-population-1999-2019-2022-evo-pop-20-64years-1999-2019-0.geojson')

In [3]:
workchange

Unnamed: 0,OBJECTID,name,region,de_entity,fr_entity,en_entity,Pop_20_64_1999,Pop_20_64_2019,Evo_20_64_1999_2019_txt,reference_period,description,geometry
0,1,Nivelles,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),205678,233225,"+13,4%",1999-2019,,"POLYGON ((4.75090 50.80705, 4.74888 50.80724, ..."
1,2,Mons,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),146238,152166,"+4,1%",1999-2019,,"POLYGON ((3.91197 50.61542, 3.91230 50.61647, ..."
2,3,Huy,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),57971,66806,"+15,2%",1999-2019,,"POLYGON ((5.29186 50.63330, 5.28140 50.63716, ..."
3,4,Liège,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),346789,365136,"+5,3%",1999-2019,,"POLYGON ((5.69315 50.81199, 5.69263 50.81205, ..."
4,5,Waremme,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),39772,47761,"+20,1%",1999-2019,,"MULTIPOLYGON (((5.34921 50.63342, 5.34873 50.6..."
...,...,...,...,...,...,...,...,...,...,...,...,...
74,75,Soignies,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),54719,61045,"+11,6%",1999-2019,,"POLYGON ((4.09185 50.66885, 4.08809 50.66575, ..."
75,76,Thuin,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),51206,52984,"+3,5%",1999-2019,,"POLYGON ((4.29384 50.42438, 4.29343 50.42548, ..."
76,77,Tournai-Mouscron,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),122277,128896,"+5,4%",1999-2019,,"MULTIPOLYGON (((3.45931 50.76215, 3.45996 50.7..."
77,78,La Louvière,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),79095,82335,"+4,1%",1999-2019,,"POLYGON ((4.20365 50.51621, 4.18635 50.52517, ..."


In [4]:
def clean_data(df):
    df.columns = [str.lower(col).replace(' ', '_') for col in df.columns]
    return df

In [5]:
clean_data(workchange).head()

Unnamed: 0,objectid,name,region,de_entity,fr_entity,en_entity,pop_20_64_1999,pop_20_64_2019,evo_20_64_1999_2019_txt,reference_period,description,geometry
0,1,Nivelles,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),205678,233225,"+13,4%",1999-2019,,"POLYGON ((4.75090 50.80705, 4.74888 50.80724, ..."
1,2,Mons,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),146238,152166,"+4,1%",1999-2019,,"POLYGON ((3.91197 50.61542, 3.91230 50.61647, ..."
2,3,Huy,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),57971,66806,"+15,2%",1999-2019,,"POLYGON ((5.29186 50.63330, 5.28140 50.63716, ..."
3,4,Liège,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),346789,365136,"+5,3%",1999-2019,,"POLYGON ((5.69315 50.81199, 5.69263 50.81205, ..."
4,5,Waremme,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),39772,47761,"+20,1%",1999-2019,,"MULTIPOLYGON (((5.34921 50.63342, 5.34873 50.6..."


In [6]:
# renaming columns
workchange.rename(columns={'name': 'city' , 'pop_20_64_1999': 'population_1999', 'pop_20_64_2019': 'population_2019', 'evo_20_64_1999_2019_txt': 'change_in_percentage'}, inplace=True)

In [7]:
workchange['city'].value_counts() 

Neufchâteau                2
Nivelles                   1
Merzig-Wadern              1
Nancy                      1
Lunéville                  1
                          ..
Eifelkreis Bitburg-Prüm    1
Bernkastel-Wittlich        1
Trier                      1
Westerwaldkreis            1
Luxembourg                 1
Name: city, Length: 78, dtype: int64

In [8]:
# checking the one city listed twice. Not a duplicate, can't be dropped, details differ so keeping both
workchange.loc[workchange['city'] == 'Neufchâteau']

Unnamed: 0,objectid,city,region,de_entity,fr_entity,en_entity,population_1999,population_2019,change_in_percentage,reference_period,description,geometry
8,9,Neufchâteau,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),31076,36428,"+17,2%",1999-2019,,"POLYGON ((5.06469 50.10367, 5.06135 50.10537, ..."
69,70,Neufchâteau,LOR,Arbeitsmarktregion (zone d'emploi),Zone d'emploi,Employment area (zone d'emploi),31245,26703,"-14,5%",1999-2017,,"POLYGON ((5.74610 48.47625, 5.74243 48.47450, ..."


In [9]:
workchange.isna().sum() # no NaN values to deal with

objectid                0
city                    0
region                  0
de_entity               0
fr_entity               0
en_entity               0
population_1999         0
population_2019         0
change_in_percentage    0
reference_period        0
description             0
geometry                0
dtype: int64

In [10]:
#workchange.to_csv('/Users/szabonikolett/Downloads/workchange.csv')

Following columns will be dropped:
- desription (it has no values)
- reg_preiod (the whole df is about a fix time frame, hence pointless to keep)
- goemetry (might be good in Tableau, but for analysis it seves no useful info)

In [11]:
# general cleaning:  removing % + , ; dropping redundant columns 
workchange['change_in_percentage'] = workchange['change_in_percentage'].str.replace('%', '')
workchange['change_in_percentage'] = workchange['change_in_percentage'].str.replace(',', '.')
workchange['change_in_percentage'] = workchange['change_in_percentage'].str.replace('+', '')

workchange = workchange.drop(['objectid', 'de_entity', 'fr_entity', 'en_entity', 'description', 'reference_period' ,'geometry'], axis=1)
workchange

  workchange['change_in_percentage'] = workchange['change_in_percentage'].str.replace('+', '')


Unnamed: 0,city,region,population_1999,population_2019,change_in_percentage
0,Nivelles,WAL,205678,233225,13.4
1,Mons,WAL,146238,152166,4.1
2,Huy,WAL,57971,66806,15.2
3,Liège,WAL,346789,365136,5.3
4,Waremme,WAL,39772,47761,20.1
...,...,...,...,...,...
74,Soignies,WAL,54719,61045,11.6
75,Thuin,WAL,51206,52984,3.5
76,Tournai-Mouscron,WAL,122277,128896,5.4
77,La Louvière,WAL,79095,82335,4.1


In [12]:
workchange.dtypes

city                    object
region                  object
population_1999          int64
population_2019          int64
change_in_percentage    object
dtype: object

In [None]:
#workchange['change_in_percentage'] = workchange['change_in_precentage].astype(float)

In [13]:
workchange['population_2019'].mean()

87202.58227848102

In [None]:
# later on I used a merged dataset so this is not the one 

#connection_string = 'mysql+pymysql://root:' + password + '@localhost/mid_project'
#engine = create_engine(connection_string) # creates the connection to the database

#workchange.to_sql('workchange', con=engine, index=False)


# Projection of working age poupulation 2020-2050

— Projection of working age population (20-64 years) 2020-2050

— Territorial entities: districts (Wallonia), departments (Lorraine), Grand Duchy (Luxembourg), Kreise (Saarland, Rheinland-Pfalz)

— Statistical data sources: INSEE, Statec, Statbel-DEMOBEL, Statistisches Landesamt Rheinland-Pfalz, Statistisches Amt Saarland/Destatis.Calculations: OIE/IBA 2020 — Geodata sources: Act Luxembourg 2017, IGN France 2017, GeoBasis-DE/BKG 2017, NGI-Belgium 2017.Harmonisation: SIG-GR/GIS-GR 2020

— Territorial entities: districts (Wallonia), departments (Lorraine), Grand Duchy (Luxembourg), Kreise (Saarland, Rheinland-Pfalz)

— Statistical data sources: INSEE, Statec, Statbel-DEMOBEL, Statistisches Landesamt Rheinland-Pfalz, Statistisches Amt Saarland/Destatis. Calculations: OIE/IBA 2020

— Geodata sources: Act Luxembourg 2017, IGN France 2017, GeoBasis-DE/BKG 2017, NGI-Belgium 2017. Harmonisation: SIG-GR/GIS-GR 2020

In [14]:
projection = gpd.read_file('/Users/szabonikolett/Downloads/projection-of-working-age-population-2020-2050-2024-projection-2020-2050-0.geojson')

In [15]:
clean_data(projection)

Unnamed: 0,objectid,region,name,de_entity,fr_entity,en_entity,pop_20_64_2020,pop_20_64_2050,evo_pop_20_64_2020_2050_txt,description,geometry
0,1,SAR,Saarland,Bundesland,Land,Federal state,585000,451000,"-22,9%",,"POLYGON ((7.06429 49.62671, 7.06285 49.62778, ..."
1,2,WAL,Nivelles,Kreis (arrondissement),Arrondissement,District (arrondissement),234103,245259,"+4,8%",,"POLYGON ((4.75090 50.80705, 4.74888 50.80724, ..."
2,3,WAL,Ath,Kreis (arrondissement),Arrondissement,District (arrondissement),51539,55221,"+7,1%",,"POLYGON ((3.70908 50.77467, 3.70946 50.77585, ..."
3,4,WAL,Charleroi,Kreis (arrondissement),Arrondissement,District (arrondissement),249171,227580,"-8,7%",,"POLYGON ((4.24081 50.60027, 4.24102 50.60162, ..."
4,5,WAL,Mons,Kreis (arrondissement),Arrondissement,District (arrondissement),151870,145531,"-4,2%",,"POLYGON ((3.91197 50.61542, 3.91230 50.61647, ..."
...,...,...,...,...,...,...,...,...,...,...,...
58,59,RLP,Mainz-Bingen,Landkreis,Arrondissement (Landkreis),District (Landkreis),126775,108510,"-14,4%",,"POLYGON ((7.76660 50.08136, 7.76484 50.08259, ..."
59,60,RLP,Südwestpfalz,Landkreis,Arrondissement (Landkreis),District (Landkreis),55345,40524,"-26,8%",,"POLYGON ((7.60906 49.37297, 7.60780 49.37432, ..."
60,61,WAL,DG Belgien,Verviers - Gemeinden Deutschsprachige Gemeinsc...,Verviers - communes de la Communauté germanophone,Verviers - municipalities of the German community,45838,41196,"-10,1%",,"MULTIPOLYGON (((6.20656 50.52085, 6.20629 50.5..."
61,62,WAL,Verviers - communes francophones,Verviers - Gemeinden franz.sprachige Gemeinschaft,Verviers - communes de la Communauté française,Verviers - municipalities of the French Community,121232,116945,"-3,5%",,"POLYGON ((5.96840 50.76080, 5.96175 50.76194, ..."


In [16]:
projection['description'].value_counts() # empty column

    63
Name: description, dtype: int64

In [17]:
projection['description'].nunique() 

1

In [18]:
# unifying the columns in both dataframes 
# removing % + , ; renaming columns, dropping redundant columns 

projection.rename(columns={'name': 'city', 'pop_20_64_2020': 'population_2020', 'pop_20_64_2050': 'population_2050' , 'evo_pop_20_64_2020_2050_txt': 'pop_projection_percentage'}, inplace=True)

projection['pop_projection_percentage'] = projection['pop_projection_percentage'].str.replace('%', '')                                               
projection['pop_projection_percentage'] = projection['pop_projection_percentage'].str.replace('+', '')
projection['pop_projection_percentage'] = projection['pop_projection_percentage'].str.replace(',', '.') 

#projection = projection.drop(['objectid', 'de_entity', 'fr_entity', 'en_entity', 'description', 'geometry'], axis=1)
projection

  projection['pop_projection_percentage'] = projection['pop_projection_percentage'].str.replace('+', '')


Unnamed: 0,objectid,region,city,de_entity,fr_entity,en_entity,population_2020,population_2050,pop_projection_percentage,description,geometry
0,1,SAR,Saarland,Bundesland,Land,Federal state,585000,451000,-22.9,,"POLYGON ((7.06429 49.62671, 7.06285 49.62778, ..."
1,2,WAL,Nivelles,Kreis (arrondissement),Arrondissement,District (arrondissement),234103,245259,4.8,,"POLYGON ((4.75090 50.80705, 4.74888 50.80724, ..."
2,3,WAL,Ath,Kreis (arrondissement),Arrondissement,District (arrondissement),51539,55221,7.1,,"POLYGON ((3.70908 50.77467, 3.70946 50.77585, ..."
3,4,WAL,Charleroi,Kreis (arrondissement),Arrondissement,District (arrondissement),249171,227580,-8.7,,"POLYGON ((4.24081 50.60027, 4.24102 50.60162, ..."
4,5,WAL,Mons,Kreis (arrondissement),Arrondissement,District (arrondissement),151870,145531,-4.2,,"POLYGON ((3.91197 50.61542, 3.91230 50.61647, ..."
...,...,...,...,...,...,...,...,...,...,...,...
58,59,RLP,Mainz-Bingen,Landkreis,Arrondissement (Landkreis),District (Landkreis),126775,108510,-14.4,,"POLYGON ((7.76660 50.08136, 7.76484 50.08259, ..."
59,60,RLP,Südwestpfalz,Landkreis,Arrondissement (Landkreis),District (Landkreis),55345,40524,-26.8,,"POLYGON ((7.60906 49.37297, 7.60780 49.37432, ..."
60,61,WAL,DG Belgien,Verviers - Gemeinden Deutschsprachige Gemeinsc...,Verviers - communes de la Communauté germanophone,Verviers - municipalities of the German community,45838,41196,-10.1,,"MULTIPOLYGON (((6.20656 50.52085, 6.20629 50.5..."
61,62,WAL,Verviers - communes francophones,Verviers - Gemeinden franz.sprachige Gemeinschaft,Verviers - communes de la Communauté française,Verviers - municipalities of the French Community,121232,116945,-3.5,,"POLYGON ((5.96840 50.76080, 5.96175 50.76194, ..."


In [19]:
projection['city'].value_counts() 

Saarland                     1
Worms                        1
Westerwaldkreis              1
Trier                        1
Bernkastel-Wittlich          1
                            ..
Altenkirchen (Westerwald)    1
Bad Kreuznach                1
Birkenfeld                   1
Cochem-Zell                  1
Luxembourg                   1
Name: city, Length: 63, dtype: int64

In [20]:
projection.dtypes

objectid                        int64
region                         object
city                           object
de_entity                      object
fr_entity                      object
en_entity                      object
population_2020                 int64
population_2050                 int64
pop_projection_percentage      object
description                    object
geometry                     geometry
dtype: object

In [21]:
projection['pop_projection_percentage'] = projection['pop_projection_percentage'].astype(float)

In [22]:
projection.dtypes

objectid                        int64
region                         object
city                           object
de_entity                      object
fr_entity                      object
en_entity                      object
population_2020                 int64
population_2050                 int64
pop_projection_percentage     float64
description                    object
geometry                     geometry
dtype: object

In [23]:
projection.isna().sum() # no NaN values to deal with 

objectid                     0
region                       0
city                         0
de_entity                    0
fr_entity                    0
en_entity                    0
population_2020              0
population_2050              0
pop_projection_percentage    0
description                  0
geometry                     0
dtype: int64

In [24]:
projection['population_2020'].mean()

108656.11111111111

In [26]:
projection['city'].value_counts()

Saarland                     1
Worms                        1
Westerwaldkreis              1
Trier                        1
Bernkastel-Wittlich          1
                            ..
Altenkirchen (Westerwald)    1
Bad Kreuznach                1
Birkenfeld                   1
Cochem-Zell                  1
Luxembourg                   1
Name: city, Length: 63, dtype: int64

In [27]:
workchange['city'].value_counts()

Neufchâteau                2
Nivelles                   1
Merzig-Wadern              1
Nancy                      1
Lunéville                  1
                          ..
Eifelkreis Bitburg-Prüm    1
Bernkastel-Wittlich        1
Trier                      1
Westerwaldkreis            1
Luxembourg                 1
Name: city, Length: 78, dtype: int64

In [28]:
workchange['city'].isin(projection['city']).value_counts

<bound method IndexOpsMixin.value_counts of 0      True
1      True
2      True
3      True
4      True
      ...  
74     True
75     True
76    False
77    False
78     True
Name: city, Length: 79, dtype: bool>

In [None]:
#newdf = pd.concat([workchange, projection], axis=1)
#newdf.head(60)

In [None]:
#newdf.isna().sum()

In [None]:
#df.to_csv('data.csv').
#/Users/szabonikolett/Downloads

#projection.to_csv('/Users/szabonikolett/Downloads/projection.csv')

## Final dataset

In [29]:
# merging them to use it for visualisation
mergeddata = pd.merge(workchange, projection, on=['city'], how='inner')
mergeddata

Unnamed: 0,city,region_x,population_1999,population_2019,change_in_percentage,objectid,region_y,de_entity,fr_entity,en_entity,population_2020,population_2050,pop_projection_percentage,description,geometry
0,Nivelles,WAL,205678,233225,13.4,2,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),234103,245259,4.8,,"POLYGON ((4.75090 50.80705, 4.74888 50.80724, ..."
1,Mons,WAL,146238,152166,4.1,5,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),151870,145531,-4.2,,"POLYGON ((3.91197 50.61542, 3.91230 50.61647, ..."
2,Huy,WAL,57971,66806,15.2,10,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),66892,67229,0.5,,"POLYGON ((5.29186 50.63330, 5.28140 50.63716, ..."
3,Liège,WAL,346789,365136,5.3,11,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),364590,352595,-3.3,,"POLYGON ((5.69315 50.81199, 5.69263 50.81205, ..."
4,Waremme,WAL,39772,47761,20.1,12,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),47990,54426,13.4,,"MULTIPOLYGON (((5.34921 50.63342, 5.34873 50.6..."
5,Arlon,WAL,30562,37753,23.5,13,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),38069,38152,0.2,,"POLYGON ((5.74202 49.85778, 5.73887 49.85690, ..."
6,Bastogne,WAL,22666,28505,25.8,14,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),28774,32972,14.6,,"POLYGON ((5.88697 50.33965, 5.88331 50.34018, ..."
7,Marche-en-Famenne,WAL,28197,33226,17.8,15,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),33141,28629,-13.6,,"POLYGON ((5.44110 50.42069, 5.43369 50.42410, ..."
8,Neufchâteau,WAL,31076,36428,17.2,16,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),36512,38646,5.8,,"POLYGON ((5.06469 50.10367, 5.06135 50.10537, ..."
9,Neufchâteau,LOR,31245,26703,-14.5,16,WAL,Kreis (arrondissement),Arrondissement,District (arrondissement),36512,38646,5.8,,"POLYGON ((5.06469 50.10367, 5.06135 50.10537, ..."


In [30]:
# general cleaning: renaming, and dropping duplicated region_y column
mergeddata.rename(columns={'region_x': 'region'}, inplace=True)

mergeddata = mergeddata.drop(['region_y'], axis=1)

In [31]:
mergeddata.head()

Unnamed: 0,city,region,population_1999,population_2019,change_in_percentage,objectid,de_entity,fr_entity,en_entity,population_2020,population_2050,pop_projection_percentage,description,geometry
0,Nivelles,WAL,205678,233225,13.4,2,Kreis (arrondissement),Arrondissement,District (arrondissement),234103,245259,4.8,,"POLYGON ((4.75090 50.80705, 4.74888 50.80724, ..."
1,Mons,WAL,146238,152166,4.1,5,Kreis (arrondissement),Arrondissement,District (arrondissement),151870,145531,-4.2,,"POLYGON ((3.91197 50.61542, 3.91230 50.61647, ..."
2,Huy,WAL,57971,66806,15.2,10,Kreis (arrondissement),Arrondissement,District (arrondissement),66892,67229,0.5,,"POLYGON ((5.29186 50.63330, 5.28140 50.63716, ..."
3,Liège,WAL,346789,365136,5.3,11,Kreis (arrondissement),Arrondissement,District (arrondissement),364590,352595,-3.3,,"POLYGON ((5.69315 50.81199, 5.69263 50.81205, ..."
4,Waremme,WAL,39772,47761,20.1,12,Kreis (arrondissement),Arrondissement,District (arrondissement),47990,54426,13.4,,"MULTIPOLYGON (((5.34921 50.63342, 5.34873 50.6..."


In [32]:
mergeddata.isna().sum()

city                         0
region                       0
population_1999              0
population_2019              0
change_in_percentage         0
objectid                     0
de_entity                    0
fr_entity                    0
en_entity                    0
population_2020              0
population_2050              0
pop_projection_percentage    0
description                  0
geometry                     0
dtype: int64

In [33]:
# importing the final clean mergeddata to make it easier to visualise it in Tableau

mergeddata.to_csv('/Users/szabonikolett/Downloads/mergeddata.csv')

In [None]:
#workchange.to_sql('MidProject', con, schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None, method=None)
# DataFrame.to_sql(name, con, schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None, method=None)

# SQL database import - export

In [34]:
# importing the final dataset to SQL Workbench

import pymysql
from sqlalchemy import create_engine

import getpass  # To get the password without showing the input
password = getpass.getpass()

········


In [35]:
connection_string = 'mysql+pymysql://root:' + password + '@localhost/mid_project'
engine = create_engine(connection_string) # creates the connection to the database

mergeddata.to_sql('mergeddata', con=engine, index=False)


In [36]:
# get the data
connection_string = 'mysql+pymysql://root:' + password + '@localhost/mid_project'
engine = create_engine(connection_string) # creates the connection to the database
query = '''SELECT * FROM mergeddata;'''

data = pd.read_sql_query(query, engine)
data.head()

Unnamed: 0,city,region,population_1999,population_2019,change_in_percentage,population_2020,population_2050,pop_projection_percentage
0,Nivelles,WAL,205678,233225,13.4,234103,245259,4.8
1,Mons,WAL,146238,152166,4.1,151870,145531,-4.2
2,Huy,WAL,57971,66806,15.2,66892,67229,0.5
3,Liège,WAL,346789,365136,5.3,364590,352595,-3.3
4,Waremme,WAL,39772,47761,20.1,47990,54426,13.4


## Hypotheses test

In [37]:
workchange['population_1999'].mean()

84964.18987341772

In [38]:
workchange['population_2019'].mean()

87202.58227848102

In [39]:
projection['population_2020'].mean()

108656.11111111111

Choosing two-tailed hypotheses test as in my testing I am trying to test in the alternative hypotheses if the population will either increase or decrease while the one-tailed test's alternative hypotheses would only test whether it changes or not. 


First Hypothesis: 

    - Zero hypothesis: the mean population of 2020 equals to the mean population of 1999
    - Alternative hypothesis: the mean population of 2020 does not equal to the mean population of 2020

In [54]:
# Two sided testing: 
# Null hypothesis: mean of population in 1999 = mean of poupulation of 2020
# Alternative hypothesis: mean population of 1999 will not equal to the mean population of 2020 

# H0 = 
# H1 != 

from scipy.stats import ttest_1samp

stat, pval = ttest_1samp(workchange['population_1999'], projection['population_2020'].mean())

In [55]:
print('Stat is  ', stat)
print('P value for the two-tailed test is ', pval)

# stat - : workchange 1999 is smaller than my base of comp. stat is for < >
# this is the one to KEEP
# reject null, by stat: pop 1999 is less

Stat is   -3.1230767211916812
P value for the two-tailed test is  0.0025115773581651526


Rejecting the Null hypothesis, P value is very low. The high variance between the values resulted in the negative T value, the alternative hypothesis can be accepted, the mean working age population was less in 1999, proving it increased by 2020. 

In [56]:
# Trying one sided test
# Null hypothesis: The population in 2020 will be significantly less =< the population in 1999
# Alternative hypothesis: The population in 2020 will be significantly greater > the population in 1999
# alternative: 2020 population will not be significantly greater than 1999

from scipy.stats import ttest_1samp

stat, pval = ttest_1samp(projection['population_2020'], workchange['population_1999'].mean())

In [57]:
print('Stat is  ', stat)

print('Pvalue for the one-tailed test is ', pval/2)

Stat is   1.5574228009054683
Pvalue for the one-tailed test is  0.06223021212274814


In [None]:
# p value is the evidence against a null hypothesis. The smaller the p-value, the stronger the evidence that the null hypothesis can be reject3ed.