# 1. Swedish demographic data (*)

Go to Swedish-language wikipedia page Sveriges demografi.

[https://sv.wikipedia.org/wiki/Sveriges_demografi]


In [4]:
import pandas as pd

#a) Read in the table under "Befolkningsstatistik sedan 1900" into a DataFrame
# https://sv.wikipedia.org/wiki/Sveriges_demografi

link = "https://sv.wikipedia.org/wiki/Sveriges_demografi"
swedenpop = pd.read_html(link, 
    match="Befolkningsförändringar",
    thousands=" ",
    decimal=","
    )[0]

# Chooses swedish standards for thousands (whitespace)
# and , as decimal divider
# NOTE if " " doesn't work, use "\xa0"

# Change name on year column
swedenpop = swedenpop.rename({"Unnamed: 0":"Årtal"}, axis="columns")

# All columns:
# 'Årtal', 'Folkmängd', 'Födda', 'Döda', 'Befolkningsförändringar',
#       'Nativiteten (per 1000)', 'Dödstalen (per 1000)',
#       'Befolkningsförändringar (per 1000)', 'Total fertilitet']
swedenpop


Unnamed: 0,Årtal,Folkmängd,Födda,Döda,Befolkningsförändringar,Nativiteten (per 1000),Dödstalen (per 1000),Befolkningsförändringar (per 1000),Total fertilitet
0,1900,5117000,138139,86146,51993,27.0,16.8,10.2,4.02
1,1901,5156000,139370,82772,56598,27.0,16.1,11.0,4.04
2,1902,5187000,137364,79722,57642,26.5,15.4,11.1,3.95
3,1903,5210000,133896,78610,55286,25.7,15.1,10.6,3.82
4,1904,5241000,134952,80152,54800,25.7,15.3,10.5,3.83
...,...,...,...,...,...,...,...,...,...
116,2016,9995000,117425,90982,26443,11.8,9.2,2.6,1.85
117,2017,10120000,115416,91972,23444,11.4,9.1,2.3,1.79
118,2018,10230000,115832,92185,23647,11.3,9.0,2.3,1.75
119,2019,10327589,114523,88766,28727,11.1,8.7,2.4,1.70


In [21]:
import plotly_express as px
import matplotlib.pyplot as plt

#b) Choose to do some EDA (exploratory data analysis) on this dataset. And draw some relevant graphs.

# Plot a lot of plots
#
# fig1: population
fig1 = px.line(swedenpop, 
    x="Årtal", 
    y="Folkmängd", 
    title="Demography 1900 to 2020", 
    range_y=[0,11e6])
fig1.show()


# fig2: born and dead, and difference
fig2 = px.line(swedenpop, 
    x="Årtal", 
    y=["Födda","Döda","Befolkningsförändringar"])
fig2.show()    

# fig3: nativity, differences deathnumber per 1000
fig3 = px.line(swedenpop, 
    x="Årtal", 
    y=["Nativiteten (per 1000)","Dödstalen (per 1000)","Befolkningsförändringar (per 1000)"])
fig3.show()    

# fig4: fertility
fig4 = px.line(swedenpop, 
    x="Årtal", 
    y="Total fertilitet",
    range_y=[0,5])
fig4.show()    


In [26]:
# Plot a few more inte-connected plots
#
fig1 = px.line(swedenpop, 
    x="Folkmängd", 
    y=["Födda","Döda","Befolkningsförändringar"], 
    title="Demography 1900 to 2020")
fig1.show()

fig2 = px.line(swedenpop, 
    x="Folkmängd", 
    y=["Nativiteten (per 1000)","Dödstalen (per 1000)","Befolkningsförändringar (per 1000)"])
fig2.show()

"""
# fig2: born and dead, and difference
fig2 = px.line(swedenpop, 
    x="Årtal", 
    y=["Födda","Döda","Befolkningsförändringar"])
fig2.show()    

# fig3: nativity, differences deathnumber per 1000
fig3 = px.line(swedenpop, 
    x="Årtal", 
    y=["Nativiteten (per 1000)","Dödstalen (per 1000)","Befolkningsförändringar (per 1000)"])
fig3.show()    

# fig4: fertility
fig4 = px.line(swedenpop, 
    x="Årtal", 
    y="Total fertilitet",
    range_y=[0,5])
fig4.show() """;



In [52]:
# c) Now we want to go backwards in time (before 1900) to see how population has changed
#  in Sweden. Read in the table under history and keep the data of "Folkmängd" from 1570-1865.

link = "https://sv.wikipedia.org/wiki/Sveriges_demografi"
swedenpopold = pd.read_html(link, 
    match="Vid utgången av år",
    thousands="\xa0",
    decimal=",",
    )[0].drop([7,8,9,10], axis="rows"
    ).drop(["Årlig tillväxt"], axis="columns"
    ).rename({"Vid utgången av år":"Årtal"}, axis="columns")
# Chooses swedish standards for thousands (whitespace)
# and , as decimal divider
# NOTE if " " doesn't work, use "\xa0"
# removes data after 1865 (already included above)
# removes columns under "Årlig tillväxt", not used here
# renames column to year

# Rename the double column names:
#MultiIndex([(    'Årtal',     'Årtal'),
#            ('Folkmängd', 'Folkmängd')],
swedenpopold = swedenpopold.set_axis(["Årtal", "Folkmängd"], axis=1, inplace=False)
swedenpopold


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



Unnamed: 0,Årtal,Folkmängd
0,1570,900000
1,1650,1225000
2,1700,1485000
3,1720,1350000
4,1755,1878000
5,1815,2465000
6,1865,4099000


In [80]:
 # d) Now concatenate this with the table from 1900 so that you have population data 
 # from 1570 to 2020. Note that you may need to clean the data in order for it to fit 
 # properly. Also you may be able to do this in several ways.

swedenpoptot = pd.concat([swedenpopold,swedenpop[["Årtal","Folkmängd"]]]).astype(int)
swedenpoptot.head(10)


Unnamed: 0,Årtal,Folkmängd
0,1570,900000
1,1650,1225000
2,1700,1485000
3,1720,1350000
4,1755,1878000
5,1815,2465000
6,1865,4099000
0,1900,5117000
1,1901,5156000
2,1902,5187000


In [96]:
#e) Draw a graph of population data from 1570-2020.
fig = px.line(swedenpoptot, x="Årtal", y="Folkmängd", markers=True, range_y=[0,11e6])
fig.show()


# 2. Denmark demographic data (*)

Go to the Danish-language wikipedia page Danmarks demografi. https://da.wikipedia.org/wiki/Danmarks_demografi

  a) Read in the table under "Demografiske data" into a DataFrame (*)

  b) Clean the data and draw a graph of population against year from 1769-2020. (**)