In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import requests

# Apply seaborn's default plot styling to every matplotlib figure drawn below.
sns.set()

In [None]:
# Table endpoint that the query below is POSTed to. HTTPS is used so the
# request and the returned statistics are not sent in clear text.
url = 'https://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101E/FodelselandArK'

# Query payload for the table API: one selection per dimension code
# (Fodelseland, Alder, Kon) and a JSON response format.
data = {
    "query": [
        {
            "code": "Fodelseland",
            "selection": {
                "filter": "vs:LandISOAlfa2-96A3",
                "values": [
                    "AF", "AL", "DZ", "AD", "AO", "AG", "AE", "AR", "AM", "AU",
                    "AZ", "BS", "BH", "BD", "BB", "BE", "BZ", "BJ", "BM", "BT",
                    "BO", "BA", "BW", "BR", "VG", "BN", "BG", "BF", "BI", "CF",
                    "CL", "CO", "KM", "CR", "CY", "DK", "DJ", "DM", "DO", "EC",
                    "EG", "GQ", "SV", "CI", "ER", "EE", "ET", "FJ", "PH", "FI",
                    "FR", "GA", "GM", "GE", "GH", "GI", "GR", "GD", "GT", "GN",
                    "GW", "GY", "HT", "HN", "HK", "IN", "ID", "IQ", "IR", "IE",
                    "IS", "IL", "IT", "JM", "JP", "YE", "JO", "YU", "KH", "CM",
                    "CA", "CV", "KZ", "KE", "CN", "KG", "KI", "CG", "CD", "XK",
                    "HR", "CU", "KW", "LA", "LS", "LV", "LB", "LR", "LY", "LI",
                    "LT", "LU", "MG", "MW", "MY", "MV", "ML", "MT", "MA", "MH",
                    "MR", "MU", "MX", "FM", "MZ", "MD", "MC", "MN", "ME", "MM",
                    "NA", "NR", "NL", "NP", "NI", "NE", "NG", "KP", "MK", "NO",
                    "NZ", "OM", "PK", "PW", "PS", "PA", "PG", "PY", "PE", "PL",
                    "PT", "QA", "RO", "RW", "RU", "SB", "WS", "SM", "ST", "SA",
                    "CH", "SN", "RS", "CS", "SC", "SL", "SG", "SK", "SI", "SO",
                    "SU", "ES", "LK", "KN", "LC", "VC", "GB", "SD", "SR", "SZ",
                    "SE", "ZA", "KR", "SS", "SY", "TJ", "TW", "TZ", "TD", "TH",
                    "CZ", "QT", "TG", "TO", "TT", "TN", "TR", "TM", "TV", "DE",
                    "UG", "UA", "HU", "UY", "US", "UZ", "VU", "VA", "VE", "VN",
                    "BY", "ZM", "ZW", "AT", "TL", "ÖOF",
                ],
            },
        },
        {
            "code": "Alder",
            "selection": {
                "filter": "agg:Ålder10år",
                "values": [
                    "-4", "5-14", "15-24", "25-34", "35-44", "45-54",
                    "55-64", "65-74", "75-84", "85-94", "95+",
                ],
            },
        },
        {
            "code": "Kon",
            "selection": {
                "filter": "item",
                "values": ["1", "2"],
            },
        },
    ],
    "response": {"format": "json"},
}

In [None]:
# POST the query. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() stops here on an HTTP error instead of
# failing later when the body is parsed.
r = requests.post(url, json=data, timeout=60)
r.raise_for_status()
r.status_code

In [None]:
# Decode the response body.
# NOTE(review): the name `json` would shadow the standard-library module if
# that module were ever imported in this notebook.
json = r.json()
# Peek at the first record to see its layout.
json['data'][0]

In [None]:
# Flatten each record into a (key[0], key[1], key[2], key[3], values[0]) tuple.
nr_records = len(json['data'])

data_list = [
    (
        entry['key'][0],
        entry['key'][1],
        entry['key'][2],
        entry['key'][3],
        entry['values'][0],
    )
    for entry in json['data']
]


In [None]:
# Show only the first few tuples; echoing the whole list floods the output.
data_list[:5]

In [None]:
# One row per flattened record, with a name for each tuple position.
migrants = pd.DataFrame(
    data=data_list,
    columns=['abb', 'age', 'gender', 'year', 'count'],
)


In [None]:
# Inspect the assembled frame.
migrants

In [None]:
# Lookup table read from a header-less, ';'-separated file.
# keep_default_na=False stops pandas from turning strings such as "NA" into NaN.
country_abb = pd.read_csv(
    'country_abb.csv',
    sep=';',
    header=None,
    keep_default_na=False,
    encoding='UTF-8',
)
country_abb.columns = ['abb', 'country']
country_abb = country_abb.set_index('abb')
country_abb

In [None]:
# Look up one abbreviation; more than one row back means the index has duplicates.
country_abb.loc['US']

In [None]:
# Keep only the first row per abbreviation so .loc[...] yields a single row.
# The result used to be bound to the misspelled name `countr_abb`, which left
# `country_abb` unchanged.
country_abb = country_abb[~country_abb.index.duplicated(keep='first')]
country_abb.loc['US']

In [None]:
# Flag repeated index labels (the first occurrence is kept) and filter them out.
mask = country_abb.index.duplicated(keep='first')
country_abb = country_abb.loc[~mask]
country_abb.loc['US']

In [None]:
# Cast the counts to integers, then keep an untouched copy of the frame.
migrants = migrants.astype({'count': int})
migrants_orig = migrants.copy()
migrants

In [None]:
# Vectorised lookup of the country name for every abbreviation; this replaces
# one .loc call per row.
_country_names = migrants['abb'].map(country_abb['country'])

# map() gives NaN for unknown keys where .loc raised, so fail explicitly to
# keep a missing abbreviation from slipping through silently.
_unmapped = migrants.loc[_country_names.isna(), 'abb'].unique()
if len(_unmapped):
    raise KeyError('abbreviations missing from country_abb: {}'.format(sorted(_unmapped)))

migrants['country'] = _country_names
migrants

In [None]:
# Total count per year across all rows.
migrants.groupby('year')['count'].sum()

In [None]:
# https://worldpopulationreview.com/country-rankings/muslim-majority-countries
muslim_countries_ratio = pd.read_json('muslim_countries.json')

# Strip literal dots before the integer cast. regex=False makes that explicit,
# because how a single-character pattern is handled by default has differed
# between pandas versions.
# NOTE(review): presumably pop2020 is stored in thousands with three decimals;
# a value with trailing zeros dropped would come out too small. TODO confirm.
muslim_countries_ratio['pop2020'] = (
    muslim_countries_ratio['pop2020']
    .astype(str)
    .str.replace('.', '', regex=False)
    .astype(int)
)
muslim_countries_ratio['muslimPopulation'] = muslim_countries_ratio['muslimPopulation'].astype(int)
muslim_countries_ratio[['muslim%','worldMuslim%']] = muslim_countries_ratio[['muslim%','worldMuslim%']].astype(float)
muslim_countries_ratio

In [None]:
# Countries whose 'muslim%' exceeds 50, sorted by and indexed on country name.
muslim_countries = (
    muslim_countries_ratio
    .loc[muslim_countries_ratio['muslim%'] > 50]
    .sort_values('country')
    .set_index('country')
)
muslim_countries

In [None]:
# Yearly sum of all counts, kept as a one-column frame so more columns can be added.
total_pop = migrants.groupby('year')['count'].sum().to_frame('total_pop')
total_pop

In [None]:
# Everything except rows whose abbreviation is 'SE'; copied so it can be
# modified independently of `migrants`.
real_migrants = migrants.loc[migrants['abb'].ne('SE')].copy()
real_migrants

In [None]:
# Sum only the numeric `count` column. A bare .sum() also aggregates the string
# columns (pandas >= 2.0 concatenates them), which yields a multi-column frame
# that cannot be assigned to a single column afterwards.
migrants_yearly = real_migrants.groupby('year')[['count']].sum()


In [None]:
# Split each yearly total into foreign- and native-born parts, plus the foreign share in percent.
total_pop['foreign_born'] = migrants_yearly
total_pop['native_born'] = total_pop['total_pop'].sub(total_pop['foreign_born'])
total_pop['foreign_born_%'] = total_pop['foreign_born'].div(total_pop['total_pop']).mul(100)
total_pop

In [None]:
# Bar chart of the foreign-born share per year, saved as a JPEG.
total_pop.plot.bar(
    y='foreign_born_%',
    title='SWE ratio foreign born of population',
    figsize=(18, 12),
)

plt.ylabel('%')
plt.savefig('swe_foreign_born_pct.jpg', format='jpg')

In [None]:
def is_muslim(country):
    '''
    Return 1 if ``country`` is a label of ``muslim_countries.index``, else 0.

    Membership is tested on the index itself, which avoids building a new
    array on every call and gives the same answer when the index holds
    repeated labels.

    Parameters
    ----------
    country : hashable
        Label to look for (the callers in this notebook pass country names).

    Returns
    -------
    int
        1 when the label is present, 0 otherwise.
    '''
    return int(country in muslim_countries.index)
    

In [None]:
# Flag each row from its country name. Mapping over the single column gives the
# same 0/1 values as a row-wise apply(axis=1), without building a Series per row.
real_migrants['muslim'] = real_migrants['country'].map(is_muslim)
real_migrants

In [None]:
# Yearly counts split by the 0/1 `muslim` flag. Only `count` is summed; a bare
# .sum() would also aggregate the string columns and produce more than the two
# columns expected here.
migrant_religion = (
    real_migrants.groupby(['year', 'muslim'])['count']
    .sum()
    .unstack()
    .rename(columns={0: 'No', 1: 'Yes'})
)
migrant_religion.columns.name='muslim'

# Share of ALL foreign born, as the plot title states. Yes / No was the ratio
# of the two groups to each other, which is a different quantity.
migrant_religion['ratio_muslim'] = migrant_religion['Yes'] / (migrant_religion['Yes'] + migrant_religion['No'])
migrant_religion

In [None]:
# Bar chart of the yearly ratio, saved as a JPEG.
migrant_religion.plot.bar(
    y='ratio_muslim',
    figsize=(18, 12),
    title='SWE ratio foreign born from Muslim Majority Countries of all foreign born',
)
plt.savefig('swe_ratio_muslim_foreign_born.jpg', format='jpg')

In [None]:
# Distinct country names, in order of first appearance, as a one-column frame.
all_countries = pd.DataFrame({'country': migrants['country'].unique()})

In [None]:
# Read sheet 'Tabell 9a', skipping the first 11 rows and keeping seven columns.
scb_migrant_deaths = pd.read_excel(
    'scb_prel_deaths.xlsx',
    sheet_name='Tabell 9a',
    skiprows=11,
    usecols=[0, 1, 2, 3, 6, 7, 8],
    thousands=',',
)

In [None]:
# Inspect the sheet as loaded.
scb_migrant_deaths

In [None]:
# Remove the rows labelled 54, 55 and 56 in place.
# NOTE(review): presumably these are trailing non-data rows of the sheet - TODO
# confirm. Re-running this cell raises KeyError because the labels are gone.
scb_migrant_deaths.drop([54,55,56],inplace=True)

In [None]:
# Inspect the frame after the rows were dropped.
scb_migrant_deaths

In [None]:
# Column-wise totals over every remaining row, including the last one.
# NOTE(review): the first column is summed as well; if it holds week labels
# rather than numbers its total is meaningless - TODO confirm.
total_deaths = scb_migrant_deaths.sum()
total_deaths

In [None]:
# Each value's share of its column total, for all rows except the last and all
# columns except the first. The division aligns on column labels.
ratio_weekly_deaths = scb_migrant_deaths.iloc[:-1,1:] / total_deaths
ratio_weekly_deaths


In [None]:
# Scale every value outside the last row and first column by (1 + its share of
# the column total): x * ratio + x.
# NOTE(review): if the last row holds deaths with an unknown week (TODO confirm),
# the intent may have been last_row * ratio + x rather than x * ratio + x.
scb_migrant_deaths.iloc[:-1,1:] = scb_migrant_deaths.iloc[:-1,1:] * ratio_weekly_deaths + scb_migrant_deaths.iloc[:-1,1:]

In [None]:
# Relabel the rows 1..n, then discard the row that ends up labelled 54.
scb_migrant_deaths.index = range(1, len(scb_migrant_deaths) + 1)
scb_migrant_deaths = scb_migrant_deaths.drop(54, axis=0)


In [None]:
# Inspect the frame after relabelling.
scb_migrant_deaths

In [None]:
# Reference populations: the mean of the rows labelled '2014'..'2018', and the
# row labelled '2019'.
# NOTE(review): presumably the counts are year-end figures, so each label stands
# for the population at the start of the following year - TODO confirm.
pop_2015_2019 = total_pop.loc['2014' : '2018'].mean()
pop_2020 = total_pop.loc['2019']

# Only a cell's final expression is rendered, so the two series are shown side
# by side rather than as two bare names, of which the first was never displayed.
pd.DataFrame({'pop_2015_2019': pop_2015_2019, 'pop_2020': pop_2020})

In [None]:
# Weekly deaths per million: each death column divided by the matching
# population expressed in millions.
tot_15_19_per_M = scb_migrant_deaths['Tot 15-19'].div(pop_2015_2019['total_pop'] / 1e6)
foreign_15_19_per_M = scb_migrant_deaths['UtrF 15-19'].div(pop_2015_2019['foreign_born'] / 1e6)
native_15_19_per_M = scb_migrant_deaths['InrF 15-19'].div(pop_2015_2019['native_born'] / 1e6)

tot_2020_per_M = scb_migrant_deaths['Tot 2020'].div(pop_2020['total_pop'] / 1e6)
foreign_2020_per_M = scb_migrant_deaths['UtrF 2020'].div(pop_2020['foreign_born'] / 1e6)
native_2020_per_M = scb_migrant_deaths['InrF 2020'].div(pop_2020['native_born'] / 1e6)


In [None]:
# Put the six per-million series side by side and label the rows 1..n.
deaths_per_m = pd.concat(
    [
        tot_15_19_per_M, foreign_15_19_per_M, native_15_19_per_M,
        tot_2020_per_M, foreign_2020_per_M, native_2020_per_M,
    ],
    axis='columns',
)

deaths_per_m.index = range(1, len(deaths_per_m) + 1)
deaths_per_m

In [None]:
# Last row whose 'Tot 2020' value differs from 0; the row before it is treated
# as the last usable one and everything after that is cut off.
last_data = deaths_per_m[deaths_per_m['Tot 2020'].ne(0)].index[-1]
last_good_data = last_data - 1

deaths_per_m = deaths_per_m.loc[:last_good_data].copy()

In [None]:
# Weekly deaths per million for both groups and both periods, saved as a JPEG.
title = 'SWEDEN deaths per million foreign vs native born, 2015-2019 vs 2020'
deaths_per_m.plot.line(
    y=['UtrF 15-19', 'InrF 15-19', 'UtrF 2020', 'InrF 2020'],
    style='o--',
    figsize=(18, 12),
    title=title,
)

plt.xlabel('week')
plt.ylabel('deaths per million')
plt.savefig('swe_deaths_per_m_native_foreign.jpg', format='jpg')

In [None]:
# Weekly 2020 value relative to the 2015-2019 value, per group.
deaths_per_m['exs_UtrF'] = deaths_per_m['UtrF 2020'].div(deaths_per_m['UtrF 15-19'])
deaths_per_m['exs_InrF'] = deaths_per_m['InrF 2020'].div(deaths_per_m['InrF 15-19'])
deaths_per_m

In [None]:
# Weekly change factors with a reference line at 1 (no change), saved as a JPEG.
title = 'SWEDEN change factor 2020 vs 2015-2019, deaths per million, foreign vs native born'
ax = deaths_per_m.plot(
    y=['exs_UtrF', 'exs_InrF'],
    style='o--',
    title=title,
    figsize=(18, 12),
)
ax.axhline(1, color='k')
ax.set_ylabel('change factor')
ax.set_xlabel('week')
plt.savefig('swe_deaths_per_m_change_factor_foreign_native.jpg', format='jpg')

In [None]:
# Append a running total ('<name> cum') for each of the four group columns.
for col in ['UtrF 15-19', 'UtrF 2020', 'InrF 15-19', 'InrF 2020']:
    deaths_per_m[col + ' cum'] = deaths_per_m[col].cumsum()
deaths_per_m

In [None]:
# Running totals of the two 'Tot' columns.
tot_cum = deaths_per_m.loc[:, ['Tot 15-19', 'Tot 2020']].cumsum()
tot_cum

In [None]:
# Every column from 'UtrF 15-19 cum' onwards, followed by the cumulative totals.
tmp_cums = deaths_per_m.loc[:, 'UtrF 15-19 cum':]
cums = pd.concat([tmp_cums, tot_cum.copy()], axis=1)

# Cumulative 2020 value relative to the cumulative 2015-2019 value.
cums['UtrF_f'] = cums['UtrF 2020 cum'].div(cums['UtrF 15-19 cum'])
cums['InrF_f'] = cums['InrF 2020 cum'].div(cums['InrF 15-19 cum'])
cums['Tot_f'] = cums['Tot 2020'].div(cums['Tot 15-19'])

cums

In [None]:
title = 'cumulative deaths per million, foreign vs native born'

# `title` was defined but never handed to the plot, so the saved figure had no
# heading; pass it explicitly.
ax = deaths_per_m.plot(y=['UtrF 15-19 cum','UtrF 2020 cum','InrF 15-19 cum','InrF 2020 cum'],
                 figsize=(18,12),style='o--',title=title)
# Overlay the cumulative totals on the same axes.
tot_cum.plot(ax=ax)
plt.ylabel('deaths per million')
plt.xlabel('week')
plt.savefig('swe_deaths_per_m_foreign_native_cumulative.jpg',format='jpg')

In [None]:
# Cumulative totals on their own.
tot_cum.plot.line(figsize=(18, 12), style='o--')

In [None]:
# Cumulative 2020 / cumulative 2015-2019, per group and in total.
utrf_cum_factor = deaths_per_m['UtrF 2020 cum'].div(deaths_per_m['UtrF 15-19 cum'])
inrf_cum_factor = deaths_per_m['InrF 2020 cum'].div(deaths_per_m['InrF 15-19 cum'])
tot_cum_factor = tot_cum['Tot 2020'].div(tot_cum['Tot 15-19'])

cum_factor = pd.concat(
    [utrf_cum_factor, inrf_cum_factor, tot_cum_factor],
    axis=1,
    keys=['UtrF', 'InrF', 'Total'],
)
cum_factor

In [None]:
# Growth factors of the cumulative series, with a reference line at 1.
# Fixes the "foregn" typo in the title and labels the x axis like the other
# weekly plots in this notebook.
title = 'SWEDEN weekly growth factors cumulative deaths per million 2020 vs 2015-2019, native & foreign born'
ax = cum_factor.plot(figsize=(18,12),style='o--',title=title)
plt.ylabel('growth factor')
plt.xlabel('week')
ax.axhline(1,color='k')

In [None]:
# All cumulative series in one frame, plus the 2020 / 2015-2019 factors.
all_cums = (deaths_per_m[['UtrF 15-19 cum','InrF 15-19 cum','UtrF 2020 cum','InrF 2020 cum']]).copy()
all_cums['Tot 15-19 cum'] = deaths_per_m['Tot 15-19'].cumsum()
all_cums['Tot 2020 cum'] = deaths_per_m['Tot 2020'].cumsum()
all_cums['f_UtrF'] = all_cums['UtrF 2020 cum'] / all_cums['UtrF 15-19 cum']
# Column key was misspelled 'f_InfF'; it holds the InrF factor.
all_cums['f_InrF'] = all_cums['InrF 2020 cum'] / all_cums['InrF 15-19 cum']
all_cums['f_Tot'] = all_cums['Tot 2020 cum'] / all_cums['Tot 15-19 cum']
all_cums

In [None]:
# Cut the absolute death counts off at the same row label used for deaths_per_m.
scb_migrant_deaths = scb_migrant_deaths.loc[:last_good_data]

In [None]:
# Column totals of the absolute counts, from 'Tot 15-19' to the last column,
# over rows up to last_good_data.
abs_sums = scb_migrant_deaths.loc[:last_good_data,'Tot 15-19':].sum()
abs_sums

In [None]:
# 2020 total relative to the 2015-2019 total, for Tot, InrF and UtrF in turn.
for group in ('Tot', 'InrF', 'UtrF'):
    print(abs_sums[group + ' 2020'] / abs_sums[group + ' 15-19'])

In [None]:
# Print both reference populations, one after the other.
print(pop_2015_2019, pop_2020, sep='\n')

In [None]:
# Element-wise ratio of the two reference populations.
pop_2020 / pop_2015_2019

In [None]:
# Show the absolute totals again, next to the per-million figures below.
abs_sums

In [None]:
# Period totals per million inhabitants, for each group and period.
tot_d_per_m_15_19 = abs_sums['Tot 15-19'] / (pop_2015_2019['total_pop'] / 1e6)
tot_d_per_m_2020 = abs_sums['Tot 2020'] / (pop_2020['total_pop'] / 1e6)
utrf_d_per_m_15_19 = abs_sums['UtrF 15-19'] / (pop_2015_2019['foreign_born'] / 1e6)
utrf_d_per_m_2020 = abs_sums['UtrF 2020'] / (pop_2020['foreign_born'] / 1e6)
inrf_d_per_m_15_19 = abs_sums['InrF 15-19'] / (pop_2015_2019['native_born'] / 1e6)
inrf_d_per_m_2020 = abs_sums['InrF 2020'] / (pop_2020['native_born'] / 1e6)

# One row ('dead_per_m') with one column per group and period.
death_per_m_all = pd.Series(
    [
        tot_d_per_m_15_19, tot_d_per_m_2020,
        utrf_d_per_m_15_19, utrf_d_per_m_2020,
        inrf_d_per_m_15_19, inrf_d_per_m_2020,
    ],
    index=[
        'tot 15-19', 'tot 2020',
        'utrf 15-19', 'utrf 2020',
        'inrf 15-19', 'inrf 2020',
    ],
    name='dead_per_m',
).to_frame().T
death_per_m_all

In [None]:
# 2020 per-million figure relative to 2015-2019, one column per group.
growth = pd.DataFrame(
    {
        'tot': death_per_m_all['tot 2020'] / death_per_m_all['tot 15-19'],
        'utrf': death_per_m_all['utrf 2020'] / death_per_m_all['utrf 15-19'],
        'inrf': death_per_m_all['inrf 2020'] / death_per_m_all['inrf 15-19'],
    },
    columns=['tot', 'utrf', 'inrf'],
)
growth

In [None]:
# Column totals of the weekly per-million frame, derived columns included.
deaths_per_m.sum()

In [None]:
# Absolute counts from 'Tot 15-19' to the last column, rows up to last_good_data.
foo = scb_migrant_deaths.loc[:last_good_data,'Tot 15-19':]
foo

In [None]:
# Scale each weekly count to per-million first, then sum.
(foo['Tot 15-19'] / (pop_2015_2019['total_pop'] / 1e6)).sum()

In [None]:
# Absolute total of the 'Tot 15-19' column.
foo['Tot 15-19'].sum()

In [None]:
# Sum the absolute counts first, then scale to per-million.
foo['Tot 15-19'].sum() / (pop_2015_2019['total_pop'] / 1e6)

In [None]:
# Running total of the weekly per-million values.
(foo['Tot 15-19'] / (pop_2015_2019['total_pop'] / 1e6)).cumsum()

In [None]:
# Period totals per million for the whole population, native born and foreign born.
tot_15_19 = foo['Tot 15-19'].sum() / (pop_2015_2019['total_pop'] / 1e6)
tot_2020 = foo['Tot 2020'].sum() / (pop_2020['total_pop'] / 1e6)

inrf_15_19 = foo['InrF 15-19'].sum() / (pop_2015_2019['native_born'] / 1e6)
inrf_2020 = foo['InrF 2020'].sum() / (pop_2020['native_born'] / 1e6)

utrf_15_19 = foo['UtrF 15-19'].sum() / (pop_2015_2019['foreign_born'] / 1e6)
utrf_2020 = foo['UtrF 2020'].sum() / (pop_2020['foreign_born'] / 1e6)

# For each group print the baseline, the 2020 value and their ratio, with a
# blank line between groups.
_report = [(tot_15_19, tot_2020), (inrf_15_19, inrf_2020), (utrf_15_19, utrf_2020)]
for _pos, (_baseline, _current) in enumerate(_report):
    if _pos:
        print()
    print(_baseline)
    print(_current)
    print(_current / _baseline)

In [None]:
# Toy example: two groups (A, B), two years, population and deaths for each.
df = pd.DataFrame(
    [
        [20, 80, 2, 20],
        [60, 140, 10, 40],
    ],
    index=['year1', 'year2'],
    columns=['A_pop', 'B_pop', 'A_dead', 'B_dead'],
)

df

In [None]:
# Mortality per group and for both groups combined (deaths / population).
df['A_mort'] = df['A_dead'].div(df['A_pop'])
df['B_mort'] = df['B_dead'].div(df['B_pop'])
df['tot_mort'] = df['A_dead'].add(df['B_dead']).div(df['A_pop'].add(df['B_pop']))

df

In [None]:
def _year2_factor(series):
    """Return the 'year2' value of ``series`` divided by the value in the row before it."""
    return (series / series.shift())['year2']


# Change in mortality from year1 to year2.
a_mort_factor = _year2_factor(df['A_mort'])
b_mort_factor = _year2_factor(df['B_mort'])
tot_mort_factor = _year2_factor(df['tot_mort'])

# Change in population over the same step.
a_pop_factor = _year2_factor(df['A_pop'])
b_pop_factor = _year2_factor(df['B_pop'])
tot_pop_factor = _year2_factor(df['A_pop'] + df['B_pop'])


factors = pd.Series({
    'a_mort_factor': a_mort_factor,
    'b_mort_factor': b_mort_factor,
    'tot_mort_factor': tot_mort_factor,
    'a_pop_factor': a_pop_factor,
    'b_pop_factor': b_pop_factor,
    'tot_pop_factor': tot_pop_factor,
})

factors

In [None]:
# The 'total_pop' entry of the population row used as the 2020 denominator.
pop_2020['total_pop']