In [38]:
#Python 3.11.2
#Import packages
import pandas as pd
import numpy as np
import pylab as pl
import random
import matplotlib.pyplot as plt
from collections import defaultdict
import json # for pretty printing
import geopandas as gpd

We have three different data sources. 

1. The data collected by Bodil corresponds to the plague period.
2. The information from the TABVERK database includes the population size for parishes in the posterior years of the plague.
3. The geographical information (polygons) for some parishes. This information doesn't correspond to the plague period.

Our goal is to create a unique database for our project: Plague spread across Scania, Sweden, from 1710 to 1715.

First we read the different data sources (.xlsx, .csv, and .shp files)

In [39]:
#Bodil's data Appendix 6 plague parishes
plagueParishesScania = pd.read_csv('/Users/polislizarralde/PythonMathematicalModeling/docs/PlagueProject/data/Appendix6Bodil.csv', sep=',', encoding='utf-8')
#All parishes in Scania during the plague period
allParishesScania_path = '/Users/polislizarralde/PythonMathematicalModeling/docs/PlagueProject/data/allParishesScania.xlsx'
allParishesScania = pd.read_excel(allParishesScania_path)

Transforming the lowercase to uppercase and checking the type

In [40]:
allParishesScania = allParishesScania.apply(lambda x: x.astype(str).str.upper())
plagueParishesScania = plagueParishesScania.apply(lambda x: x.astype(str).str.upper())
type(plagueParishesScania)
type(allParishesScania)

pandas.core.frame.DataFrame

Visualizing the DataFrames

In [41]:
allParishesScania.head()

Unnamed: 0,SkåneRegion,District(Härad),ParishName
0,SOUTHEAST,ALBO,ANDRARUM
1,SOUTHEAST,ALBO,BRÖSARP
2,SOUTHEAST,ALBO,ELJARÖD
3,SOUTHEAST,ALBO,FÅGELTOFTA
4,SOUTHEAST,ALBO,RAVLUNDA


In [42]:
plagueParishesScania.head()

Unnamed: 0,BeginPlaguePeriod,EndPlaguePeriod,ParishName,VictimsNumber,SkåneRegion
0,NOV 1710,APR 1711,NÄSUM,671,NORTHEAST
1,FEB 1712,UNDEFINED,NÄSUM,?,NORTHEAST
2,NOV 1710,AUG 1711,IVÖ,123,NORTHEAST
3,DEC 1710,SEP 1711,KIABY,296,NORTHEAST
4,DEC 1710,SEP 1711,OPPMANNA,400,NORTHEAST


Merging the two datasets (allParishesScania and plagueParishesScania)

In [50]:
parishesScania = pd.merge(allParishesScania, plagueParishesScania, how='left', on=['ParishName','SkåneRegion'])
parishesScania.head()

Unnamed: 0,SkåneRegion,District(Härad),ParishName,BeginPlaguePeriod,EndPlaguePeriod,VictimsNumber
0,SOUTHEAST,ALBO,ANDRARUM,,,
1,SOUTHEAST,ALBO,BRÖSARP,,,
2,SOUTHEAST,ALBO,ELJARÖD,JAN 1713,UNDEFINED,3.0
3,SOUTHEAST,ALBO,FÅGELTOFTA,,,
4,SOUTHEAST,ALBO,RAVLUNDA,,,


Checking that the new data frame keep all the outbreaks for parish

In [57]:
parishesScania.loc[parishesScania['ParishName'] == 'NÄSUM']

Unnamed: 0,SkåneRegion,District(Härad),ParishName,BeginPlaguePeriod,EndPlaguePeriod,VictimsNumber
393,NORTHEAST,VILLANDS,NÄSUM,NOV 1710,APR 1711,671
394,NORTHEAST,VILLANDS,NÄSUM,FEB 1712,UNDEFINED,?


Defining a function for extracting the names of the parishes in the data frame

In [51]:
def get_Names(data: pd.DataFrame, heading:str) -> list:
    return data[heading].tolist()   

Filtering the data frame by region and then get the names of the parishes:

In [79]:
parishesScania_names = get_Names(parishesScania, 'ParishName')
len(parishesScania_names)

408

In [291]:
southeastParishes = allParishesScania.loc[allParishesScania['Region'] == 'SOUTHEAST']
southeastParishes_names = get_Names(southeastParishes, 'Parish Name')

In [292]:
southeastParishes_names[1]

'BRÖSARP'

Reading the census file:

In [58]:
census_path = '/Users/polislizarralde/Desktop/CensusScania/FILE01_FALD.csv'
censusSweden = pd.read_csv(census_path, sep=';')
censusSweden.shape

(102360, 50)

Checking the memory usage

In [247]:
censusSweden.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102360 entries, 0 to 102359
Data columns (total 50 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   LANGEN       102360 non-null  int64  
 1   LANGENNMN    102360 non-null  object 
 2   GEOID        102360 non-null  int64  
 3   GEOIDNMN     102360 non-null  object 
 4   GEOIDTYP     102360 non-null  int64  
 5   AR           102360 non-null  int64  
 6   FORMNR       102360 non-null  int64  
 7   KON          102360 non-null  int64  
 8   FORMID       102360 non-null  int64  
 9   ALD00        102360 non-null  int64  
 10  ALD01        102360 non-null  int64  
 11  ALD03        102360 non-null  int64  
 12  ALD03_1      102360 non-null  int64  
 13  ALD05        102360 non-null  int64  
 14  ALD05_2      102360 non-null  int64  
 15  ALD10        102360 non-null  int64  
 16  ALD15        102360 non-null  int64  
 17  ALD15_1      102360 non-null  int64  
 18  ALD15_2      102360 non-

Checking the names of all columns in the data

In [84]:
columns = censusSweden.columns

Calling the data only with specific columns

In [64]:
censusSweden = pd.read_csv(census_path, sep=';', usecols=[
                           'LANGENNMN'  # Standard name of the county for the geographical area in plain text
                           # Standard name of the geographical area in plain text, i.e. not a source name.
                           , 'GEOIDNMN', 'GEOIDTYP'  # Type of breakdown of the geographical area  0 =Assembly, 1 = Pastorate, 2 = Other type, 3 = Several parishes, 9 = Part of a parish
                           , 'AR'  # Year
                           , 'KON'  # 1 = Man  2 = Female. I choose 1 but it could be 2 for the total population
                           , 'BEF_TOT'  # Total population at source
                           , 'BEF_GENTOT'  # Total population, generated
                           ])

Processing the census data such that corresponds only to Scania

In [65]:
censusScania = censusSweden.loc[((censusSweden['LANGENNMN'] == 'KRISTIANSTADS LÄN') | (
    censusSweden['LANGENNMN'] == 'MALMÖHUS LÄN')) & (censusSweden['KON'] == 1)]
censusScania.shape
censusScania.head(3)

Unnamed: 0,LANGENNMN,GEOIDNMN,GEOIDTYP,AR,KON,BEF_TOT,BEF_GENTOT
40804,KRISTIANSTADS LÄN,ONSLUNDA,0,1750,1,403,403
40806,KRISTIANSTADS LÄN,ONSLUNDA,0,1751,1,404,404
40808,KRISTIANSTADS LÄN,ONSLUNDA,0,1752,1,412,412


Process the data from Scania only to keep the first population size registered for each parish.
This was done following two approaches: the first one by grouping the data by parish name and then selecting the minimum year. The second approach is exhaustively exploring the given DataFrame and keeping the required information in a dictionary. The information required in our case corresponds to the position associated with each parish name and the minimum year, according to the original DataFrame. 

1. First approach:

In [68]:
#Group a Pandas DataFrame by a column
parish_grp = censusScania.groupby(['GEOIDNMN'])
#Get a group by an element of the column
example = parish_grp.get_group('SANKT MARIA (YSTAD SF)')
#Select the row with minimum value in Pandas dataframe
example[example.AR == example.AR.min()]

#Get the unique values of a column
parish_grp['GEOIDNMN'].unique()
#Get the unique values of a column as a list
parish_grp_name = parish_grp['GEOIDNMN'].unique().tolist()
#Get the unique values of a column as a list of strings
parish_names = [parish_name[0] for parish_name in parish_grp_name]


In [69]:
parish_grp = censusScania.groupby(['GEOIDNMN'])
parish_grp_name = parish_grp['GEOIDNMN'].unique().tolist()
parish_names = [parish_name[0] for parish_name in parish_grp_name]

popSizeScania = pd.DataFrame() 

for i in range(len(parish_grp)):
    grp_i = parish_grp.get_group(parish_names[i])
    grp_i[grp_i.AR == grp_i.AR.min()]
    popSizeScania = pd.concat([popSizeScania, (grp_i[grp_i.AR == grp_i.AR.min()])], axis=0)

print(popSizeScania.shape)  

(495, 7)


In [74]:
popSizeScania.head(3)

Unnamed: 0,LANGENNMN,GEOIDNMN,GEOIDTYP,AR,KON,BEF_TOT,BEF_GENTOT
40804,KRISTIANSTADS LÄN,ONSLUNDA,0,1750,1,403,403
40856,KRISTIANSTADS LÄN,HÄGLINGE,0,1805,1,713,713
40878,KRISTIANSTADS LÄN,"NORRA RÖRUM, DEL (KRISTIANSTAD)",9,1795,1,231,232


2. Second approach

In [73]:
aux_dict = {}
popSizeScania = pd.DataFrame() 

for i in range(len(censusScania)):
    name_i = censusScania['GEOIDNMN'].iloc[i]
    ar_i = censusScania['AR'].iloc[i]
    if name_i in aux_dict:
        if ar_i < aux_dict[name_i]['min']:
            aux_dict[name_i] = {'min': ar_i, 'position': i}
    else:
        aux_dict[name_i] = {'min': ar_i, 'position': i}
final_positions = [value['position'] for key, value in aux_dict.items()]
popSizeScania = censusScania.iloc[final_positions]   
print(popSizeScania.shape)            
    

(495, 7)


In [75]:
popSizeScania.head(3)

Unnamed: 0,LANGENNMN,GEOIDNMN,GEOIDTYP,AR,KON,BEF_TOT,BEF_GENTOT
40804,KRISTIANSTADS LÄN,ONSLUNDA,0,1750,1,403,403
40856,KRISTIANSTADS LÄN,HÄGLINGE,0,1805,1,713,713
40878,KRISTIANSTADS LÄN,"NORRA RÖRUM, DEL (KRISTIANSTAD)",9,1795,1,231,232


In [98]:
popSizeScania_names = get_Names(popSizeScania, 'GEOIDNMN')

Reading the geographical information by parish

In [94]:
parishScania_path = '/Users/polislizarralde/PythonMathematicalModeling/docs/PlagueProject/MapScaniaSweden/Parishes1720_1890.shp'
parishScaniaMap = gpd.read_file(parishScania_path)

In [108]:
parishScaniaMap.columns

Index(['G_NAME', 'GET_END_YE', 'GET_START_', 'G_UNIT_TYP', 'G_UNIT', 'G_SEQ',
       'geometry'],
      dtype='object')

In [111]:
parishScaniaMap = gpd.read_file(parishScania_path, usecols=[
                           'G_NAME'  
                           , 'GET_END_YE'
                           , 'GET_START_'
                           , 'geometry'  
                           ])
parishScaniaMap.head(10)

Unnamed: 0,G_NAME,GET_END_YE,GET_START_,G_UNIT_TYP,G_UNIT,G_SEQ,geometry
0,HOVS FÖRSAMLING L LÄN,9999,,SWE_KYRK,10744015.0,51568.0,"POLYGON ((4159374.280 3280178.361, 4158183.768..."
1,TOREKOVS FÖRSAMLING,9999,,SWE_KYRK,10744027.0,94148.0,"POLYGON ((4158250.036 3280475.526, 4158374.467..."
2,KRISTIANSTADS HELIGA TREFALDIGHETS FÖRSAMLING,9999,,SWE_KYRK,10744052.0,98647.0,"POLYGON ((4249757.034 3241445.338, 4249645.614..."
3,NORRA ÅSUMS FÖRSAMLING,9999,,SWE_KYRK,10744064.0,96757.0,"POLYGON ((4249757.034 3241445.338, 4249865.346..."
4,ÄSPHULTS FÖRSAMLING,9999,,SWE_KYRK,10744076.0,93166.0,"POLYGON ((4227839.808 3230701.940, 4228155.888..."
5,DJURRÖDS FÖRSAMLING,9999,,SWE_KYRK,10744088.0,96902.0,"POLYGON ((4235034.141 3239316.925, 4235277.475..."
6,TRÄNE FÖRSAMLING,9999,,SWE_KYRK,10744090.0,95622.0,"POLYGON ((4241776.825 3239062.212, 4241952.361..."
7,SKEPPARSLÖVS FÖRSAMLING,9999,,SWE_KYRK,10744106.0,45567.0,"POLYGON ((4241776.825 3239062.212, 4241471.416..."
8,VÄ FÖRSAMLING,9999,,SWE_KYRK,10744118.0,94721.0,"POLYGON ((4246916.116 3231887.997, 4247067.301..."
9,KÖPINGE FÖRSAMLING,9999,,SWE_KYRK,10744120.0,95568.0,"POLYGON ((4255464.917 3230813.998, 4255408.599..."


In [125]:
parishScaniaMap = parishScaniaMap.apply(lambda x: x.replace({'FÖRSAMLING': '', 'L LÄN' : '', 'S LÄN' : ''}, regex=True))

In [None]:
def 

In [126]:
parishScaniaMap.head(10)

Unnamed: 0,G_NAME,GET_END_YE,GET_START_,G_UNIT_TYP,G_UNIT,G_SEQ,geometry
0,HOVS,9999,,SWE_KYRK,10744015.0,51568.0,"POLYGON ((4159374.280 3280178.361, 4158183.768..."
1,TOREKOVS,9999,,SWE_KYRK,10744027.0,94148.0,"POLYGON ((4158250.036 3280475.526, 4158374.467..."
2,KRISTIANSTADS HELIGA TREFALDIGHETS,9999,,SWE_KYRK,10744052.0,98647.0,"POLYGON ((4249757.034 3241445.338, 4249645.614..."
3,NORRA ÅSUMS,9999,,SWE_KYRK,10744064.0,96757.0,"POLYGON ((4249757.034 3241445.338, 4249865.346..."
4,ÄSPHULTS,9999,,SWE_KYRK,10744076.0,93166.0,"POLYGON ((4227839.808 3230701.940, 4228155.888..."
5,DJURRÖDS,9999,,SWE_KYRK,10744088.0,96902.0,"POLYGON ((4235034.141 3239316.925, 4235277.475..."
6,TRÄNE,9999,,SWE_KYRK,10744090.0,95622.0,"POLYGON ((4241776.825 3239062.212, 4241952.361..."
7,SKEPPARSLÖVS,9999,,SWE_KYRK,10744106.0,45567.0,"POLYGON ((4241776.825 3239062.212, 4241471.416..."
8,VÄ,9999,,SWE_KYRK,10744118.0,94721.0,"POLYGON ((4246916.116 3231887.997, 4247067.301..."
9,KÖPINGE,9999,,SWE_KYRK,10744120.0,95568.0,"POLYGON ((4255464.917 3230813.998, 4255408.599..."


In [127]:
parishScaniaMap_names = get_Names(parishScaniaMap, 'G_NAME')

In [128]:
def check_name(data: pd.DataFrame, column: pd.Series, name: str):
    filt_name = column.str.contains(name, na=False)
    return data.loc[filt_name]

In [129]:
def filter_data_by_name(data: pd.DataFrame, column: pd.Series, input_names: list[str]):
    output_names = []
    for name in input_names:
        filter_data = check_name(data, column, name)
        if len(filter_data) == 0 :
            output_names  = output_names  + [name]
        else:
            continue
    return output_names


In [92]:
print(len(filter_data_by_name(popSizeScania, popSizeScania['GEOIDNMN'], parishesScania_names)))

56


In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


In [130]:
filter_data_by_name(parishScaniaMap, parishScaniaMap['G_NAME'], popSizeScania_names)

  filt_name = column.str.contains(name, na=False)


['NORRA RÖRUM, DEL (KRISTIANSTAD)',
 'RÖSTÅNGA, DEL (KRISTIANSTAD)',
 'RAVLUNDA PASTORAT',
 'SÖDRA MELLBY PASTORAT',
 'GLADSAX PASTORAT',
 'SANKT OLOV',
 'STORA KÖPINGE,DEL (KRISTIANSTAD)',
 'ÖVRABY, DEL (KRISTIANSTAD)',
 'ÖSTRA HERRESTAD PASTORAT',
 'TRYDE, DEL (KRISTIANSTAD)',
 'NORRA VRAM, DEL (KRISTIANDSTAD)',
 'VÄSTRA BROBY, DEL (KRISTIANDSTAD)',
 'STRÖVELSTORP, DEL (KRISTIANSTAD)',
 'GRÅMANSTORP',
 'ÖSSJÖ PASTORAT',
 'VÄSTRA KARUP PASTORAT',
 'FÖRSLÖV PASTORAT',
 'MUNKA-LJUNGBY',
 'BARKÅKRA PASTORAT',
 'VINSLÖV PASTORAT',
 'NORRA MELLBY PASTORAT',
 'TJÖRNARP, DEL AV (FROSTA HÄRAD, MALMÖHUS LÄN)',
 'BRÖNNESTAD PASTORAT',
 'FINJA PASTORAT',
 'RÖKE PASTORAT',
 'VANKIVA PASTORAT',
 'NORRA ÅKARP PASTORAT',
 'VISSELTOFTA PASTORAT',
 'SÖRBY PASTORAT',
 'FÄRLÖV PASTORAT',
 'KRISTIANSTAD STAD',
 'KRISTIANSTADS GARNISONSFÖRS.',
 'KRISTIANSTADS SLOTTSFÖRSAMLING',
 'ÅHUS PASTORAT',
 'OPPMANNA PASTORAT',
 'FJÄLKESTAD PASTORAT',
 'RÅBELÖV',
 'IVÖ PASTORAT',
 'SVENSKÖP, DEL (KRISTIANSTAD)',
 'Ä

In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


In [None]:
southeastParishesNoPop = []

for name in southeastParishes_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        southeastParishesNoPop = southeastParishesNoPop + [name]
    else:
        continue
    print(southeastParishesNoPop)
    

['SANKT OLOF']
['SANKT OLOF', 'YSTAD SANKT PETRI']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD']
['SANKT OLOF', 'YSTAD SANKT PETRI', 'YSTAD SANKT MARIA', 'JERRESTAD', 'SKÖRUP']


Check regular expressions code in Mathematica for eliminATING SOME STRINGS

In [83]:

parishesScaniaNoPop = []

for name in parishesScania_names:
    filter_data = check_name(popSizeScania, popSizeScania['GEOIDNMN'], name)
    if len(filter_data) == 0 :
        parishesScaniaNoPop  = parishesScaniaNoPop  + [name]
    else:
        continue

    

In [287]:
check_name(popSizeScania, popSizeScania['GEOIDNMN'], 'YSTAD')
check_name(popSizeScania, popSizeScania['GEOIDNMN'], 'OLOFS')


Unnamed: 0,LANGEN,LANGENNMN,GEOID,GEOIDNMN,GEOIDTYP,AR,FORMNR,KON,FORMID,BEF_SUM,BEF_TOT,BEF_GENSUM,BEF_GENTOT


In [213]:
type(censusScania['GEOIDNMN'])

pandas.core.series.Series

In [220]:
filt2 = popSizeScania['GEOIDNMN'].str.contains('NORRA RÖRUM', na=False)
popSizeScania.loc[filt2]

Unnamed: 0,LANGEN,LANGENNMN,GEOID,GEOIDNMN,GEOIDTYP,AR,FORMNR,KON,FORMID,BEF_SUM,BEF_TOT,BEF_GENSUM,BEF_GENTOT
40902,11,KRISTIANSTADS LÄN,71342,"NORRA RÖRUM, DEL (KRISTIANSTAD)",9,1855,610,1,100573,169,346,169,346
53446,12,MALMÖHUS LÄN,71340,NORRA RÖRUM,0,1785,200,1,131832,376,783,376,783
53440,12,MALMÖHUS LÄN,71349,NORRA RÖRUM PASTORAT,1,1773,100,1,131817,587,1228,587,1228
53472,12,MALMÖHUS LÄN,71341,"NORRA RÖRUM, DEL (MALMÖHUS)",9,1855,610,1,131914,475,978,475,978


In [218]:
print(popSizeScania[['GEOIDNMN','AR']].iloc[2])

GEOIDNMN    NORRA RÖRUM, DEL (KRISTIANSTAD)
AR                                     1855
Name: 40902, dtype: object
