In [1]:
import pandas as pd

### Read in population data, rename columns and drop an unused one

In [2]:
# Load the Munich population table, translate the German column names to
# English and drop the unused "note" column.
#
# The former `df_pop.astype({'zipcode': 'str'})` statement has been removed:
# `astype` returns a new frame and the result was never assigned, so the call
# had no effect. `zipcode` therefore keeps the dtype inferred by `read_csv`,
# exactly as before.
df_pop = (
    pd.read_csv("../data/munich_pop.csv")
    .rename(columns={"plz": "zipcode",
                     "einwohner": "population",
                     "qkm": "sqkm",
                     "Density": "density"})
    # Non-mutating drop (instead of inplace=True) keeps the cell a single
    # expression that can be re-run safely.
    .drop(columns="note")
)
df_pop.head()

Unnamed: 0,zipcode,population,sqkm,lat,lon,density
0,80331,4741,0.78277,48.13575,11.57351,6056.696092
1,80333,11265,1.615148,48.14495,11.56824,6974.59304
2,80335,9042,1.697719,48.14657,11.55112,5325.969728
3,80336,8103,1.568529,48.13101,11.55228,5165.98673
4,80337,15280,1.046991,48.12675,11.55904,14594.203771


In [3]:
# Print column dtypes and non-null counts of the population frame.
df_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   zipcode     75 non-null     int64  
 1   population  75 non-null     int64  
 2   sqkm        75 non-null     float64
 3   lat         75 non-null     float64
 4   lon         75 non-null     float64
 5   density     75 non-null     float64
dtypes: float64(4), int64(2)
memory usage: 3.6 KB


In [11]:
# Number of distinct zipcodes in the population frame.
df_pop["zipcode"].nunique()

75

### Read in physicians data and add a zipcode column

In [4]:
# Load the physicians table; the first CSV column is used as the row index.
df_phys = pd.read_csv(
    filepath_or_buffer="../data/physicians_with_coordinates.csv",
    index_col=0,
)
df_phys.head(n=5)

Unnamed: 0,name,expertise,distance,street_and_house_no,zipcode_and_city,telephone,lat,lon
0,Frau Dr. med. Christa Hutterer,"Fachärztin für Diagnostische Radiologie, Fachä...",0.1 km,Kaufingerstraße 15,80331 München-Altstadt-Lehel,089 / 2 00 01 43 - 50,48.137815,11.571804
1,Herr Dr. med. Josef J. Dohrenbusch,Facharzt für Allgemeinmedizin,0.1 km,Kaufingerstraße 12,80331 München,089 / 24 20 93 98,48.137615,11.573961
2,Frau Dr. med. Heidi Herrmann,Fachärztin für Innere Medizin,0.1 km,Altheimer Eck 2,80331 München,089 / 45 22 81 81,48.137541,11.571022
3,Herr Dr. med. (univ.) Thomas Wendel,Praktischer Arzt,0.1 km,Altheimer Eck 10,80331 München,089 / 89 67 40 20,48.137505,11.570328
4,Frau Dr. med. Sabine Konz,Praktische Ärztin,0.2 km,Eisenmannstraße 4,80331 München,089 / 37 02 97 67,48.137763,11.569028


In [5]:
# The zipcode is the first space-separated token of "zipcode_and_city"
# (e.g. "80331 München" -> "80331"). The vectorised `.str` accessor replaces
# the per-row lambda and yields NaN for a missing entry instead of raising
# AttributeError on it. The resulting column holds strings, not integers.
df_phys["zipcode"] = df_phys["zipcode_and_city"].str.split(" ").str[0]
df_phys.head()

Unnamed: 0,name,expertise,distance,street_and_house_no,zipcode_and_city,telephone,lat,lon,zipcode
0,Frau Dr. med. Christa Hutterer,"Fachärztin für Diagnostische Radiologie, Fachä...",0.1 km,Kaufingerstraße 15,80331 München-Altstadt-Lehel,089 / 2 00 01 43 - 50,48.137815,11.571804,80331
1,Herr Dr. med. Josef J. Dohrenbusch,Facharzt für Allgemeinmedizin,0.1 km,Kaufingerstraße 12,80331 München,089 / 24 20 93 98,48.137615,11.573961,80331
2,Frau Dr. med. Heidi Herrmann,Fachärztin für Innere Medizin,0.1 km,Altheimer Eck 2,80331 München,089 / 45 22 81 81,48.137541,11.571022,80331
3,Herr Dr. med. (univ.) Thomas Wendel,Praktischer Arzt,0.1 km,Altheimer Eck 10,80331 München,089 / 89 67 40 20,48.137505,11.570328,80331
4,Frau Dr. med. Sabine Konz,Praktische Ärztin,0.2 km,Eisenmannstraße 4,80331 München,089 / 37 02 97 67,48.137763,11.569028,80331


In [6]:
# Count physicians per zipcode ("count" tallies the non-null `name` entries in
# each group) and turn the group key back into a regular column.
#
# The former `df_phys_by_zipcode.astype({'zipcode': 'str'})` statement has
# been removed: its result was never assigned, so it did nothing. The zipcode
# column keeps the values produced by the groupby unchanged.
df_phys_by_zipcode = (
    df_phys
    .groupby("zipcode")
    .agg(physicians_count=("name", "count"))
    .reset_index()
)
df_phys_by_zipcode.head()

Unnamed: 0,zipcode,physicians_count
0,80331,174
1,80333,102
2,80335,71
3,80336,108
4,80337,50


In [7]:
# Print column dtypes and non-null counts of the per-zipcode physician counts.
df_phys_by_zipcode.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   zipcode           55 non-null     object
 1   physicians_count  55 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1008.0+ bytes


In [8]:
# Convert the zipcode key to int64; every other column is left untouched.
df_phys_by_zipcode = df_phys_by_zipcode.assign(
    zipcode=lambda frame: frame["zipcode"].astype("int64")
)
df_phys_by_zipcode.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   zipcode           55 non-null     int64
 1   physicians_count  55 non-null     int64
dtypes: int64(2)
memory usage: 1008.0 bytes


In [12]:
# Number of distinct zipcodes that have at least one physician record.
df_phys_by_zipcode["zipcode"].nunique()

55

In [9]:
# Join population and physician counts on the zipcode.
# - `on` / `how` are spelled out instead of relying on pandas' defaults
#   (all shared column names, inner join), so adding a same-named column to
#   either frame later cannot silently change the join key.
# - An inner join keeps only zipcodes present in BOTH frames; zipcodes that
#   appear in just one of them are dropped from the result.
# - `validate="one_to_one"` raises a MergeError if a zipcode is duplicated on
#   either side, instead of silently multiplying rows.
df_merged = df_pop.merge(
    df_phys_by_zipcode,
    on="zipcode",
    how="inner",
    validate="one_to_one",
)
df_merged.head()

Unnamed: 0,zipcode,population,sqkm,lat,lon,density,physicians_count
0,80331,4741,0.78277,48.13575,11.57351,6056.696092,174
1,80333,11265,1.615148,48.14495,11.56824,6974.59304,102
2,80335,9042,1.697719,48.14657,11.55112,5325.969728,71
3,80336,8103,1.568529,48.13101,11.55228,5165.98673,108
4,80337,15280,1.046991,48.12675,11.55904,14594.203771,50


In [10]:
# Print column dtypes and non-null counts of the merged frame.
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   zipcode           53 non-null     int64  
 1   population        53 non-null     int64  
 2   sqkm              53 non-null     float64
 3   lat               53 non-null     float64
 4   lon               53 non-null     float64
 5   density           53 non-null     float64
 6   physicians_count  53 non-null     int64  
dtypes: float64(4), int64(3)
memory usage: 3.0 KB
