# Data Creation 


### 1. Reading in the Data




In [22]:
import pandas as pd           #Initialising the needed libaries

In [23]:
#Data with the Universities
# data = pd.read_csv(r"\Users\jaho3\OneDrive\Documents\group-3\archive\cwurData.csv")#, usecols = ['world_rank',"institution","country", "national_rank", "quality_of_education", "quality_of_faculty", "year"])   
data = pd.read_csv(r"data/cwurData.csv")

In [24]:
data["has_u"] = data["institution"].str.contains("u")

In [25]:
def get_balcony_percentage_by_city(city_entries: pd.Series) -> float:
    vc = city_entries.value_counts(normalize = True)
    if True in vc:
        return vc[True]
    else:
        return 1 - vc[False]

placeholder = data.groupby("country")["has_u"].apply(get_balcony_percentage_by_city)

In [26]:
data.groupby("country")["has_u"].apply(get_balcony_percentage_by_city).reset_index()

Unnamed: 0,country,has_u
0,Argentina,0.285714
1,Australia,0.448276
2,Austria,0.333333
3,Belgium,0.5
4,Brazil,0.333333
5,Bulgaria,0.0
6,Canada,0.277778
7,Chile,0.0
8,China,0.389222
9,Colombia,0.0


In [27]:
#Data with Appartment Info
#appartments = pd.read_csv(r"\Users\jaho3\Downloads\archive (3)\immo_data.csv", usecols = ['regio1','serviceCharge','balcony','telekomUploadSpeed','totalRent', "yearConstructed", "hasKitchen","cellar", "baseRent","livingSpace","petsAllowed", "lift","noRooms", "garden", "regio2", "regio3" ])
appartments = pd.read_csv(r"data/immo_data.csv", usecols = ['regio1','serviceCharge','balcony','telekomUploadSpeed','totalRent', "yearConstructed", "hasKitchen","cellar", "baseRent","livingSpace","petsAllowed", "lift","noRooms", "garden", "regio2", "regio3" ])

In [28]:
#Data with Population Info
population_data=pd.read_csv("data/Deutschland_Cities.csv")
#population_data=pd.read_csv(r"\Users\jaho3\Downloads\TechLabs\TechLabs\Deutschland_Cities.csv")

In [29]:
data.head()

Unnamed: 0,world_rank,institution,country,national_rank,quality_of_education,alumni_employment,quality_of_faculty,publications,influence,citations,broad_impact,patents,score,year,has_u
0,1,Harvard University,USA,1,7,9,1,1,1,1,,5,100.0,2012,False
1,2,Massachusetts Institute of Technology,USA,2,9,17,3,12,4,4,,1,91.67,2012,True
2,3,Stanford University,USA,3,17,11,5,4,2,2,,15,89.5,2012,False
3,4,University of Cambridge,United Kingdom,1,10,24,4,16,16,11,,50,86.17,2012,False
4,5,California Institute of Technology,USA,4,2,29,7,37,22,22,,18,85.21,2012,True


In [30]:
appartments.head()

Unnamed: 0,regio1,serviceCharge,balcony,telekomUploadSpeed,totalRent,yearConstructed,hasKitchen,cellar,baseRent,livingSpace,petsAllowed,lift,noRooms,garden,regio2,regio3
0,Nordrhein_Westfalen,245.0,False,10.0,840.0,1965.0,False,True,595.0,86.0,,False,4.0,True,Dortmund,Schüren
1,Rheinland_Pfalz,134.0,True,10.0,,1871.0,False,False,800.0,89.0,no,False,3.0,False,Rhein_Pfalz_Kreis,Böhl_Iggelheim
2,Sachsen,255.0,True,2.4,1300.0,2019.0,False,True,965.0,83.8,,True,3.0,False,Dresden,Äußere_Neustadt_Antonstadt
3,Sachsen,58.15,True,40.0,,1964.0,False,False,343.0,58.15,,False,3.0,False,Mittelsachsen_Kreis,Freiberg
4,Bremen,138.0,True,,903.0,1950.0,False,False,765.0,84.97,,False,3.0,False,Bremen,Neu_Schwachhausen


In [31]:
population_data.head()


Unnamed: 0,city,lat,lng,country,iso2,admin_name,capital,population,population_proper
0,Berlin,52.5167,13.3833,Germany,DE,Berlin,primary,3644826.0,3644826.0
1,Hamburg,53.55,10.0,Germany,DE,Hamburg,admin,1841179.0,1841179.0
2,Munich,48.1372,11.5755,Germany,DE,Bavaria,admin,1471508.0,1471508.0
3,Cologne,50.9422,6.9578,Germany,DE,North Rhine-Westphalia,,1085664.0,1085664.0
4,Frankfurt,50.1136,8.6797,Germany,DE,Hesse,minor,753056.0,753056.0


### 2. Preparing the Data

##### 2.1. Universities

In [32]:
#Shrinking the Dataset to just Universities from Germany
data = data[data["country"] == "Germany"]  

In [33]:
#Creating a "city"-column via the city names inside the institution names
regex = "(University of [a-zA-ZüäöÖÄÜ]+)"
data["city"] = data["institution"].str.extract(regex)
data["city"] = data["city"].str.replace("University of ", "")
data.loc[data["city"].str.contains("technology", case = False, na = False), "city"] = pd.NA

In [34]:
#Problem: There are still NaN Values, but only 30 
data["city"].value_counts(dropna = False)

NaN           30
Munich         7
Berlin         6
Heidelberg     4
Mannheim       2
Siegen         2
Osnabrück      2
Greifswald     2
Hohenheim      2
Lübeck         2
Jena           2
Bremen         2
Bayreuth       2
Potsdam        2
Halle          2
Hanover        2
Rostock        2
Konstanz       2
Dortmund       2
Augsburg       2
Regensburg     2
Marburg        2
Münster        2
Bonn           2
Freiburg       2
Tübingen       2
Göttingen      2
Hamburg        2
Cologne        2
Erlangen       2
Duisburg       2
Würzburg       2
Kiel           2
Mainz          2
Düsseldorf     2
Ulm            2
Stuttgart      2
Oldenburg      2
Name: city, dtype: int64

In [35]:
#So we added the missing Values just manually (Manually because of names like "Dresden University of Technology" or "Justus Liebig University Giessen" which are to different to use the other method )
data.to_excel("data/city.xlsx")

After manually adding the values, we have renamed the city file into city_complete so that our manuall changes will not be overwritten.

In [36]:
#Now we just read in the data again with the name uni_cities
uni_cities=pd.read_excel(r"data/city_complete.xlsx")

In [37]:
uni_cities["city"].value_counts(dropna = False)

Munich            7
Berlin            6
Heidelberg        4
Bremen            3
Kiel              3
Mannheim          2
Halle             2
Jena              2
Giessen           2
Saarbrücken       2
Dortmund          2
Konstanz          2
Rostock           2
Bielefeld         2
Hanover           2
Magdeburg         2
Potsdam           2
Osnabrück         2
Marburg           2
Braunschweig      2
Kaiserslautern    2
Augsburg          2
Lübeck            2
Hohenheim         2
Greifswald        2
Siegen            2
Bayreuth          2
Regensburg        2
Darmstadt         2
Bochum            2
Bonn              2
Freiburg          2
Tübingen          2
Frankfurt         2
Göttingen         2
Hamburg           2
Cologne           2
Münster           2
Erlangen          2
Karlsruhe         2
Würzburg          2
Dresden           2
Mainz             2
Düsseldorf        2
Ulm               2
Hannover          2
Leipzig           2
Stuttgart         2
Duisburg          2
Oldenburg         2


In [38]:
uni_cities.head()

Unnamed: 0.1,Unnamed: 0,world_rank,institution,country,national_rank,quality_of_education,quality_of_faculty,year,city
0,81,82,Ruprecht Karl University of Heidelberg,Germany,1,87,52,2012,Heidelberg
1,82,83,Ludwig Maximilian University of Munich,Germany,2,90,90,2012,Munich
2,89,90,Technical University of Munich,Germany,3,52,101,2012,Munich
3,166,67,Ludwig Maximilian University of Munich,Germany,1,62,92,2013,Munich
4,178,79,Ruprecht Karl University of Heidelberg,Germany,2,73,55,2013,Heidelberg


In [39]:
#Just rename the old index column if we maybe need it later to compare something
uni_cities.rename(columns={"Unnamed: 0":"old_index"}, inplace=True)

In [40]:
#Replace "Munich" with "München" so that all cities are there with their german name 
uni_cities["city"]=uni_cities["city"].str.replace("Munich", "München")

In [41]:
#Replace "Cologne" with "Köln" so that all cities are there with their german name 
uni_cities["city"]=uni_cities["city"].str.replace("Cologne", "Köln")

In [42]:
#Replace "Hanover" with "Hannover" so that all cities are there with their german name 
uni_cities["city"]=uni_cities["city"].str.replace("Hanover", "Hannover")


In [43]:
#Replace "Frankfurt" with "Frankfurt am Main" so that the cities in both sets are matching
uni_cities["city"]=uni_cities["city"].str.replace("Frankfurt", "Frankfurt am Main")

In [44]:
#Replace "Marburg" with "Marburg Biedenkopf" so that the cities in both sets are matching
uni_cities["city"]=uni_cities["city"].str.replace("Marburg", "Marburg Biedenkopf")

In [45]:
#Replace "Siegen" with "Siegen Wittgenstein" so that the cities in both sets are matching
uni_cities["city"]=uni_cities["city"].str.replace("Siegen", "Siegen Wittgenstein")

In [46]:
#Replace "Hohenheim" with "Stuttgart" because the University of Hohenheim is in Stuttgart
uni_cities["city"]=uni_cities["city"].str.replace("Hohenheim", "Stuttgart")


In [47]:
uni_cities

Unnamed: 0,old_index,world_rank,institution,country,national_rank,quality_of_education,quality_of_faculty,year,city
0,81,82,Ruprecht Karl University of Heidelberg,Germany,1,87,52,2012,Heidelberg
1,82,83,Ludwig Maximilian University of Munich,Germany,2,90,90,2012,München
2,89,90,Technical University of Munich,Germany,3,52,101,2012,München
3,166,67,Ludwig Maximilian University of Munich,Germany,1,62,92,2013,München
4,178,79,Ruprecht Karl University of Heidelberg,Germany,2,73,55,2013,Heidelberg
...,...,...,...,...,...,...,...,...,...
110,1992,793,University of Hohenheim,Germany,51,367,218,2015,Stuttgart
111,2000,801,University of Oldenburg,Germany,52,367,218,2015,Oldenburg
112,2056,857,University of Siegen,Germany,53,367,218,2015,Siegen Wittgenstein
113,2075,876,University of Osnabrück,Germany,54,367,218,2015,Osnabrück


##### 2.2. Appartment Data

In [48]:
appartments

Unnamed: 0,regio1,serviceCharge,balcony,telekomUploadSpeed,totalRent,yearConstructed,hasKitchen,cellar,baseRent,livingSpace,petsAllowed,lift,noRooms,garden,regio2,regio3
0,Nordrhein_Westfalen,245.00,False,10.0,840.0,1965.0,False,True,595.0,86.00,,False,4.0,True,Dortmund,Schüren
1,Rheinland_Pfalz,134.00,True,10.0,,1871.0,False,False,800.0,89.00,no,False,3.0,False,Rhein_Pfalz_Kreis,Böhl_Iggelheim
2,Sachsen,255.00,True,2.4,1300.0,2019.0,False,True,965.0,83.80,,True,3.0,False,Dresden,Äußere_Neustadt_Antonstadt
3,Sachsen,58.15,True,40.0,,1964.0,False,False,343.0,58.15,,False,3.0,False,Mittelsachsen_Kreis,Freiberg
4,Bremen,138.00,True,,903.0,1950.0,False,False,765.0,84.97,,False,3.0,False,Bremen,Neu_Schwachhausen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
268845,Bayern,90.00,True,10.0,910.0,2016.0,False,True,820.0,90.00,no,False,3.0,False,Weilheim_Schongau_Kreis,Eberfing
268846,Hessen,220.00,True,,1150.0,1983.0,True,False,930.0,115.00,negotiable,False,3.5,False,Bergstraße_Kreis,Viernheim
268847,Hessen,220.00,True,40.0,930.0,1965.0,False,True,650.0,95.00,negotiable,False,4.0,True,Limburg_Weilburg_Kreis,Limburg_an_der_Lahn
268848,Nordrhein_Westfalen,175.00,True,,1015.0,2019.0,False,True,840.0,70.00,no,True,2.0,False,Köln,Dellbrück


To combine the Appartment Data with the University Data we need to adjust the City column (regio2)

In [49]:
#First problem is that in regio2 words are seperated via "_" and not with an space
appartments["regio2"]=appartments["regio2"].str.replace("_", " ")


In [50]:
#Next problem is that in regio2 words are seperated via "_" and not with an space
appartments["regio2"]=appartments["regio2"].str.replace("ß", "ss")

In [51]:
#Next problem is that in regio2 "Halle" is called "Halle Saale"
appartments["regio2"]=appartments["regio2"].str.replace("Halle Saale", "Halle")


In [52]:
#Next problem is that in regio2 "Freiburg" is called "Freiburg im Breisgau"
appartments["regio2"]=appartments["regio2"].str.replace("Freiburg im Breisgau", "Freiburg")

In [53]:
#Next problem is that in regio2 some Citys are split into regions so we delete "Kreis" 
appartments["regio2"]=appartments["regio2"].str.replace(" Kreis", "")

In [54]:
#Next problem is that in regio2 the city Saarbrücken is called "Stadtverband Saarbrücken" 
appartments["regio2"]=appartments["regio2"].str.replace("Stadtverband ", "")

In [55]:
#The changes have worked
appartments["regio2"].unique()

array(['Dortmund', 'Rhein Pfalz', 'Dresden', 'Mittelsachsen', 'Bremen',
       'Schleswig Flensburg', 'Emmendingen', 'Gelsenkirchen', 'Chemnitz',
       'Südliche Weinstrasse', 'Hamm', 'Weimar', 'Main Kinzig',
       'Duisburg', 'Göttingen', 'Neumünster', 'Stuttgart', 'Leipzig',
       'München', 'Hamburg', 'Braunschweig', 'Esslingen', 'Magdeburg',
       'Schwerin', 'Passau', 'Mettmann', 'Vogtlandkreis', 'Gross Gerau',
       'Sächsische Schweiz Osterzgebirge', 'Görlitz',
       'Rheinisch Bergischer', 'Essen', 'Meissen', 'Mannheim',
       'Wesermarsch', 'Hochsauerlandkreis', 'Unna', 'Bautzen', 'Berlin',
       'Frankfurt am Main', 'Halle', 'Steinburg', 'Aschaffenburg',
       'Oder Spree', 'Bremerhaven', 'Zwickau', 'Nordsachsen',
       'Mansfeld Südharz', 'Alzey Worms', 'Giessen', 'Main Taunus',
       'Wuppertal', 'Viersen', 'Düsseldorf', 'Gera', 'Böblingen',
       'Würzburg', 'Kitzingen', 'Stendal', 'Nordvorpommern', 'Rhein Erft',
       'Mülheim an der Ruhr', 'Heilbronn', 'Hers

In [56]:
#The Appartments Data is grouped by the city so we can aggregate the rent and so on per City
app_city=appartments.groupby(["regio2"])

The mean total Rent per City

In [57]:
#Here we save the grouping of the total Rent by city as "meanofcityRent" and take the mean
meanofcityRent=app_city["totalRent"].mean()
meanofcityRent

regio2
Aachen                808.837117
Ahrweiler             811.195740
Aichach Friedberg    1037.368621
Alb Donau             894.634437
Altenburger Land      454.977522
                        ...     
Wuppertal             654.873826
Würzburg              906.299109
Zollernalbkreis       823.415200
Zweibrücken           605.617021
Zwickau               482.296626
Name: totalRent, Length: 394, dtype: float64

In [58]:
#We merge the menofcityRent to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(meanofcityRent, right_index=True, left_on="city", how="left")

In [59]:
uni_cities

Unnamed: 0,old_index,world_rank,institution,country,national_rank,quality_of_education,quality_of_faculty,year,city,totalRent
0,81,82,Ruprecht Karl University of Heidelberg,Germany,1,87,52,2012,Heidelberg,1348.422541
1,82,83,Ludwig Maximilian University of Munich,Germany,2,90,90,2012,München,1904.166454
2,89,90,Technical University of Munich,Germany,3,52,101,2012,München,1904.166454
3,166,67,Ludwig Maximilian University of Munich,Germany,1,62,92,2013,München,1904.166454
4,178,79,Ruprecht Karl University of Heidelberg,Germany,2,73,55,2013,Heidelberg,1348.422541
...,...,...,...,...,...,...,...,...,...,...
110,1992,793,University of Hohenheim,Germany,51,367,218,2015,Stuttgart,1487.458776
111,2000,801,University of Oldenburg,Germany,52,367,218,2015,Oldenburg,759.706757
112,2056,857,University of Siegen,Germany,53,367,218,2015,Siegen Wittgenstein,702.158013
113,2075,876,University of Osnabrück,Germany,54,367,218,2015,Osnabrück,796.577535


The median of totalRent per city




In [60]:
#Here we save the grouping of the total Rent by city as "medianoftotalRent" and take the median
medianoftotalRent=app_city["totalRent"].median()
medianoftotalRent

regio2
Aachen               735.0
Ahrweiler            760.0
Aichach Friedberg    985.0
Alb Donau            895.0
Altenburger Land     430.0
                     ...  
Wuppertal            585.0
Würzburg             870.0
Zollernalbkreis      800.0
Zweibrücken          550.0
Zwickau              440.0
Name: totalRent, Length: 394, dtype: float64

In [61]:
#We merge the medianoftotalRent to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(medianoftotalRent, right_index=True, left_on="city", how="left")

Mean of serviceCharge per city


In [62]:
meanofserviceCharge=app_city["serviceCharge"].mean()
meanofserviceCharge

regio2
Aachen               159.659296
Ahrweiler            164.033762
Aichach Friedberg    163.269231
Alb Donau            160.179000
Altenburger Land     113.986117
                        ...    
Wuppertal            141.145691
Würzburg             128.759734
Zollernalbkreis      171.581560
Zweibrücken          141.250000
Zwickau              130.361212
Name: serviceCharge, Length: 394, dtype: float64

In [63]:
uni_cities=uni_cities.merge(meanofserviceCharge, right_index=True, left_on="city", how="left")

Median of *serviceCharge* per city


In [64]:
medianofserviceCharge=app_city["serviceCharge"].median()
medianofserviceCharge

regio2
Aachen               150.0
Ahrweiler            150.0
Aichach Friedberg    150.0
Alb Donau            150.0
Altenburger Land     110.0
                     ...  
Wuppertal            125.0
Würzburg             130.0
Zollernalbkreis      150.0
Zweibrücken          135.0
Zwickau              125.0
Name: serviceCharge, Length: 394, dtype: float64

In [65]:
uni_cities=uni_cities.merge(medianofserviceCharge, right_index=True, left_on="city", how="left")

Percentage of *balcony*

In [66]:
def get_balcony_percentage_by_city(city_entries: pd.Series) -> float:
    vc = city_entries.value_counts(normalize = True)
    if True in vc:
        return vc[True]
    else:
        return 1 - vc[False]

test3 = app_city["balcony"].apply(get_balcony_percentage_by_city)

In [67]:
fertig=app_city["balcony"].apply(get_balcony_percentage_by_city).reset_index()

In [74]:
def get_balcony_percentage_by_city2(city_entries: pd.Series) -> float:
    vc = city_entries.value_counts(normalize = True)
    if True in vc:
        vc = vc.unstack(level=1).fillna(0)
        vc.columns = vc.columns.droplevel(0)
        return vc[True]
    else:
        return 1 - vc[False]

In [76]:
petsAllowedtest=app_city["petsAllowed"].apply(get_balcony_percentage_by_city2).reset_index()


KeyError: False

In [77]:
vc = vc.unstack(level=1).fillna(0)
vc.columns = vc.columns.droplevel(0)

NameError: name 'vc' is not defined

In [69]:
petsAllowedtest=app_city["petsAllowed"].value_counts(normalize=True).to_dict()
petsAllowedtest
#app_city(['id', 'group', 'term']).size().unstack(fill_value=0)

{('Aachen', 'no'): 0.5311315646995127,
 ('Aachen', 'negotiable'): 0.4401732539252842,
 ('Aachen', 'yes'): 0.028695181375203032,
 ('Ahrweiler', 'no'): 0.463302752293578,
 ('Ahrweiler', 'negotiable'): 0.44954128440366975,
 ('Ahrweiler', 'yes'): 0.0871559633027523,
 ('Aichach Friedberg', 'no'): 0.5360824742268041,
 ('Aichach Friedberg', 'negotiable'): 0.41237113402061853,
 ('Aichach Friedberg', 'yes'): 0.05154639175257732,
 ('Alb Donau', 'no'): 0.6875,
 ('Alb Donau', 'negotiable'): 0.28125,
 ('Alb Donau', 'yes'): 0.03125,
 ('Altenburger Land', 'negotiable'): 0.6397694524495677,
 ('Altenburger Land', 'yes'): 0.29971181556195964,
 ('Altenburger Land', 'no'): 0.06051873198847262,
 ('Altenkirchen Westerwald', 'no'): 0.5806451612903226,
 ('Altenkirchen Westerwald', 'negotiable'): 0.3978494623655914,
 ('Altenkirchen Westerwald', 'yes'): 0.021505376344086023,
 ('Altmarkkreis Salzwedel', 'no'): 0.4489795918367347,
 ('Altmarkkreis Salzwedel', 'negotiable'): 0.42857142857142855,
 ('Altmarkkreis Sal

In [70]:
#Here we calcuate the percentage of appartments containing a balcony
percentageofbalcony = app_city['balcony'].value_counts(normalize=True) #.to_frame()
percentageofbalcony
percentageofbalcony.rename["balcony"]

TypeError: 'method' object is not subscriptable

In [None]:
app_city['balcony'].value_counts(normalize=True).reset_index()

ValueError: cannot insert balcony, already exists

In [None]:
percentagebalcony.reset_index(drop=False, inplace=False)

ValueError: cannot insert balcony, already exists

In [None]:
filt_test=percentageofbalcony["balcony"]==False

percentageofbalcony.drop(index=percentageofbalcony[filt_test].index, inplace=True)
percentageofbalcony

Unnamed: 0_level_0,Unnamed: 1_level_0,balcony
regio2,balcony,Unnamed: 2_level_1
Aachen,True,0.617223
Aachen,False,0.382777
Ahrweiler,True,0.753943
Ahrweiler,False,0.246057
Aichach Friedberg,True,0.824427
...,...,...
Zollernalbkreis,False,0.385135
Zweibrücken,False,0.630952
Zweibrücken,True,0.369048
Zwickau,False,0.527280


In [None]:
percentageofbalcony = percentageofbalcony.drop(percentageofbalcony["balcony"==True].index)

KeyError: False

In [None]:
percentageoftruebalcony= percentageofbalcony.filter(items=["balcony"==True])
percentageoftruebalcony

Series([], Name: balcony, dtype: float64)

In [None]:
filt_balcony=(app_city["balcony"]==True)

percentageofbalcony[filt_balcony]

KeyError: False

In [None]:
#We merge the percentageofbalcony to the uni_cities DataFrame as new column
uni_cities.merge(fertig, right_index=True, left_on="city", how="left")

ValueError: You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat

The mean base rent

In [None]:
#Here we save the grouping of the base rent by city as "meanofbaseRent"
meanofbaseRent=app_city["baseRent"].mean()
meanofbaseRent

regio2
Aachen               632.835178
Ahrweiler            649.028864
Aichach Friedberg    874.308092
Alb Donau            766.675130
Altenburger Land     325.581940
                        ...    
Wuppertal            484.485008
Würzburg             762.524601
Zollernalbkreis      655.661486
Zweibrücken          446.357143
Zwickau              338.527671
Name: baseRent, Length: 394, dtype: float64

In [None]:
#We merge the meanofbaseRent to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(meanofbaseRent, right_index=True, left_on="city", how="left")

The median base rent

In [None]:
#Here we save the grouping of the base Rent by city as "medianofbaseRent"
medianofbaseRent=app_city["baseRent"].median()
medianofbaseRent

regio2
Aachen               570.0
Ahrweiler            570.0
Aichach Friedberg    795.0
Alb Donau            750.0
Altenburger Land     300.0
                     ...  
Wuppertal            420.0
Würzburg             737.5
Zollernalbkreis      650.0
Zweibrücken          412.5
Zwickau              301.0
Name: baseRent, Length: 394, dtype: float64

In [None]:
#We merge the medianofbaseRent to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(medianofbaseRent, right_index=True, left_on="city", how="left")

The mean living space

In [None]:
#Here we save the grouping of the living space by city as "meanoflivingSpace"
meanoflivingSpace=app_city["livingSpace"].mean()
meanoflivingSpace

regio2
Aachen               72.709100
Ahrweiler            83.978013
Aichach Friedberg    88.872290
Alb Donau            85.743368
Altenburger Land     63.751907
                       ...    
Wuppertal            70.879882
Würzburg             72.218859
Zollernalbkreis      84.952432
Zweibrücken          76.940000
Zwickau              63.899057
Name: livingSpace, Length: 394, dtype: float64

In [None]:
#We merge the meanoflivingSpace to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(meanoflivingSpace, right_index=True, left_on="city", how="left")

The median living space

In [None]:
#Here we save the grouping of living space by city as "medianoflivingSpace"
medianoflivingSpace=app_city["livingSpace"].median()
medianoflivingSpace

regio2
Aachen               70.00
Ahrweiler            78.00
Aichach Friedberg    85.00
Alb Donau            84.00
Altenburger Land     60.60
                     ...  
Wuppertal            67.32
Würzburg             70.00
Zollernalbkreis      82.50
Zweibrücken          77.50
Zwickau              60.00
Name: livingSpace, Length: 394, dtype: float64

In [None]:
#We merge the medianoflivingSpace to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(medianoflivingSpace, right_index=True, left_on="city", how="left")

The mean no. of rooms

In [None]:
#Here we save the grouping of the no of rooms by city as "noRooms"
meanofnoRooms=app_city["noRooms"].mean()
meanofnoRooms

regio2
Aachen               2.516520
Ahrweiler            2.679811
Aichach Friedberg    3.015267
Alb Donau            3.023316
Altenburger Land     2.506633
                       ...   
Wuppertal            2.493708
Würzburg             2.463225
Zollernalbkreis      2.979730
Zweibrücken          2.690476
Zwickau              2.457381
Name: noRooms, Length: 394, dtype: float64

In [None]:
#We merge the meanofnoRooms to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(meanofnoRooms, right_index=True, left_on="city", how="left")

The median no. of rooms

In [None]:
#Here we save the grouping of no of rooms by city as "medianofnoRooms"
medianofnoRooms=app_city["noRooms"].median()
medianofnoRooms

regio2
Aachen               2.5
Ahrweiler            3.0
Aichach Friedberg    3.0
Alb Donau            3.0
Altenburger Land     2.0
                    ... 
Wuppertal            2.0
Würzburg             2.5
Zollernalbkreis      3.0
Zweibrücken          3.0
Zwickau              2.0
Name: noRooms, Length: 394, dtype: float64

In [None]:
#We merge the medianofnoRooms to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(medianofnoRooms, right_index=True, left_on="city", how="left")

In [None]:
 ## Percentage of hasKitchen

In [None]:
#Here we save the percentages for appartments containing a kitchen as "percentageofhasKitchen"
percentageofhasKitchen = app_city['hasKitchen'].value_counts(normalize=True)

In [None]:
#We merge the percentageofhasKitchen to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(percentageofhasKitchen, right_index=True, left_on="city", how="left")

NameError: name 'percentageofhasKitchen' is not defined

In [None]:
## Percentage of cellar

In [None]:
#Here we save the percentages for appartments containing a cellar as "percentageofcellar"
percentageofcellar = app_city['cellar'].value_counts(normalize=True)
percentageofcellar

regio2             cellar
Aachen             True      0.677680
                   False     0.322320
Ahrweiler          True      0.621451
                   False     0.378549
Aichach Friedberg  True      0.679389
                               ...   
Zollernalbkreis    False     0.405405
Zweibrücken        False     0.547619
                   True      0.452381
Zwickau            True      0.687461
                   False     0.312539
Name: cellar, Length: 788, dtype: float64

In [None]:
#We merge the percentageofcellar to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(percentageofcellar, right_index=True, left_on="city", how="left")

ValueError: len(left_on) must equal the number of levels in the index of "right"

In [None]:
## Percentage of lift 


In [None]:
#Here we save the percentages for appartments containing a lift as "percentageoflift"
percentageoflift = app_city['lift'].value_counts(normalize=True)
percentageoflift

regio2             lift 
Aachen             False    0.741652
                   True     0.258348
Ahrweiler          False    0.719243
                   True     0.280757
Aichach Friedberg  False    0.709924
                              ...   
Zollernalbkreis    True     0.243243
Zweibrücken        False    0.928571
                   True     0.071429
Zwickau            False    0.905909
                   True     0.094091
Name: lift, Length: 784, dtype: float64

In [None]:
#We merge the percentageoflift to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(percentageoflift, right_index=True, left_on="city", how="left")

NameError: name 'percentageoflift' is not defined

In [None]:
## Percentage of garden

In [None]:
#Here we save the percentages for appartments containing a garden as "percentageofgarden"
percentageofgarden = app_city['garden'].value_counts(normalize=True)
percentageofgarden

regio2             garden
Aachen             False     0.776450
                   True      0.223550
Ahrweiler          False     0.776025
                   True      0.223975
Aichach Friedberg  False     0.725191
                               ...   
Zollernalbkreis    True      0.290541
Zweibrücken        False     0.928571
                   True      0.071429
Zwickau            False     0.762611
                   True      0.237389
Name: garden, Length: 788, dtype: float64

In [None]:
#We merge the percentageoflift to the uni_cities DataFrame as new column
uni_cities=uni_cities.merge(percentageoflift, right_index=True, left_on="city", how="left")

NameError: name 'percentageoflift' is not defined

In [None]:
uni_cities
#uni_cities.to_csv("data/final.csv")
uni_cities.to_excel("data/test.xlsx")

In [None]:
#end