In [548]:
import pandas as pd
# import numpy as np

from sklearn.preprocessing import LabelEncoder

# Show every column when a DataFrame is rendered (None removes the column limit).
pd.options.display.max_columns = None

In [549]:
# Read the raw spreadsheet and translate the Norwegian column headers to the
# English names that the rest of the notebook refers to.
df = pd.read_excel('../Felles/TrondheimHousingData.xlsx').rename(
    columns={
        'Adresse': 'Address',
        'Boligtype': 'Housing type',
        'Prom': 'Primary room',
        'BRA': 'Usable area',
        'BTA': 'Gross area',
        'Byggeår': 'Year of construction',
        'Etasje': 'Floor',
        'Balkong': 'Balcony',
        'Parkering': 'Parking',
        'Tomtestørrelse': 'Plot size',
        'Sist solgt': 'Last sold',
        'Pris': 'Price',
        'Fellesgjeld ved siste salg': 'Joint debt at last sale',
        'Totalpris ved siste salg': 'Total price at last sale',
        'Antall rom': 'Number of rooms',
        'Antall soverom': 'Number of bedrooms',
        'Hjemmelshaver': 'Owner',
        'Registrert dato': 'Registered date',
        'Omsetningshastighet (dager)': 'Turnover rate',
        'Megler': 'Broker',
    }
)

In [550]:
# Remove the 'Owner' column. Reassigning instead of mutating in place, together
# with errors='ignore', lets this cell be re-run without a KeyError once the
# column is already gone.
df = df.drop(columns=['Owner'], errors='ignore')

In [551]:
# Extract the four digits that follow a comma in the address
# (e.g. "Vår Frue gate 8, 7013 TRONDHEIM" -> 7013).
# A pattern match is used instead of taking the second comma-separated piece,
# so an address with an extra comma before the postal code still yields the
# right digits. Rows without a match become NaN and make the int cast fail
# loudly rather than silently producing a wrong code.
df['Postal code'] = (
    df['Address']
    .str.extract(r',\s*(\d{4})', expand=False)
    .astype(int)
)

In [552]:
# One-hot encode the Ja/Nei columns and keep only the "yes" indicator for each.
# - Built as a single chain so no intermediate frame is mutated in place.
# - Both "Nei" indicators are dropped (each is the redundant complement of its
#   "Ja" column); errors='ignore' covers a column whose data has no 'Nei'
#   values, in which case get_dummies never creates that indicator.
df = (
    pd.get_dummies(df, columns=['Balcony', 'Parking'], dtype=int)
    .drop(columns=['Balcony_Nei', 'Parking_Nei'], errors='ignore')
    .rename(columns={'Balcony_Ja': 'Balcony_Yes', 'Parking_Ja': 'Parking_Yes'})
)

In [553]:
numeric_columns = [
    'Number of rooms',
    'Turnover rate',
    'Number of bedrooms',
    'Floor',
    'Year of construction',
    'Usable area',
    'Gross area',
    'Primary room',
]
# Missing values are replaced by 0 first, because a plain int column cannot
# hold NaN; then every listed column is cast to int in one step.
df[numeric_columns] = df[numeric_columns].fillna(0).astype(int)

In [554]:
# A row is kept only when none of the exclusion criteria holds:
#   - 'Last sold' is the '-' marker,
#   - the housing type is 'Annet',
#   - the construction year is 0.
mask = ~(
    (df['Last sold'] == '-')
    | (df['Housing type'] == 'Annet')
    | (df['Year of construction'] == 0)
)
df = df[mask].reset_index(drop=True)

In [555]:
# Parse both date columns to datetimes; dayfirst=True makes ambiguous
# day/month strings resolve as day-first.
for date_column in ('Registered date', 'Last sold'):
    df[date_column] = pd.to_datetime(df[date_column], dayfirst=True)

In [556]:
# Give rows without a broker a sentinel string so the column contains only text.
df = df.fillna({'Broker': 'Placeholder'})

In [557]:
def classify_broker(broker):
    """Map a free-text broker name onto one of the known broker chains.

    Matching is a case-insensitive substring test against each known chain,
    tried in the order listed below; the first chain that matches wins.

    Parameters
    ----------
    broker : str or missing value
        Raw broker name. Anything that is not a string (``None``, ``NaN``)
        is treated as an unknown broker instead of raising.

    Returns
    -------
    str
        The canonical chain name, or ``'Other'`` when no chain matches.
    """
    # Order matters: "aktiv" is a substring of "proaktiv", so 'Proaktiv' has to
    # be tested before 'Aktiv' or every Proaktiv listing would be mislabelled.
    known_brokers = (
        'Proaktiv',
        'Aktiv',
        'DNB Eiendom',
        'EIE Eiendomsmegling',
        'Eiendomsmegler 1',
        'Heimdal Eiendomsmegling',
        'Krogsveen',
        'Lokalmegleren',
        'Meglerhuset Nylander',
        'Privatmegleren',
        'Propr',
    )

    if not isinstance(broker, str):
        return 'Other'

    lowered = broker.lower()
    for name in known_brokers:
        if name.lower() in lowered:
            return name
    return 'Other'

In [558]:
# Replace each raw broker name with the label returned by classify_broker.
df['Broker'] = df['Broker'].map(classify_broker)

In [559]:
# One encoder per column: fit_transform replaces the classes an encoder has
# learned, so a single shared instance would only remember the last column it
# was fitted on and the broker codes could no longer be decoded.
broker_encoder = LabelEncoder()
housing_type_encoder = LabelEncoder()

df['Broker encoded'] = broker_encoder.fit_transform(df['Broker'])
df['Housing type encoded'] = housing_type_encoder.fit_transform(df['Housing type'])

# Backward-compatible alias: `encoder` used to end this cell fitted on
# 'Housing type', so any code that still reads it sees the same state.
encoder = housing_type_encoder

In [560]:
# Number of missing values in each column.
df.isna().sum()

Matrikkel / Org-Anr            0
Address                        0
Housing type                   0
Primary room                   0
Usable area                    0
Gross area                     0
Year of construction           0
Floor                          0
Plot size                    485
Last sold                      0
Price                          0
Joint debt at last sale     4300
Total price at last sale       0
Number of rooms                0
Number of bedrooms             0
Registered date                0
Turnover rate                  0
Broker                         0
Postal code                    0
Balcony_Yes                    0
Parking_Yes                    0
Broker encoded                 0
Housing type encoded           0
dtype: int64

In [561]:
# Display the resulting DataFrame as the cell output.
df

Unnamed: 0,Matrikkel / Org-Anr,Address,Housing type,Primary room,Usable area,Gross area,Year of construction,Floor,Plot size,Last sold,Price,Joint debt at last sale,Total price at last sale,Number of rooms,Number of bedrooms,Registered date,Turnover rate,Broker,Postal code,Balcony_Yes,Parking_Yes,Broker encoded,Housing type encoded
0,5001-400/93/0/12,"Erling Skakkes gate 1, 7013 TRONDHEIM",Selveierleilighet,40,40,40,1974,2,667.5,2023-12-20,3100000,0.00,3100000,2,1,2023-04-28,236,Heimdal Eiendomsmegling,7013,1,0,4,7
1,5001-400/123/0/3,"Vår Frue gate 8, 7013 TRONDHEIM",Selveierleilighet,49,49,54,1870,2,193.4,2023-12-04,3150000,55723.23,3205723.23,2,1,2023-06-07,180,Privatmegleren,7013,0,0,9,7
2,5001-400/102/0/5,"Schultz gate 8, 7013 TRONDHEIM",Selveierleilighet,53,53,58,1902,3,304.1,2023-11-22,4000000,25638.00,4025638,3,2,2023-11-09,13,Privatmegleren,7013,1,0,9,7
3,5001-400/93/0/10,"Erling Skakkes gate 1, 7013 TRONDHEIM",Selveierleilighet,46,46,50,1974,2,667.5,2023-08-21,3125000,0.00,3125000,2,1,2023-08-01,20,Proaktiv,7013,0,0,10,7
4,5001-400/123/0/5,"Vår Frue gate 8, 7013 TRONDHEIM",Selveierleilighet,42,42,0,1870,3,193.4,2023-07-12,3700000,51026.00,3751026,2,2,2023-07-04,8,Proaktiv,7013,0,0,10,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20583,5001-521/491/0/4,"Nordalsvegen 3, 7540 KLÆBU",Selveierleilighet,55,58,65,2017,2,998.3,2021-03-12,2390000,0.00,2390000,2,1,2021-01-11,60,Eiendomsmegler 1,7540,1,0,3,7
20584,5001-520/121/0/9,"Stormyrvegen 8 A, 7540 KLÆBU",Selveierleilighet,51,54,59,1990,1,2812.9,2021-03-12,2260000,99625.00,2359625,2,1,2021-02-06,34,Krogsveen,7540,0,1,5,7
20585,5001-520/53/0/3,"Torvmarkvegen 6 A, 7540 KLÆBU",Selveier rekkehus,112,125,130,1998,0,1026.0,2021-03-09,3500000,0.00,3500000,0,3,2019-09-05,551,Heimdal Eiendomsmegling,7540,1,0,4,5
20586,5001-521/519/0/10,"Granmovegen 18, 7540 KLÆBU",Selveierleilighet,68,68,0,2018,1,2190.4,2021-03-05,2950000,0.00,2950000,3,2,2021-02-23,10,Eiendomsmegler 1,7540,1,0,3,7
