# Data preparation

In [None]:
# import packages 
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Read the silver dataset CSV from the working directory into a DataFrame.
# (The file name suggests it is the output of the EDA step.)
silver_data = pd.read_csv(filepath_or_buffer="silver_data_eda.csv")

### Select columns for the model

In [None]:
# List all column labels of the loaded frame.
silver_data.columns

Index(['bfe_nummer', 'adresse', 'offentlig_ejendomsværdi',
       'offentlige_grundværdi', 'seneste_handelspris', 'enhedsareal_beboelse',
       'pris_pr_m2', 'seneste_handelsdato', 'seneste_handlet_andel',
       'handelsmetode', 'anvendelse', 'opførelsesår',
       'tinglyst_areal_ejerlejligheder', 'longitude', 'latitude',
       'primær_ejers_adresse', 'vejnavn', 'husnr', 'etage', 'antal_værelser',
       'year', 'month', 'day'],
      dtype='object')

In [None]:
# Source columns carried into the modelling frame, in output order.
# 'pris_pr_m2' is not part of the selection.
model_columns = [
    # identifier and public valuations
    'bfe_nummer', 'offentlig_ejendomsværdi', 'offentlige_grundværdi',
    # sale price and unit size
    'seneste_handelspris', 'enhedsareal_beboelse', 'antal_værelser',
    # address and coordinates
    'vejnavn', 'husnr', 'longitude', 'latitude',
    # building attributes
    'opførelsesår', 'tinglyst_areal_ejerlejligheder',
    # sale date parts and floor
    'seneste_handelsdato', 'year', 'month', 'etage',
]

# .copy() gives an independent frame, so later changes do not touch silver_data.
silver_data_model = silver_data[model_columns].copy()

In [None]:
# Show the selected modelling frame.
silver_data_model

Unnamed: 0,bfe_nummer,offentlig_ejendomsværdi,offentlige_grundværdi,seneste_handelspris,enhedsareal_beboelse,antal_værelser,vejnavn,husnr,longitude,latitude,opførelsesår,tinglyst_areal_ejerlejligheder,seneste_handelsdato,year,month,etage
0,111445,2250000.0,326700.0,6200000,114.0,4,AmagerBoulevard,110,12.593788,55.666513,2018.0,101.0,2022-03-25,2022,3,1.0
1,103789,2100000.0,404900.0,4000000,83.0,2,Weidekampsgade,61,12.581636,55.667398,2006.0,71.0,2022-03-24,2022,3,1.0
2,135089,1350000.0,167400.0,3845000,78.0,4,Marengovej,12,12.621136,55.658203,1935.0,75.0,2022-03-21,2022,3,2.0
3,132821,1050000.0,97500.0,2375000,62.0,2,Elbagade,21,12.613412,55.653728,1932.0,54.0,2022-03-15,2022,3,5.0
4,131622,960000.0,82000.0,2725000,56.0,2,Caprivej,2,12.621147,55.660681,1937.0,53.0,2022-03-15,2022,3,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3595,107328,2450000.0,386900.0,3700000,90.0,3,Myggenæsgade,3,12.584155,55.665330,2006.0,79.0,2019-01-02,2019,1,2.0
3596,130268,580000.0,53400.0,1295000,32.0,1,Siciliensgade,11,12.618511,55.652964,1936.0,27.0,2019-01-02,2019,1,6.0
3597,134871,1850000.0,110700.0,3650000,94.0,3,Holmbladsgade,70,12.611411,55.665193,1913.0,90.0,2019-01-02,2019,1,5.0
3598,138884,560000.0,82200.0,1175000,34.0,1,Hyacintgården,9,12.612206,55.648623,1968.0,31.0,2019-01-02,2019,1,2.0


### Rename columns to English

In [None]:
# Danish -> English column names.
# Columns absent from the mapping (longitude, latitude, year, month) keep
# their current names.
danish_to_english = {
    'bfe_nummer': 'bfe',
    'offentlig_ejendomsværdi': 'gov_property_value',
    'offentlige_grundværdi': 'gov_land_value',
    'seneste_handelspris': 'sold_price',
    'seneste_handelsdato': 'sold_date',
    'enhedsareal_beboelse': 'unit_sqm_resi',
    'tinglyst_areal_ejerlejligheder': 'gov_sqm',
    'antal_værelser': 'rooms',
    'vejnavn': 'street_name',
    'husnr': 'street_number',
    'opførelsesår': 'building_year',
    'etage': 'floor',
}

# rename() returns a new frame; labels not found in the mapping are left as is,
# so re-running this cell is a no-op.
silver_data_model = silver_data_model.rename(columns=danish_to_english)

In [None]:
# Preview the first rows to check the renamed columns.
silver_data_model.head()

Unnamed: 0,bfe,gov_property_value,gov_land_value,sold_price,unit_sqm_resi,rooms,street_name,street_number,longitude,latitude,building_year,gov_sqm,sold_date,year,month,floor
0,111445,2250000.0,326700.0,6200000,114.0,4,AmagerBoulevard,110,12.593788,55.666513,2018.0,101.0,2022-03-25,2022,3,1.0
1,103789,2100000.0,404900.0,4000000,83.0,2,Weidekampsgade,61,12.581636,55.667398,2006.0,71.0,2022-03-24,2022,3,1.0
2,135089,1350000.0,167400.0,3845000,78.0,4,Marengovej,12,12.621136,55.658203,1935.0,75.0,2022-03-21,2022,3,2.0
3,132821,1050000.0,97500.0,2375000,62.0,2,Elbagade,21,12.613412,55.653728,1932.0,54.0,2022-03-15,2022,3,5.0
4,131622,960000.0,82000.0,2725000,56.0,2,Caprivej,2,12.621147,55.660681,1937.0,53.0,2022-03-15,2022,3,2.0


In [None]:
# Count missing values in each column.
silver_data_model.isna().sum()

bfe                   0
gov_property_value    0
gov_land_value        0
sold_price            0
unit_sqm_resi         0
rooms                 0
street_name           0
street_number         0
longitude             0
latitude              0
building_year         0
gov_sqm               0
sold_date             0
year                  0
month                 0
floor                 0
dtype: int64

### Save to CSV

In [None]:
# Write the modelling frame to disk; index=False keeps the row index out of the file.
silver_data_model.to_csv(path_or_buf="silver_data_model.csv", index=False)

### Data stats

In [None]:
# Report the frame's dimensions: .shape is (row count, column count).
print('Observations:', silver_data_model.shape[0])
print('Columns:', silver_data_model.shape[1])

Observations: 3600
Columns: 16


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=8a2d9d23-5445-401b-81ac-9e4d2df66dba' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>