In [269]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

In [270]:
# Load the listings from the Excel workbook; skiprows=1 skips the first
# spreadsheet row before the header row is read.
data = pd.read_excel(
    'HARGA RUMAH JAKSEL.xlsx',
    sheet_name='Sheet1',
    skiprows=1,
)
data.head()

Unnamed: 0,HARGA,LT,LB,JKT,JKM,GRS,KOTA
0,28000000000,1100,700,5,6,ADA,JAKSEL
1,19000000000,824,800,4,4,ADA,JAKSEL
2,4700000000,500,400,4,3,ADA,JAKSEL
3,4900000000,251,300,5,4,ADA,JAKSEL
4,28000000000,1340,575,4,5,ADA,JAKSEL


In [271]:
# Summary statistics of the price column (HARGA), shown as a single wide row.
data['HARGA'].describe().to_frame().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
HARGA,1001.0,17474720000.0,20795480000.0,430000000.0,6750000000.0,13500000000.0,20000000000.0,250000000000.0


## BIVARIAT

In [272]:
# Average price (HARGA) per land-area (LT) bin.
px.histogram(
    data_frame=data,
    x='LT',
    y='HARGA',
    histfunc='avg',
    nbins=5,
    title='Perbandingan Harga Dengan Luas Tanah',
    color_discrete_sequence=['black'],
)

In [273]:
# Average price (HARGA) per building-area (LB) bin.
px.histogram(
    data_frame=data,
    x='LB',
    y='HARGA',
    histfunc='avg',
    nbins=4,
    title='Harga Rumah Jakarta Selatan Berdasarkan Luas Bangunan',
    color_discrete_sequence=['#749F82'],
)

## UNIVARIAT

In [274]:
# Number of listings per garage label; reset_index() already yields a DataFrame.
data['GRS'].value_counts().reset_index()

Unnamed: 0,GRS,count
0,ADA,779
1,TIDAK ADA,222


In [275]:
# Share of listings per garage label.
px.pie(
    data_frame=data['GRS'].value_counts().reset_index(),
    names='GRS',
    values='count',
    title='Jumlah Rumah Jaksel yang Ada Garasi',
)

In [276]:
# Summary statistics of the bedroom count (JKT), shown as a single wide row.
data['JKT'].describe().to_frame().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
JKT,1001.0,4.457542,2.004606,1.0,4.0,4.0,5.0,27.0


In [277]:
# Count the listings for each bedroom count (JKT) and plot them as bars.
dataKamarTidur = (
    data['JKT']
    .value_counts()
    .reset_index()
    .rename(columns={'count': 'Banyak Rumah'})
)
px.bar(
    data_frame=dataKamarTidur,
    x='JKT',
    y='Banyak Rumah',
    title='Jumlah Kamar Tidur Rumah Jaksel',
)

In [298]:
# Count the listings for each bathroom count (JKM) and plot them as bars.
dataKamarMandi = (
    data['JKM']
    .value_counts()
    .reset_index()
    .rename(columns={'count': 'Banyak Rumah'})
)
px.bar(
    data_frame=dataKamarMandi,
    x='JKM',
    y='Banyak Rumah',
    title='Jumlah Kamar Mandi Rumah Jaksel',
)

In [279]:
# Tally the listings per city label in the KOTA column.
data.KOTA.value_counts()

KOTA
JAKSEL    1001
Name: count, dtype: int64

## PREPROCESSING

In [280]:
# KOTA holds the single value JAKSEL for every row, so it is removed.
# Reassign instead of dropping in place, and tolerate the column already being
# gone, so that re-running this cell does not raise KeyError. The former
# `axis=1` was redundant next to `columns=`.
data = data.drop(columns='KOTA', errors='ignore')

In [281]:
# Encode the garage flag as 1 (ADA) / 0 (TIDAK ADA).
# The dtype guard makes the cell safe to re-run: mapping an already-encoded
# column would turn every value into NaN, because 1 and 0 are not keys of the
# mapping.
if not pd.api.types.is_numeric_dtype(data['GRS']):
    grs_encoded = data['GRS'].map({'ADA': 1, 'TIDAK ADA': 0})
    # Labels outside the mapping would silently become NaN; report them instead.
    # Values that were already missing are left as NaN, as before.
    grs_unknown = data.loc[grs_encoded.isna() & data['GRS'].notna(), 'GRS'].unique()
    if len(grs_unknown) > 0:
        raise ValueError(f"Unexpected GRS labels: {list(grs_unknown)}")
    data['GRS'] = grs_encoded

In [282]:
# Preview the frame after dropping KOTA and encoding GRS.
data.head()

Unnamed: 0,HARGA,LT,LB,JKT,JKM,GRS
0,28000000000,1100,700,5,6,1
1,19000000000,824,800,4,4,1
2,4700000000,500,400,4,3,1
3,4900000000,251,300,5,4,1
4,28000000000,1340,575,4,5,1


In [283]:
# (rows, columns) before the quantile-based trimming in the next cell.
data.shape

(1001, 6)

In [284]:
# Trim extreme values: for each column in turn, keep only the rows lying
# strictly between that column's 1st and 99th percentiles. The percentiles are
# recomputed on the already-filtered frame at every step.
# NOTE: the comparisons are strict, so rows equal to a bound are removed too.
# NOTE: `data` is rebound here, so re-running this cell trims the data again.
for _col in ('LT', 'LB', 'JKT', 'JKM', 'HARGA'):
    _lower = data[_col].quantile(0.01)
    _upper = data[_col].quantile(0.99)
    data = data[(data[_col] > _lower) & (data[_col] < _upper)]

In [285]:
# (rows, columns) after the quantile-based trimming above.
data.shape

(823, 6)

## PREDICTION

In [286]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

In [287]:
# Feature matrix: every column except the target HARGA.
# Selecting by name rather than by position (iloc[:, 1:]) keeps the features
# correct even if the column order of `data` changes.
X = data.drop(columns='HARGA')
X

Unnamed: 0,LT,LB,JKT,JKM,GRS
0,1100,700,5,6,1
1,824,800,4,4,1
2,500,400,4,3,1
3,251,300,5,4,1
4,1340,575,4,5,1
...,...,...,...,...,...
993,169,215,4,4,1
996,488,550,6,5,1
997,209,270,4,4,1
998,692,400,4,3,0


In [288]:
# Target vector: the price column, selected by name rather than by position
# (iloc[:, 0]) so it does not depend on HARGA being the first column.
y = data['HARGA']
y

0      28000000000
1      19000000000
2       4700000000
3       4900000000
4      28000000000
          ...     
993     3500000000
996    16000000000
997     4500000000
998    29000000000
999     1700000000
Name: HARGA, Length: 823, dtype: int64

In [289]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [290]:
# Fit a random forest on the training split and predict the held-out rows.
# The seed is fixed (same value as the train/test split) so that the fitted
# model, the predictions and the pickled file are reproducible across kernel
# restarts; without it every run trains a different forest.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
hasil = model.predict(X_test)

In [291]:
# Predicted prices for the rows of X_test.
hasil

array([1.74980000e+10, 3.09430000e+09, 6.59405000e+09, 5.43750000e+09,
       1.52560000e+10, 2.31000000e+10, 2.14430000e+09, 1.21170000e+10,
       4.45500000e+09, 2.80617000e+10, 2.33570000e+10, 1.54544000e+10,
       2.64467500e+10, 1.39950000e+10, 1.62115000e+10, 1.92490000e+10,
       1.63727179e+10, 1.42848000e+10, 6.88770000e+09, 3.88740000e+10,
       3.27800000e+09, 1.38310000e+10, 9.80000000e+09, 2.66920000e+10,
       1.00780000e+10, 2.51599000e+10, 2.14970000e+10, 1.21822500e+10,
       5.07290000e+09, 1.13656333e+10, 2.52278333e+10, 2.92240000e+10,
       2.31660000e+10, 2.35310000e+10, 1.97445000e+10, 2.37469000e+10,
       1.26312500e+10, 5.13341667e+10, 4.13560000e+09, 1.22540000e+10,
       6.80750000e+09, 1.22380000e+10, 2.33469000e+10, 2.12655000e+10,
       9.04070000e+09, 3.88740000e+10, 2.44020000e+10, 5.74450000e+09,
       1.17494000e+10, 2.27330000e+09, 4.82290000e+10, 2.49210000e+10,
       6.34473333e+09, 1.69470000e+10, 1.61892000e+10, 1.64646667e+10,
      

In [292]:
def prediksi(LT, LB, JKT, JKM, GRS):
    """Print the model's price prediction for a single house.

    The arguments are placed in a one-row DataFrame whose columns are named
    LT, LB, JKT, JKM and GRS (in that order) and passed to the notebook-level
    ``model``. The resulting prediction array is printed; nothing is returned.
    """
    fitur = pd.DataFrame(
        {
            'LT': [LT],
            'LB': [LB],
            'JKT': [JKT],
            'JKM': [JKM],
            'GRS': [GRS],
        }
    )
    print(model.predict(fitur))

In [293]:
# Example prediction, with each feature named at the call site.
prediksi(LT=500, LB=600, JKT=2, JKM=2, GRS=1)

[1.74288167e+10]


In [294]:
import pickle

# Serialise the fitted model to disk under the file name that app.py opens.
with open('prediksi_harga_rumah.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [295]:
%%writefile app.py

import pickle
import pandas as pd
import streamlit as st

with open ("prediksi_harga_rumah.pkl", "rb") as f:
    model = pickle.load(f)

def prediksi(LT,LB,JKT,JKM,GRS):
    predict = pd.DataFrame()
    predict['Luas Tanah'] = [LT]
    predict['Luas Bangunan'] = [LB]
    predict['Jumlah Kamar Tidur'] = [JKT]
    predict['Jumlah Kamar Mandi'] = [JKM]
    predict['Garasi'] = [GRS]
    return(model.predict(predict)[0])   

lt = st.number_input("Luas Tanah")
lb = st.number_input("Luas Bangunan")
jkt = st.number_input("Jumlah Kamar Tidur")
jkm = st.number_input("Jumlah Kamar Mandi")
opt = st.selectbox(
    'Garasi',
    ('Ada', 'Tidak ada')
)

grs = 0
if(opt == 'Ada') :
    grs = 1
elif (opt == 'Tidak ada'):
    grs = 0
    
    
if(st.button('Predict')) :
    st.write("Harga rumah impianmu adalah Rp{:,}".format(prediksi(lt,lb,jkt,jkm,grs)))

Overwriting app.py


In [296]:
# Show the scikit-learn version installed in the environment that trained and
# pickled the model above.
import sklearn
sklearn.__version__

'1.3.1'