In [1]:
from pathlib import Path

import pandas as pd
import numpy as np

In [2]:
# Folder that holds the competition CSV files; print its entries as a sanity check.
dataset_path = Path("./dataset/")
print([entry.name for entry in dataset_path.iterdir()])

['train.csv', 'train_v2.csv', 'subm_v3.csv', 'train_v1.csv', 'test_v1.csv', 'test.csv', 'sample_submission.csv']


In [3]:
# Read the train and test tables from the dataset folder.
train_df = pd.read_csv(dataset_path / "train.csv")
test_df = pd.read_csv(dataset_path / "test.csv")

# Total number of missing cells in each frame, counted over all columns.
print(train_df.isna().to_numpy().sum())
print(test_df.isna().to_numpy().sum())

# Last expression of the cell: (rows, columns) of both frames.
train_df.shape, test_df.shape

6085
1610


((39499, 15), (10500, 14))

In [4]:
# Columns passed to the tabular pipeline as categorical features.
cat_cols = [
    "Property_Type",
    "Furnishing",
    "Power_Backup",
    "Water_Supply",
    "Crime_Rate",
    "Dust_and_Noise",
]
# Columns passed to the tabular pipeline as continuous features.
num_cols = [
    "Property_Area",
    "Number_of_Windows",
    "Number_of_Doors",
    "Frequency_of_Powercuts",
    "Traffic_Density_Score",
    "Air_Quality_Index",
    "Neighborhood_Review",
]

In [5]:
def make_sub_file(test_ids, preds):
    """Assemble a two-column submission table.

    Args:
        test_ids: sequence of property identifiers, one per prediction.
        preds: sequence of predicted scores, aligned with ``test_ids``.

    Returns:
        A DataFrame with the columns ``Property_ID`` and
        ``Habitability_score``, in that order.
    """
    submission_columns = {
        "Property_ID": test_ids,
        "Habitability_score": preds,
    }
    return pd.DataFrame(submission_columns)

In [6]:
# Output folder for generated files; created (with any missing parents) if absent,
# and left untouched when it already exists.
results_path = Path("./results")
results_path.mkdir(parents=True, exist_ok=True)

In [8]:
import optuna
from fastai.tabular.all import *

In [9]:
# Hold out 20% of the training rows for validation. The seed is fixed so that a
# Restart & Run All reproduces the same train/validation split (and therefore
# comparable validation scores between runs).
splits = RandomSplitter(valid_pct=0.2, seed=42)(range_of(train_df))

In [11]:
# Wrap the training frame for fastai: declare the preprocessing steps, which
# columns are categorical / continuous, the regression target and the
# train/validation row indices computed earlier.
to = TabularPandas(
    train_df,
    procs=[Categorify, FillMissing, Normalize],
    cat_names=cat_cols,
    cont_names=num_cols,
    y_names="Habitability_score",
    splits=splits,
    y_block=RegressionBlock,
)

In [None]:
def objective(trial):
    """Optuna objective: train one tabular model and return its validation RMSE.

    Samples the batch size, the number of epochs and the three hidden-layer
    widths from the trial, trains a learner on the module-level ``to`` object
    with those settings, and reports the RMSE of the final epoch.

    Args:
        trial: the ``optuna.trial.Trial`` supplying hyper-parameter suggestions.

    Returns:
        Validation RMSE (float) after the last epoch; lower is better, so the
        study should minimise this value.
    """
    # Every suggest_* call needs a unique parameter name as its first argument.
    bs = trial.suggest_categorical("bs", [32, 64, 128, 256, 512])
    epochs = trial.suggest_int("epochs", 10, 100)
    layer1 = trial.suggest_int("layer1", 100, 250)
    layer2 = trial.suggest_int("layer2", 75, 100)
    layer3 = trial.suggest_int("layer3", 25, 75)

    # Use the sampled batch size rather than a fixed one, otherwise the
    # suggestion has no effect on the trial.
    dls = to.dataloaders(bs=bs)
    learner = tabular_learner(dls, layers=[layer1, layer2, layer3], metrics=rmse)
    learner.fit(epochs)

    # recorder.values holds one row per epoch; rmse is the only metric, so it
    # is the last entry of the last row.
    return float(learner.recorder.values[-1][-1])

In [12]:
# Build the DataLoaders from the `to` object, 256 rows per batch.
dls = to.dataloaders(bs=256)
# Uncomment to preview a few rows of a batch:
# dls.show_batch(max_n=10)

In [13]:
# Tabular learner with layer sizes 250, 120 and 50; RMSE is reported as the metric.
learner = tabular_learner(dls, layers=[250, 120, 50], metrics=rmse)
learner

# NOTE(review): the saved output below lists only epochs 0-9 although this call
# requests 100 epochs — the output looks stale or interrupted; re-run to confirm.
learner.fit(100)

epoch,train_loss,valid_loss,_rmse,time
0,5345.719727,5077.870605,71.259178,00:01
1,4863.506836,4749.646973,68.917679,00:00
2,4168.141113,3841.188477,61.977325,00:00
3,3292.656738,2874.783691,53.617012,00:00
4,2394.573242,2028.578857,45.039749,00:00
5,1564.924561,1224.976196,34.99966,00:00
6,874.139954,636.750122,25.233906,00:00
7,388.223572,237.958801,15.425913,00:00
8,130.867493,64.891045,8.055498,00:00
9,48.055115,36.215504,6.017932,00:00


In [16]:
# Show the layer-by-layer model summary (output shapes and parameter counts).
learner.summary()

TabularModel (Input shape: 256 x 8)
Layer (type)         Output Shape         Param #    Trainable 
                     256 x 5             
Embedding                                 35         True      
____________________________________________________________________________
                     256 x 3             
Embedding                                 12         True      
Embedding                                 12         True      
____________________________________________________________________________
                     256 x 4             
Embedding                                 24         True      
Embedding                                 20         True      
____________________________________________________________________________
                     256 x 3             
Embedding                                 12         True      
Embedding                                 9          True      
Embedding                                 9          

In [15]:
# Display a sample of validation rows next to the model's predictions.
learner.show_results()

# Run the test frame through the same preprocessing as training and predict.
dl = learner.dls.test_dl(test_df)
test_preds = learner.get_preds(dl=dl)

# get_preds returns a tuple whose first item holds the predictions; drop the
# singleton dimension and hand pandas a plain NumPy array rather than a tensor.
subm_df = make_sub_file(test_df.Property_ID.values, test_preds[0].squeeze().numpy())
subm_df.head()

# index=False keeps the file to the two submission columns; without it pandas
# writes the row index as an extra unnamed first column.
subm_df.to_csv(results_path/"fastai_subm_v1.csv", index=False)

Unnamed: 0,Property_Type,Furnishing,Power_Backup,Water_Supply,Crime_Rate,Dust_and_Noise,Number_of_Windows_na,Frequency_of_Powercuts_na,Property_Area,Number_of_Windows,Number_of_Doors,Frequency_of_Powercuts,Traffic_Density_Score,Air_Quality_Index,Neighborhood_Review,Habitability_score,Habitability_score_pred
0,6.0,3.0,3.0,4.0,1.0,3.0,1.0,1.0,0.447348,-0.350797,0.571072,-0.628708,-0.612549,-0.421441,-0.633887,76.940002,77.180321
1,2.0,3.0,2.0,4.0,2.0,0.0,1.0,1.0,-0.457103,-0.350797,-0.278625,-0.628708,-2.138301,-1.365182,0.343781,76.32,65.24501
2,6.0,3.0,2.0,1.0,2.0,3.0,1.0,1.0,0.231616,0.028456,-0.278625,0.711314,-1.014495,-0.597392,-0.220258,62.119999,66.573814
3,5.0,2.0,3.0,4.0,4.0,3.0,1.0,1.0,-0.179083,0.028456,0.571072,-0.628708,0.691723,0.090419,0.882752,87.239998,85.944405
4,6.0,2.0,2.0,4.0,4.0,3.0,1.0,1.0,-0.09198,-0.350797,0.571072,2.051337,-0.194198,-0.421441,-0.633887,71.839996,74.258965
5,2.0,3.0,2.0,4.0,4.0,3.0,1.0,1.0,-0.436847,-1.109304,-1.128323,0.711314,0.371807,0.010441,0.105631,73.809998,71.591454
6,6.0,2.0,2.0,1.0,4.0,3.0,1.0,1.0,0.352142,-0.730051,0.571072,-0.628708,-0.36646,-0.389449,1.321449,79.089996,76.004951
7,6.0,2.0,3.0,4.0,4.0,3.0,1.0,1.0,0.406328,0.028456,-1.128323,-0.628708,0.142124,0.378341,0.857684,90.290001,86.658348
8,2.0,2.0,3.0,1.0,1.0,3.0,1.0,1.0,-0.344173,0.028456,0.571072,-0.628708,0.724534,-0.229493,-0.270395,73.410004,71.248741


In [136]:
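# 84.83  -- NOTE(review): presumably the score obtained for fastai_subm_v1.csv; confirm and record the metric name.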