In [33]:
import pandas as pd

In [34]:
df = pd.read_csv("../data/processed/1/train.csv")
df.head()

Unnamed: 0,available_bike_stands,temperature,surface_pressure,apparent_temperature,dew_point
0,7,10.9,985.4,10.0,7.8
1,7,11.2,984.9,9.8,8.0
2,12,11.1,985.2,10.1,8.2
3,14,10.7,985.2,9.8,7.9
4,13,9.9,985.1,9.0,7.9


In [35]:
# print missing values
df.isna().sum()

available_bike_stands     0
temperature              31
surface_pressure         31
apparent_temperature     31
dew_point                31
dtype: int64

In [36]:
missing_indexes = df.index[df.isnull().any(axis=1)]
missing_indexes

Index([ 40,  44,  75, 103, 123, 155, 156, 174, 181, 210, 221, 228, 248, 260,
       274, 279, 293, 307, 339, 340, 354, 359, 360, 361, 362, 363, 364, 365,
       366, 367, 368],
      dtype='int64')

In [37]:
cols_to_fill = df.columns[df.isna().sum() > 0]
cols_to_fill

Index(['temperature', 'surface_pressure', 'apparent_temperature', 'dew_point'], dtype='object')

In [38]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

pipeline = Pipeline(steps=[
    ("scaler", MinMaxScaler()),
    ("model", RandomForestRegressor())
])

In [39]:
for col in cols_to_fill:
    incomplete_rows = df[df[col].isna()]
    complete_rows = df[~df[col].isna()]

    features = [c for c in df.columns if c != col]
    target = col

    X_train = complete_rows[features]
    y_train = complete_rows[target]

    pipeline.fit(X_train, y_train)

    predictions = pipeline.predict(incomplete_rows[features])

    df.loc[incomplete_rows.index, col] = predictions

In [40]:
df.isna().sum()

available_bike_stands    0
temperature              0
surface_pressure         0
apparent_temperature     0
dew_point                0
dtype: int64

In [41]:
df.loc[missing_indexes]

Unnamed: 0,available_bike_stands,temperature,surface_pressure,apparent_temperature,dew_point
40,19,11.224,990.004,9.256,6.185
44,1,11.186,989.238,9.294,6.322
75,3,11.21,991.652,9.398,6.277
103,13,11.216,985.184,9.263,5.779
123,9,11.19,982.202,9.228,4.1
155,22,11.19,989.8225,9.247,6.37
156,21,11.196,989.808,9.22,6.393
174,0,11.212,989.413,9.313,6.542
181,15,11.236,983.288,9.243,3.176
210,16,11.228,982.412,9.242,3.71
