In [None]:
import pandas as pd
import numpy as np
from IPython.display import display
import lightgbm as lgb

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Show the installed pandas version.
# I run this from time to time to make sure everything is up to date.
pd.__version__
# Uncomment for a report that also covers pandas' dependencies:
#pd.show_versions()

In [None]:
# Paths to the competition files under the Kaggle input directory.
# NOTE(review): file_train and file_test are the same data.csv. This looks
# intentional, because later cells split that one file into complete rows
# (training) and rows with missing values (test) - confirm.
file_train = '/kaggle/input/tabular-playground-series-jun-2022/data.csv'
file_test = '/kaggle/input/tabular-playground-series-jun-2022/data.csv'
file_sampleSubmission = '/kaggle/input/tabular-playground-series-jun-2022/sample_submission.csv'

In [None]:
# Load the three CSVs and keep the raw frames in memory:
#   df_train            - training data
#   df_test             - test data
#   df_sampleSubmission - sample submission, kept for quick reference
df_train, df_test, df_sampleSubmission = (
    pd.read_csv(csv_path)
    for csv_path in (file_train, file_test, file_sampleSubmission)
)

In [None]:
# Preview the raw training data.
df_train

In [None]:
# Rows with at least one missing value are the ones whose gaps get predicted,
# so they form the test set.
# Reuse df_test (already loaded from file_test) instead of reading the CSV a
# second time; boolean-mask selection returns a new frame, so df_test itself
# is left untouched.
# reset_index() without drop=True keeps the previous row labels in a new
# `index` column.
df_new_test = df_test[df_test.isnull().any(axis=1)].reset_index()

In [None]:
# Training set: only the fully populated rows of the raw data.
# The plain reset_index() stores the previous row labels in an `index` column.
df_new_train = df_train.dropna().reset_index()

In [None]:
# Preview the rows that contain missing values (the test set).
df_new_test

In [None]:
# Preview the complete rows (the training set).
df_new_train

In [None]:
# Seed the submission frame with a single placeholder row.
# The placeholder is deleted again at the end, once predictions are in.
submission = pd.DataFrame({'row-col': ['remove me'], 'value': [0.00]})
submission

In [None]:
# Every test-data column that contains a missing value is a prediction target;
# the training loop makes each of them the target in turn.
null_cols = df_new_test.columns[df_new_test.isnull().any()]
col_targets = null_cols.tolist()

# display the target columns
null_cols


In [None]:
# Impute every target column in turn: for each column that has missing values,
# train a fresh LightGBM regressor on the complete rows and predict the gaps.

# The feature list is the same for every target, so build it once.
# `index` (left behind by reset_index) and `row_id` are bookkeeping columns,
# not features.
useful_features = [
    col for col in df_new_train.columns if col not in ('index', 'row_id')
]

# Per-target prediction frames are gathered here and concatenated once after
# the loop; calling pd.concat on every iteration re-copies all earlier rows.
submission_parts = []

for i, target in enumerate(col_targets):

    # keep a printed record of where we are
    print('starting', target, i, 'of', len(col_targets)-1)

    # the rows to predict are those where the target value is missing
    xtest = df_new_test[df_new_test[target].isnull()]

    # keep the row_id of each predicted row to build the submission key
    xtest_ids = xtest['row_id'].tolist()

    # features are all useful columns except the current target
    feature_cols = [col for col in useful_features if col != target]
    xtrain = df_new_train[feature_cols]
    ytrain = df_new_train[target]
    xtest = xtest[feature_cols]

    # train a new model for this target
    model = lgb.LGBMRegressor(random_state=0, n_jobs=-1,
                              n_estimators=1000)
    model.fit(xtrain, ytrain)

    # get the predictions
    test_preds = model.predict(xtest)

    # submission keys have the form "<row_id>-<column>"
    row_col_ids = [f'{row_id}-{target}' for row_id in xtest_ids]
    submission_parts.append(
        pd.DataFrame({'row-col': row_col_ids, 'value': test_preds})
    )

# NOTE: this appends to the existing `submission` frame, so re-running this
# cell without first re-creating `submission` duplicates the predictions.
submission = pd.concat([submission, *submission_parts])

In [None]:
# Remove the placeholder row that seeded the submission frame.
# Filtering on the placeholder's key (rather than slicing off the first row by
# position) makes this cell safe to re-run: a second run is a no-op instead of
# silently dropping a real prediction.
submission = submission[submission['row-col'] != 'remove me']

In [None]:
# Inspect the submission frame before saving it.
submission

In [None]:
# Write the submission to disk as CSV, with an explicit header row and
# without the DataFrame index.
column_names = ["row-col", "value"]
submission.to_csv(path_or_buf="submission.csv", index=False, header=column_names)