In [4]:
import ipywidgets as widgets
from IPython.display import display
import os

# --- Layouts ---------------------------------------------------------------
# Card container: a fixed-width, centred flex column with a light border.
box_layout = widgets.Layout(
    display="flex",
    flex_flow="column",
    align_items="center",
    border="1px solid #E0E0E0",
    width="400px",
    padding="25px",
    # NOTE(review): ipywidgets' Layout does not appear to expose a
    # `border_radius` trait, so this is probably ignored — confirm, and style
    # the corners through a CSS class if rounded corners are required.
    border_radius="10px"
)

# The save button and the uploader both stretch to the full card width.
button_layout = widgets.Layout(width="100%", height="40px")

upload_layout = widgets.Layout(width="100%")

# --- Static text -----------------------------------------------------------
# The emoji below was previously mis-encoded and showed up as garbled text.
title = widgets.HTML(
    "<h3 style='margin-bottom:10px;'>Upload CSV Files 👇</h3>"
)

subtitle = widgets.HTML(
    "<p style='color:gray; margin-top:0;'>Select one or more CSV files to upload and save</p>"
)

# --- Controls --------------------------------------------------------------
# Accepts several files at once, restricted to the .csv extension.
train_uploader = widgets.FileUpload(
    accept='.csv',
    multiple=True,
    layout=upload_layout
)

save_button = widgets.Button(
    description='Save Files',
    button_style='primary',
    icon='save',
    layout=button_layout
)

# Status line rewritten by the save handler after each attempt.
status = widgets.HTML("<p style='color:gray;'>Waiting for upload...</p>")

# Output area the save handler runs inside (it is cleared on every call).
output = widgets.Output()

def save_files(change=None):
    """Write every uploaded file to the working directory and update ``status``.

    Registered both as the observer of ``train_uploader.value`` and as the
    click handler of ``save_button``, so ``change`` may be a change dict, the
    clicked button, or ``None``; it is accepted only for signature
    compatibility and never inspected.

    Args:
        change: Ignored callback payload.

    Side effects:
        Clears ``output``, writes one file per upload under its base name, and
        sets ``status.value`` to a green success or red "nothing uploaded"
        message.
    """
    with output:
        output.clear_output()

        uploads = train_uploader.value
        if not uploads:
            status.value = "<p style='color:red;'>⚠️ No files uploaded yet</p>"
            return

        # ipywidgets 7 exposes ``value`` as {filename: info}; ipywidgets 8
        # exposes a tuple of dicts that carry the filename under 'name'.
        # Normalise both shapes to (filename, info) pairs.
        if isinstance(uploads, dict):
            entries = list(uploads.items())
        else:
            entries = [(info['name'], info) for info in uploads]

        saved_files = []
        for filename, file_info in entries:
            # The name is supplied by the client: keep only its last path
            # component so it cannot point outside the working directory.
            safe_name = os.path.basename(filename.replace("\\", "/"))
            if not safe_name:
                continue
            with open(safe_name, "wb") as f:
                f.write(file_info['content'])
            saved_files.append(safe_name)

        status.value = f"<p style='color:green;'>Saved {len(saved_files)} file(s) 👍</p>"

# Save automatically whenever the upload selection changes, and also on demand
# through the button; both routes share the same handler.
train_uploader.observe(save_files, names='value')
save_button.on_click(save_files)

# Stack the controls top to bottom inside the bordered card.
card = widgets.VBox(
    children=[title, subtitle, train_uploader, save_button, status],
    layout=box_layout,
)

# Render the card first, then the output area used by the handler.
display(card, output)

VBox(children=(HTML(value="<h3 style='margin-bottom:10px;'>Upload CSV Files 👇</h3>"), HTML(value="<p style='co…

Output()

In [2]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression

In [5]:
# Read the four numeric columns from 'file.csv', divide by 10,000 and round to
# whole units so the values are small integers.
df = (
    pd.read_csv('file.csv')[['R&D Spend', 'Administration', 'Marketing Spend', 'Profit']]
    .div(10000)
    .round()
)

In [9]:
# Keep only the three feature columns for imputation.
# errors='ignore' makes the cell idempotent: because `df` is rebound in place,
# a second run would otherwise raise KeyError once 'Profit' is already gone.
df = df.drop(columns=['Profit'], errors='ignore')
df

Unnamed: 0,R&D Spend,Administration,Marketing Spend
0,17.0,14.0,47.0
1,,15.0,44.0
2,15.0,10.0,41.0
3,14.0,,38.0
4,14.0,9.0,37.0
5,13.0,10.0,36.0
6,13.0,15.0,13.0
7,13.0,15.0,
8,12.0,15.0,31.0
9,12.0,11.0,30.0


In [8]:
# Count the missing values in each column.
# NOTE(review): the saved output still lists 'Profit' although the preceding
# cell drops that column, so this cell was evidently last executed before the
# drop — re-run the notebook top to bottom to refresh it.
df.isna().sum()

Unnamed: 0,0
R&D Spend,1
Administration,1
Marketing Spend,1
Profit,0


### **MAR - missing at random**

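One value per feature column was removed on purpose so that MICE (Multiple Imputation by Chained Equations) can be demonstrated and its estimates compared with the known originals: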

R&D Spend (NaN) = 162597.7

Administration (NaN) = 118671.85

Marketing Spend (NaN) = 323876.68

In [14]:
# Apply the same scaling used for the data frame (divide by 10,000, round to
# whole units) to the three withheld raw values, giving the reference numbers
# the imputed values are compared against.
for label, raw_value in (
    ("r&d spend NaN:", 162597.7),
    ("administration NaN:", 118671.85),
    ("marketing spend NaN:", 323876.68),
):
    # Labels are left-aligned in a 22-character field so the values line up.
    print(f"{label:<22}{np.round(raw_value/10000)}")

r&d spend NaN:        16.0
administration NaN:   12.0
marketing spend NaN:  32.0


In [23]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# Iterative (MICE-style) imputation with a plain linear regression as the
# per-feature estimator; the fixed random_state keeps runs repeatable.
imputer = IterativeImputer(
    estimator=LinearRegression(),
    max_iter=50,
    random_state=42,
)

# The transformed result is wrapped back into a DataFrame in a single step,
# carrying over the column labels *and* the row index of `df` so imputed rows
# stay aligned with their source rows even if `df` does not use a default
# RangeIndex.
df_imp = pd.DataFrame(
    imputer.fit_transform(df),
    columns=df.columns,
    index=df.index,
)

In [24]:
# Show the imputed frame rounded to two decimal places (display only;
# `df_imp` itself keeps full precision).
df_imp.round(decimals=2)

Unnamed: 0,R&D Spend,Administration,Marketing Spend
0,17.0,14.0,47.0
1,15.93,15.0,44.0
2,15.0,10.0,41.0
3,14.0,12.05,38.0
4,14.0,9.0,37.0
5,13.0,10.0,36.0
6,13.0,15.0,13.0
7,13.0,15.0,27.8
8,12.0,15.0,31.0
9,12.0,11.0,30.0


### **Results**

In [25]:
# Side-by-side comparison of the withheld values and the imputer's estimates.
# Row order: R&D Spend, Administration, Marketing Spend. The numbers are
# copied by hand from the reference printout and the imputed table shown
# earlier in the notebook.
original_values = [16, 12, 32]
predicted_values = [15.93, 12.05, 27.8]

# One [original, predicted] pair per row.
results = [list(pair) for pair in zip(original_values, predicted_values)]
r_df = pd.DataFrame(
    data=results,
    columns=['Original NaN values', 'Predicted NaN values'],
)

r_df

Unnamed: 0,Original NaN values,Predicted NaN values
0,16,15.93
1,12,12.05
2,32,27.8
