In [6]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

In [2]:
# Load CSV data 
df = pd.read_csv('mat,wagonremoved.csv')

In [3]:
# Replace non-numeric values with NaN  
df['Heat No'] = pd.to_numeric(df['Heat No'], errors='coerce')
df['Mat Code'] = pd.to_numeric(df['Mat Code'], errors='coerce')


In [4]:
# Identify missing values
heat_no_missing = df[df['Heat No'].isnull()]  
mat_code_missing = df[df['Mat Code'].isnull()]


In [7]:
# Impute missing values
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')

In [8]:
imputer.fit(df[['Heat No']])  

In [9]:
heat_no_filled = imputer.transform(heat_no_missing[['Heat No']])

In [10]:
imputer.fit(df[['Mat Code']])

In [11]:
mat_code_filled = imputer.transform(mat_code_missing[['Mat Code']])


In [12]:
# Concatenate and preview imputed values 
imputed = pd.concat([pd.DataFrame(heat_no_filled, columns=['Heat No']),
                     pd.DataFrame(mat_code_filled, columns=['Mat Code'])],
                     axis=1)

In [13]:
print(imputed.head())

     Heat No      Mat Code
0  2303249.0  1.512000e+14
1  2303249.0  1.512000e+14
2  2303249.0  1.512000e+14
3  2303249.0  1.512000e+14
4  2303249.0  1.512000e+14


In [14]:
imputed

Unnamed: 0,Heat No,Mat Code
0,2303249.0,1.512000e+14
1,2303249.0,1.512000e+14
2,2303249.0,1.512000e+14
3,2303249.0,1.512000e+14
4,2303249.0,1.512000e+14
...,...,...
293,2303249.0,
294,2303249.0,
295,2303249.0,
296,2303249.0,
