In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from nltk.corpus import stopwords

In [2]:
# Load the CSV data.
# 'Heat No' and 'Wagon No' are identifiers, not quantities, so they are read
# explicitly as strings. A heat number such as "23E00914" is also valid
# scientific notation and "2303183" is purely numeric; with dtype inference a
# file (or parser chunk) containing only such values could be converted to
# numbers and the identifiers corrupted. Empty cells are still read as NaN, so
# the null checks on 'Heat No' further down keep working.
data = pd.read_csv('sampletest1.csv', dtype={'Heat No': str, 'Wagon No': str})

In [3]:
# Partition the rows by whether 'Heat No' is present: rows that have a heat
# number go to data_with_heat, rows where it is null go to data_missing_heat.
heat_is_missing = data['Heat No'].isna()
data_with_heat = data[~heat_is_missing]
data_missing_heat = data[heat_is_missing]

In [4]:
# From the rows that have a heat number, take the wagon numbers as the model
# input (X_train) and the heat numbers as the target labels (y_train).
X_train, y_train = (
    data_with_heat.loc[:, 'Wagon No'],
    data_with_heat.loc[:, 'Heat No'],
)

In [5]:
# Create a text processing pipeline: token counts from CountVectorizer are fed
# into a multinomial Naive Bayes classifier.
#
# No stop-word list is passed. The previous NLTK English stop-word list needs
# the NLTK 'stopwords' corpus to be downloaded beforehand (LookupError on a
# fresh environment), and English stop words are not expected to occur in
# alphanumeric wagon numbers, so it added a hidden dependency without
# filtering anything.
#
# NOTE(review): with the default word-level analyzer each wagon number is
# presumably a single token, so a wagon number never seen during training
# contributes no features to the prediction. Confirm whether character
# n-grams (e.g. analyzer='char_wb') are what is actually wanted here.
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('classifier', MultinomialNB())
])

In [6]:
# Fit the vectorizer and the classifier on the rows whose heat number is known.
pipeline.fit(X=X_train, y=y_train)

In [7]:
# Run the fitted pipeline on the wagon numbers of the rows that lack a heat
# number; the result holds one predicted heat number per such row.
wagons_missing_heat = data_missing_heat['Wagon No']
predicted_heat_numbers = pipeline.predict(wagons_missing_heat)


In [8]:
# Report each wagon number that had no heat number next to the heat number
# the model predicted for it, one line per row.
wagon_heat_pairs = zip(data_missing_heat['Wagon No'], predicted_heat_numbers)
for wagon_number, predicted_heat in wagon_heat_pairs:
    report_line = f"Wagon Number: {wagon_number} - Predicted Heat Number: {predicted_heat}"
    print(report_line)

Wagon Number: NCR94131810560   - Predicted Heat Number: 23F01059
Wagon Number: NCR94131810560   - Predicted Heat Number: 23F01059
Wagon Number: ECOR56120642279  - Predicted Heat Number: 23E00914
Wagon Number: SCR57091810261   - Predicted Heat Number: 2303183
Wagon Number: SER94072310525   - Predicted Heat Number: 2303295
Wagon Number: ECOR94122111607  - Predicted Heat Number: 23E00910
Wagon Number: SER55079960861   - Predicted Heat Number: 2303292
Wagon Number: SER94071359136   - Predicted Heat Number: 23E00996
Wagon Number: SER94071359136   - Predicted Heat Number: 23E00996
Wagon Number: SER94072310327   - Predicted Heat Number: 2303183
Wagon Number: SER94072310495   - Predicted Heat Number: 23F01023
Wagon Number: ER94021329387    - Predicted Heat Number: 23F01075
Wagon Number: ER94021329387    - Predicted Heat Number: 23F01075
Wagon Number: ER94022110809    - Predicted Heat Number: 23E00996
Wagon Number: SER55070063691   - Predicted Heat Number: 23E00994
Wagon Number: NR94031711264  