Load data from a CSV file using pandas.

In [None]:
import pandas as pd
# Read data.csv (resolved relative to the current working directory) into a
# DataFrame. Later cells read the 'text', 'label', and 'id' columns from it.
data = pd.read_csv('data.csv')

Perform Exploratory Data Analysis (EDA): draw box plots to visualize the distribution of each column.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# set_theme is the preferred spelling; sns.set is a legacy alias for it.
sns.set_theme(style='whitegrid')

# Draw on an explicit Axes so the figure can be titled and does not depend on
# pyplot's implicit "current axes" state.
fig, ax = plt.subplots()
# Wide-form call: `data` is passed whole, with no x/y mapping.
# NOTE(review): seaborn appears to plot only the numeric columns here -- confirm.
sns.boxplot(data=data, ax=ax)
ax.set_title('Column distributions')
# plt.show() renders the figure without leaving the Axes repr as cell output.
plt.show()

Handle missing values by forward filling: each missing entry is replaced with the most recent non-missing value above it in the same column.

In [None]:
# Forward-fill: each missing value takes the most recent non-missing value
# above it in the same column. Rows that precede a column's first valid value
# have nothing to copy from and stay NaN.
# DataFrame.ffill() replaces fillna(method='ffill'): the `method` argument is
# deprecated in pandas 2.1+. Rebinding `data` instead of using inplace=True
# yields the same filled frame under the same name.
data = data.ffill()

Clean the text data by removing special characters.

In [None]:
# Strip every character that is not an ASCII letter, a digit, or a space.
# fillna('') runs first because forward filling cannot fill rows that precede
# the first valid value; .str.replace would pass those NaNs through unchanged,
# and the vectorizer in the next step expects string documents.
data['text'] = (
    data['text']
    .fillna('')
    .str.replace('[^a-zA-Z0-9 ]', '', regex=True)
)

Convert the cleaned text into a bag-of-words matrix of token counts using Count Vectorization.

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Learn the token vocabulary from the text column and encode every row as
# token counts in a single pass. Despite its name, `vocabulary` receives the
# value returned by fit_transform (the per-document count matrix), not the
# list of words; the learned vocabulary itself stays on `vectorizer`.
# NOTE(review): the vectorizer is fitted on all rows before the train/test
# split, so test-set tokens contribute to the vocabulary -- confirm this is
# acceptable for the evaluation.
vectorizer = CountVectorizer()
vocabulary = vectorizer.fit_transform(raw_documents=data['text'])

Split data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. random_state pins the shuffle so that
# Restart & Run All reproduces the same split (and therefore the same model
# and predictions) on every run.
X_train, X_test, y_train, y_test = train_test_split(
    vocabulary,
    data['label'],
    test_size=0.2,
    random_state=42,
)

Train a model using Naive Bayes.

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Build the classifier with default settings and fit it on the training split
# in one expression; fit() returns the estimator itself, so `model` is the
# fitted classifier.
model = MultinomialNB().fit(X_train, y_train)
# Leave the estimator as the cell's final expression so it is displayed.
model

Generate predictions using the trained model.

In [None]:
# Predict a label for each row of the held-out test matrix using the model
# fitted in the previous step.
predictions = model.predict(X_test)

Generate output and save predictions to a CSV file.

In [None]:
# Look up the ids of the held-out rows through y_test. X_test was split from
# the CountVectorizer output, a SciPy sparse matrix with no `.index`, whereas
# y_test was split from data['label'] and still carries the original row labels.
# These are index *labels*, so select with .loc rather than positional .iloc.
test_ids = data.loc[y_test.index, 'id'].to_numpy()

# Both columns are plain arrays in test-row order, so they pair up positionally.
output = pd.DataFrame({'Id': test_ids, 'Prediction': predictions})
output.to_csv('predictions.csv', index=False)