In [28]:
import pandas as pd
from sklearn.datasets import load_iris

# Materialise the scikit-learn Iris dataset as a CSV file: one column per
# entry in `iris.feature_names`, plus a 'target' column holding `iris.target`.
iris = load_iris()
data = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names,
).assign(target=iris.target)

# index=False keeps the row index out of the file, so the CSV contains only
# the feature columns and 'target'.
data.to_csv('data.csv', index=False)


In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Train a random-forest classifier on the table stored in 'data.csv',
# print its accuracy on a held-out split, and persist the fitted model
# to 'model.pkl'.
data = pd.read_csv('data.csv')

# Every column except 'target' is used as a feature; 'target' is the label.
X = data.drop(columns='target')
y = data['target']

# Hold out 20% of the rows for evaluation. The fixed seed makes the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Seed the forest as well: without random_state the bootstrap samples and
# feature sub-sampling differ between runs, so the saved model and the
# reported accuracy would not be reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

joblib.dump(model, 'model.pkl')
print("Model saved as 'model.pkl'.")


Model Accuracy: 1.0
Model saved as 'model.pkl'.


In [30]:
import pandas as pd
import joblib
import logging

# Batch prediction: load the model saved in 'model.pkl', score every row of
# 'data.csv', and write the predictions to 'predictions.csv'. Progress and
# failures are recorded in 'batch_prediction.log'.
logging.basicConfig(filename='batch_prediction.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

logging.info('Batch prediction process started.')

try:
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load model files from a trusted source.
    model = joblib.load('model.pkl')
    logging.info('Model loaded successfully.')

    data = pd.read_csv('data.csv')
    logging.info('Data loaded successfully.')

    # Replace missing values with 0. Reassigning instead of inplace=True
    # keeps this step a plain expression with no hidden mutation.
    data = data.fillna(0)
    logging.info('Data preprocessed (missing values filled).')

    # The label column, when present, must not be passed to the model.
    if 'target' in data.columns:
        data = data.drop(columns='target')
        logging.info("'target' column dropped for predictions.")

    predictions = model.predict(data)
    logging.info('Predictions made successfully.')

    output = pd.DataFrame({'Prediction': predictions})
    output.to_csv('predictions.csv', index=False)
    logging.info('Predictions saved to predictions.csv.')

except Exception as e:
    # logging.exception records the full traceback alongside the message,
    # so the failing step can be identified from the log file alone.
    # The error is deliberately not re-raised (best-effort batch run).
    logging.exception(f"An error occurred: {e}")
    logging.error('Batch prediction process failed.')
else:
    # Report completion only when every step above succeeded; previously
    # this line was also written after a failure.
    logging.info('Batch prediction process completed.')
