Load data from a CSV file into a pandas DataFrame.

In [None]:
import pandas as pd

# Read the raw dataset from data.csv (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='data.csv')

Check the data types of each column in the DataFrame.

In [None]:
# Print the dtype pandas assigned to each column of df.
print(df.dtypes)

Handle missing values in the numeric columns by replacing them with each column's mean.

In [None]:
# Compute means over numeric columns only. Without numeric_only=True,
# pandas >= 2.0 raises TypeError as soon as df contains a non-numeric
# (e.g. object/string) column, because the default no longer skips them.
numeric_means = df.mean(numeric_only=True)

# fillna with a Series fills each column named in its index; columns that
# have no entry in numeric_means (the non-numeric ones) are left untouched.
df.fillna(numeric_means, inplace=True)

As part of exploratory data analysis (EDA), visualize any remaining missing values with a heatmap.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Draw a boolean mask of df (True where a value is missing) as a heatmap,
# without a colour bar, then render the figure.
sns.heatmap(data=df.isnull(), cmap='viridis', cbar=False)
plt.show()

Identify categorical features in the DataFrame.

In [None]:
# Collect the names of the non-numeric feature columns to be encoded.
# - 'category' is included alongside 'object' so columns already stored as
#   pandas categoricals are picked up as well.
# - 'target' is excluded: it is the label, not a feature. If it were
#   one-hot encoded, the 'target' column would be replaced by dummy columns
#   and the later df['target'] lookup would raise KeyError.
categorical_features = [
    column
    for column in df.select_dtypes(include=['object', 'category']).columns
    if column != 'target'
]

Prepare data by converting categorical features to numerical using one-hot encoding.

In [None]:
# One-hot encode the columns listed in categorical_features.
# drop_first=True drops the first level of each encoded column.
df = pd.get_dummies(
    data=df,
    columns=categorical_features,
    drop_first=True,
)

Split the dataset into training and test sets.

In [None]:
from sklearn.model_selection import train_test_split
# Separate the label column from the feature matrix.
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Create a model using the Random Forest classifier.

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Base estimator handed to the hyperparameter search.
# Seeding the forest makes its bootstrap sampling and feature selection
# repeatable, consistent with the random_state=42 used for the train/test
# split; without it every run can produce a different model and score.
model = RandomForestClassifier(random_state=42)

Tune the model's hyperparameters using GridSearchCV.

In [None]:
from sklearn.model_selection import GridSearchCV
# Candidate hyperparameter values; every combination (2 x 2 = 4) is tried.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10],
}

# Score each combination with 5-fold cross-validation on the training split.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

Evaluate the model by calculating accuracy on the test set.

In [None]:
from sklearn.metrics import accuracy_score
# Predict labels for the held-out test split with the fitted grid search
# and compare them against the true labels.
y_pred = grid.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report the score: without this the evaluation cell produces no visible
# output and the computed accuracy is never shown.
print(f"Test accuracy: {accuracy:.4f}")

Predict outcomes using the trained model on the test set.

In [None]:
# Predict labels for the test-set features with the fitted grid search.
final_predictions = grid.predict(X=X_test)

Finalize the model by saving the fitted grid-search object to disk with joblib (`final_model.pkl`).

In [None]:
import joblib
# Serialize the fitted grid-search object to final_model.pkl.
# NOTE: this stores the whole `grid` object, not only the best estimator.
joblib.dump(value=grid, filename='final_model.pkl')

Output the final predictions generated by the model.

In [None]:
# Display the predictions stored in final_predictions.
print(final_predictions)