Load the dataset from a CSV file.

In [None]:
import pandas as pd

# Read the raw CSV into a DataFrame; the cells that follow all operate on `df`.
csv_path = 'file_path.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)

Get an overview of the dataset structure and data types.

In [None]:
# Print a per-column summary of the frame: column names, non-null counts and dtypes.
df.info()

Generate summary statistics of numerical columns.

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max); with default
# arguments pandas limits this to the numeric columns when the frame has any.
# Being the cell's last expression, the resulting table is rendered as output.
df.describe()

Check for missing values in each column.

In [None]:
# Count the null entries in every column.
missing_values = df.isnull().sum()

# A bare assignment renders nothing; ending the cell with the Series makes the
# per-column counts actually visible in the notebook output.
missing_values

Visualize correlations between features using a heatmap.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Pearson correlation is only defined for numeric data. Since pandas 2.0,
# DataFrame.corr() no longer drops text columns silently and raises instead,
# so restrict the frame to numeric/boolean columns before correlating.
numeric_corr = df.select_dtypes(include=['number', 'bool']).corr()

# annot=True writes each coefficient into its cell; 'coolwarm' is a diverging
# palette, which suits values that range from negative to positive.
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm')
plt.show()

Clean the dataset by removing rows with missing values.

In [None]:
# Drop every row that contains at least one missing value.
# Rebinding instead of inplace=True follows current pandas guidance (inplace
# brings no performance benefit and prevents method chaining); the name `df`
# is kept so the cells below continue to work unchanged.
df = df.dropna()

Create a count plot for the rating column.

In [None]:
# Bar chart showing how many rows fall on each distinct value of `rating`.
rating_ax = sns.countplot(data=df, x='rating')
plt.show()

Create a count plot for the cocoa percentage column.

In [None]:
# Bar chart showing how many rows fall on each distinct value of `cocoa_percent`.
cocoa_ax = sns.countplot(data=df, x='cocoa_percent')
plt.show()

Prepare features and target, then split data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split
# The label is the 'target_column' column; every remaining column is a predictor.
target = df['target_column']
features = df.drop(columns=['target_column'])

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=1,
)

Train a Random Forest model on the training data.

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forests are stochastic (bootstrap sampling and random feature
# selection), so an unseeded model gives different results on every run.
# Reuse the seed value from the train/test split so a Restart & Run All
# reproduces the same model.
model = RandomForestClassifier(random_state=1)
model.fit(X_train, y_train)

Make predictions using the trained model.

In [None]:
# Predict a label for every row of the held-out test features; the result is
# reused by the evaluation and plotting cells that follow.
predictions = model.predict(X_test)

Evaluate model performance using a confusion matrix.

In [None]:
from sklearn.metrics import confusion_matrix
# Rows are the true labels, columns the predicted labels (scikit-learn convention).
cm = confusion_matrix(y_test, predictions)

# A bare assignment renders nothing; ending the cell with `cm` shows the matrix
# so the evaluation is actually visible in the notebook output.
cm

Display a count plot of the final ratings predicted by the model.

In [None]:
# Bar chart of how often the model predicted each label on the test set.
predicted_ax = sns.countplot(x=predictions)
plt.show()