Load the dataset using pandas.

In [None]:
import pandas as pd

# Read the raw CSV (path is relative to the notebook's working directory).
csv_path = 'data.csv'
data = pd.read_csv(csv_path)

Check for NaN values in the dataset.

In [None]:
# Number of missing entries in each column; the bare name on the last line
# makes the notebook display the resulting Series.
missing_per_column = data.isna().sum()
missing_per_column

Check the shape of the dataset.

In [None]:
# Dimensions of the frame as (rows, columns); displayed as the cell output.
n_rows, n_cols = data.shape
(n_rows, n_cols)

Check and drop any duplicate entries.

In [None]:
# Flag every row that repeats an earlier, fully identical row, then keep
# only the unflagged rows (the first occurrence of each row survives).
is_repeat = data.duplicated()
data = data.loc[~is_repeat]

Identify potential outliers in a specified column (values more than 3 standard deviations from the column mean).

In [None]:
import numpy as np

# Three-sigma rule: a row is an outlier when its value is more than three
# standard deviations away from the column mean.
# NOTE(review): 'column' looks like a placeholder name — confirm the intended column.
col_values = data['column']
distance_from_mean = (col_values - col_values.mean()).abs()
outliers = data[distance_from_mean > 3 * col_values.std()]

Remove identified outliers from the dataset.

In [None]:
# Mark the rows whose index label was flagged as an outlier, then keep the rest.
is_outlier_row = data.index.isin(outliers.index)
data = data.loc[~is_outlier_row]

Create a new binary label from the 'quality' column: 'High' when quality is greater than 5, otherwise 'Low'.

In [None]:
# Binary target: 'High' when quality > 5, otherwise 'Low'.
# `data` at this point is the result of boolean filtering in earlier cells, so
# writing a column into it in place triggers pandas' SettingWithCopyWarning.
# assign() returns a new frame instead, and np.where evaluates the threshold
# for the whole column at once rather than calling a lambda per row.
data = data.assign(
    label_quality=np.where(data['quality'] > 5, 'High', 'Low')
)

Visualize the distribution of quality labels using a bar plot.

In [None]:
import matplotlib.pyplot as plt

# Count each label once and reuse the result for both the bar positions and
# the bar heights (the original recomputed value_counts() for each argument).
label_counts = data['label_quality'].value_counts()

# Explicit figure/axes, with labels so the chart is readable on its own.
fig, ax = plt.subplots()
ax.bar(label_counts.index, label_counts.values)
ax.set_xlabel('Quality label')
ax.set_ylabel('Number of samples')
ax.set_title('Distribution of quality labels')
plt.show()

Create a scatter plot of sulphates vs. alcohol.

In [None]:
# Scatter of sulphates (x) against alcohol (y), drawn on an explicit axes object.
fig, ax = plt.subplots()
ax.scatter(data['sulphates'], data['alcohol'])
ax.set_xlabel('Sulphates')
ax.set_ylabel('Alcohol')
plt.show()

Split the dataset into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# The target is the binary 'label_quality' column created earlier in the
# notebook; no column named 'label' is created anywhere above.
# 'quality' is removed from the features because the label is derived directly
# from it (keeping it would leak the answer), and the label column itself must
# not appear among the features.
features = data.drop(columns=['quality', 'label_quality'])
target = data['label_quality']

# Hold out 20% for testing. Stratifying keeps the High/Low proportions the same
# in both splits; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
    stratify=target,
)

Apply SMOTE to address class imbalance in the training data.

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the minority class in the training split only; the test split is
# not passed to SMOTE and therefore keeps its original class balance.
# A fixed seed (the same one used for the train/test split) makes the
# synthetic samples reproducible across runs.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

Train a model using a Random Forest classifier.

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit on the SMOTE-resampled training data. Fitting on X_train / y_train would
# silently discard the oversampling performed in the previous cell.
# The seed makes the forest reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_resampled, y_resampled)

Make predictions on the testing set.

In [None]:
# Predict a class label for every row of the held-out test features.
y_pred = model.predict(X_test)

Evaluate the model using a confusion matrix.

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes and columns the predicted classes, both in sorted
# label order. The bare name on the last line makes the notebook display the
# matrix; previously it was computed but never shown.
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix

Generate a classification report for detailed evaluation.

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall, F1 and support on the test set.
# The report is a multi-line string, so it is printed: a bare expression would
# display its repr with literal "\n" characters, and the original cell showed
# nothing at all.
report = classification_report(y_test, y_pred)
print(report)

Calculate feature importance scores from the trained model.

In [None]:
# Importance score of each input feature as learned by the fitted forest,
# one value per feature in the order the features were passed to fit().
importances = model.feature_importances_

Visualize feature importance scores.

In [None]:
# Attach feature names to the scores (the model was trained on the columns of
# X_train) and sort them, so the chart shows which feature each bar belongs to
# instead of an anonymous positional index.
feature_importance = pd.Series(importances, index=X_train.columns).sort_values()

# Horizontal bars keep long feature names readable; the largest score ends up on top.
fig, ax = plt.subplots()
ax.barh(feature_importance.index, feature_importance.values)
ax.set_xlabel('Importance')
ax.set_ylabel('Feature')
ax.set_title('Random Forest feature importances')
plt.show()