Load the dataset from a CSV file into a pandas DataFrame.

In [None]:
import pandas as pd

# Read 'data.csv' (path resolved relative to the current working directory)
# into the DataFrame `df`.
df = pd.read_csv('data.csv')

Inspect the first few rows of the dataset.

In [None]:
# Preview the first five rows (five is the pandas default for head()).
df.head(n=5)

Count the duplicate rows in the DataFrame.

In [None]:
# Number of rows that exactly repeat an earlier row. With keep='first' (the
# pandas default) the first occurrence of each row is not counted.
duplicates = df.duplicated(keep='first').sum()

Display information about the DataFrame, including column types and non-null counts.

In [None]:
# Print a summary of `df`: one line per column with its dtype and non-null
# count. info() writes to stdout and returns None.
df.info()

Perform a correlation analysis to find relationships between numerical features.

In [None]:
# Pairwise Pearson correlation between the numeric columns of `df`.
# numeric_only=True is passed explicitly: since pandas 2.0 the default is
# False, and DataFrame.corr() raises a ValueError as soon as the frame holds
# a column that cannot be converted to float.
# NOTE(review): 'Category' is presumably such a text column -- confirm.
# The `numeric_only` keyword requires pandas >= 1.5.
correlation = df.corr(numeric_only=True)

Count the number of unique values in each column of the DataFrame.

In [None]:
# Distinct-value count for every column, returned as a Series indexed by
# column name. Missing values are not counted (dropna=True is the default).
unique_values = df.nunique(axis=0, dropna=True)

Visualize how often each value of the 'Category' column occurs using a count plot.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# One bar per distinct 'Category' value; bar height is the number of rows
# holding that value.
sns.countplot(data=df, x='Category')
plt.show()

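Score the features of the DataFrame with the ANOVA F-test (`f_classif`). With `k='all'` every feature is kept; set `k` to an integer to keep only the best-scoring ones.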

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif
# Feature matrix: every column except the label. Label vector: 'target'.
X = df.drop(columns='target')
y = df['target']

# Score each feature against the label with the ANOVA F-test.
# NOTE(review): k='all' bypasses selection and keeps every feature, so
# `selected_features` below is simply every column position of X. Pass an
# integer k to actually keep only the top-scoring features.
selector = SelectKBest(f_classif, k='all').fit(X, y)

# Integer column positions (into X) of the features the selector kept.
selected_features = selector.get_support(indices=True)

Train a Random Forest model using the selected features.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Restrict the feature matrix to the columns kept by the feature-selection
# step. `selected_features` holds integer column positions, hence .iloc.
# The result is still a DataFrame, so columns stay addressable by name.
X_selected = X.iloc[:, selected_features]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y, test_size=0.2, random_state=42
)

# Seed the forest as well: without random_state the bootstrap samples and
# feature sub-sampling differ on every run, so the fitted model (and the
# accuracy computed from it) would not be reproducible despite the seeded
# split above.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

Evaluate the model's performance by calculating the accuracy.

In [None]:
from sklearn.metrics import accuracy_score
# Predict labels for the held-out rows, then compute the fraction of those
# predictions that match the true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

Visualize the predictions against one of the feature dimensions.

In [None]:
# Draw on the current axes (created on demand), which is what the
# pyplot-level helpers do implicitly.
ax = plt.gca()

# One point per test row: x is the row's 'feature1' value, y is the label
# the model predicted for it.
ax.scatter(X_test['feature1'], y_pred)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Predictions')
ax.set_title('Predictions vs Feature 1')
plt.show()