![image.png](attachment:image.png)

In [5]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Seed the global NumPy RNG (kept for any later code that draws from it).
np.random.seed(42)

# make_moons returns a (features, labels) pair; the raw pair stays available as raw_data.
raw_data = make_moons(n_samples=2000, noise=0.25, random_state=42)
data, target = raw_data

# Pass the seed explicitly so the split does not depend on the global RNG state,
# i.e. on which cells happened to run before this one.
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [7]:
# Baseline: random forest with default hyperparameters and a fixed seed
model = RandomForestClassifier(random_state=42)

# Fit on the training split
model.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = model.predict(X_test)

# accuracy_score takes (y_true, y_pred) in that order
score = accuracy_score(y_test, y_pred)
print(f"Accuracy: {score:.4f}")

Accuracy: 0.9300


![image.png](attachment:image.png)

Links:
- https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
- https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
- https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

In [8]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Seed the global NumPy RNG (kept for any later code that draws from it).
np.random.seed(42)

# make_moons returns a (features, labels) pair; the raw pair stays available as raw_data.
raw_data = make_moons(n_samples=2000, noise=0.25, random_state=42)
data, target = raw_data

# Pass the seed explicitly so the split does not depend on the global RNG state,
# i.e. on which cells happened to run before this one.
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [9]:
# Hyperparameter grid to search: 2 criteria x 3 depths x 2 leaf sizes = 12 candidates
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [6, 7, 8],
    'min_samples_leaf': [4, 5]
}

# Base estimator; the fixed seed keeps every candidate fit reproducible
rf = RandomForestClassifier(random_state=42)

# Exhaustive search over the grid, scored by accuracy with 5-fold cross-validation
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

# Report the winning combination
best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

# GridSearchCV refits the best configuration on the whole training set by default
# (refit=True), so reuse that fitted estimator instead of training it a second time.
best_rf = grid_search.best_estimator_

# Evaluate on the held-out test set
y_pred = best_rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test accuracy: ", test_accuracy)

Best parameters found:  {'criterion': 'entropy', 'max_depth': 7, 'min_samples_leaf': 5}
Test accuracy:  0.932


![image.png](attachment:image.png)

Link: https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html

In [10]:
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer


# Toy corpus: four short sentences used as input for the bag-of-words example.
documents = [
    "python is a programming language",
    "python is popular",
    "programming in python",
    "object-oriented programming in python",
]

In [11]:
# Bag-of-words vectorizer with default settings
vectorizer = CountVectorizer()

# Learn the vocabulary from the corpus and build the document-term matrix in one step
X = vectorizer.fit_transform(documents)

# One row per document, one column per vocabulary term
term_names = vectorizer.get_feature_names_out()
term_counts = X.toarray()
df = pd.DataFrame(data=term_counts, columns=term_names)

# Show the resulting count table
print(df)

   in  is  language  object  oriented  popular  programming  python
0   0   1         1       0         0        0            1       1
1   0   1         0       0         0        1            0       1
2   1   0         0       0         0        0            1       1
3   1   0         0       1         1        0            1       1
