# Stacking Classifier (Random Forest + Gradient Boosting)

In [1]:
import os

import pandas as pd

# Location of the labelled dataset. The original machine-specific path stays
# the default, but it can be overridden with the CATS_DOGS_CSV environment
# variable so the notebook also runs on other machines.
DATA_PATH = os.environ.get('CATS_DOGS_CSV', r'C:\Users\rajes\Downloads\cats_dogs.csv')

df = pd.read_csv(DATA_PATH)

# Later cells read the 'Description' and 'Animal' columns; fail here with a
# clear message rather than further down in the modelling cell.
missing_columns = {'Description', 'Animal'} - set(df.columns)
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required column(s): {sorted(missing_columns)}")

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV, train_test_split, KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


# Model inputs are the free-text descriptions; the target is the animal label.
X, y = df['Description'], df['Animal']

# Text pre-processing: token counts, then keep the k terms that score highest
# on the chi-squared test against the target.
text_steps = [
    ('vectorizer', CountVectorizer()),
    ('select', SelectKBest(score_func=chi2, k=100)),
]

# Stacked ensemble: the two base learners' predictions are combined by a
# decision-tree final estimator.
stacked_clf = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(random_state=42)),
        ('gb', GradientBoostingClassifier(random_state=42)),
    ],
    final_estimator=DecisionTreeClassifier(random_state=42),
)

pipeline = Pipeline(text_steps + [('clf', stacked_clf)])


# Hyper-parameter candidates, grouped by the pipeline step they configure.
# Keys follow scikit-learn's "<step>__<param>" addressing convention.
vectorizer_grid = {
    'vectorizer__max_df': [0.8, 0.9, 1.0],
    'vectorizer__min_df': [1, 2, 3],
}
selector_grid = {
    'select__k': [50, 100, 150],
}
stacking_grid = {
    # Final (meta) estimator: the decision tree.
    'clf__final_estimator__max_depth': [None, 10, 20],
    'clf__final_estimator__min_samples_split': [2, 5, 10],
    # Random-forest base learner.
    'clf__rf__n_estimators': [50, 100, 150],
    'clf__rf__max_depth': [None, 10, 20],
}

# Seven parameters with three values each -> 3**7 = 2187 combinations.
param_grid = {**vectorizer_grid, **selector_grid, **stacking_grid}

# Hold out the test set BEFORE tuning. Fitting the search on all of X, y and
# splitting afterwards would let the test rows influence hyper-parameter
# selection and make the held-out evaluation optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

# Exhaustive search over param_grid with 5-fold shuffled CV on the training
# portion only. The grid is large (every combination is fitted once per
# fold), so n_jobs=-1 spreads the fits over all available CPU cores.
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
    scoring='accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.2f}")

# With the default refit=True, best_estimator_ has already been refitted on
# the full training split using the best parameters, so no extra fit is needed.
best_pipeline = grid_search.best_estimator_

# Evaluate once on the untouched hold-out set.
y_pred = best_pipeline.predict(X_test)
print(f"Held-out test accuracy: {accuracy_score(y_test, y_pred):.2f}")






KeyboardInterrupt: 

# Cross-Validation Score

In [None]:
# Score the tuned pipeline with 5-fold shuffled cross-validation over the
# full dataset; cross_val_score fits a fresh clone of the estimator per fold.
cv_splitter = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = cross_val_score(
    grid_search.best_estimator_,
    X,
    y,
    cv=cv_splitter,
    scoring='accuracy',
)

print(f"Cross-validation scores: {cross_val_scores}")
print(f"Mean cross-validation score: {cross_val_scores.mean():.2f}")


# New Data Prediction

In [None]:
# Run the tuned pipeline on a couple of unseen, raw-text descriptions.
# NOTE(review): these samples describe phones, while the model predicts the
# 'Animal' label -- presumably placeholder text; confirm and replace with
# representative descriptions.
new_data = [
    "New smartphone with advanced features and high performance",
    "A classic phone with basic functionalities",
]

# The pipeline vectorizes the raw strings itself, so they are passed as-is.
new_predictions = best_pipeline.predict(new_data)

print("Predictions for new data:")
for text, prediction in zip(new_data, new_predictions):
    print(f"Description: {text}\nPredicted Animal: {prediction}\n")
