In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# Train and evaluate a Random Forest on the iris CSV, then classify two new
# samples and rank the features by importance.

# Fixed seed so the split, the forest, and every number printed below are
# reproducible under Restart Kernel -> Run All.
SEED = 42
# Name of the label column; every other column is treated as a feature.
TARGET_COL = "species"

# Load the dataset from the CSV file
data = pd.read_csv("iris.csv")

# Separate features (X) and target variable (y) by column NAME, so the
# name-based and position-based views of the target can never disagree if the
# column order in the file changes.
X = data.drop(columns=[TARGET_COL])
y = data[TARGET_COL]

# Display target and feature names
print("Target Names:", y.unique())
print("Feature Names:", X.columns)

# Splitting the dataset into training (70%) and testing (30%) sets.
# stratify=y keeps the class proportions equal in both parts, which matters
# for a dataset this small; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=SEED, stratify=y
)

# Creating a Random Forest Classifier (seeded so the trees are reproducible)
RF = RandomForestClassifier(n_estimators=100, random_state=SEED)

# Training the model on the training dataset
RF.fit(X_train, y_train)

# Performing predictions on the test dataset
y_pred = RF.predict(X_test)

# Calculating accuracy
accuracy = metrics.accuracy_score(y_test, y_pred)
print("ACCURACY OF THE MODEL:", accuracy)

# Predicting the species for new data.
# The model was fitted on a DataFrame, so the new samples are wrapped in a
# DataFrame with the same columns: this avoids scikit-learn's
# "X does not have valid feature names" warning and makes the column order
# explicit instead of implied.
new_data = pd.DataFrame(
    [[5.0, 3.3, 1.4, 0.2], [6.0, 2.2, 5.0, 1.5]],  # Example new data
    columns=X.columns,
)
predicted_species = RF.predict(new_data)
print("Predicted species for new data:", predicted_species)

# Checking feature importance (highest first)
feature_imp = pd.Series(RF.feature_importances_, index=X.columns).sort_values(ascending=False)
print("Feature Importance:\n", feature_imp)


Target Names: ['setosa' 'versicolor' 'virginica']
Feature Names: Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='object')
ACCURACY OF THE MODEL: 0.9555555555555556
Predicted species for new data: ['setosa' 'versicolor']
Feature Importance:
 petal_length    0.451880
petal_width     0.413923
sepal_length    0.118430
sepal_width     0.015767
dtype: float64




In [None]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# Train and evaluate a Random Forest on scikit-learn's bundled iris dataset,
# classify one new sample, and rank the features by importance.

# Fixed seed so the split, the forest, and every number printed below are
# reproducible under Restart Kernel -> Run All.
SEED = 42

# Load the dataset once and reuse the same object for names, features and
# labels (previously it was loaded twice).
iris = datasets.load_iris()
print(iris.target_names)
print(iris.feature_names)

# feature matrix and integer-encoded target vector
X, y = iris.data, iris.target

# Splitting arrays or matrices into random train and test subsets
# i.e. 70% training dataset and 30% test datasets.
# stratify=y keeps the class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=SEED, stratify=y
)

# creating dataframe of IRIS dataset
data = pd.DataFrame({'sepallength': iris.data[:, 0], 'sepalwidth': iris.data[:, 1],
                     'petallength': iris.data[:, 2], 'petalwidth': iris.data[:, 3],
                     'species': iris.target})

# printing the top 5 rows of the iris dataset
print(data.head())

# creating a RF classifier (seeded so the trees are reproducible)
clf = RandomForestClassifier(n_estimators=100, random_state=SEED)

# Training the model on the training dataset
# fit function is used to train the model using the training sets as parameters
clf.fit(X_train, y_train)

# performing predictions on the test dataset
y_pred = clf.predict(X_test)

print()

# using metrics module for accuracy calculation
print("ACCURACY OF THE MODEL:", metrics.accuracy_score(y_test, y_pred))

# predicting which type of flower it is.
# A bare expression in the middle of a cell is never displayed, so the result
# is captured and printed; the integer class id is mapped back to its name.
sample_pred = clf.predict([[3, 3, 2, 2]])
print("Predicted species for new data:", iris.target_names[sample_pred])

# using the feature importance variable.
# The importances are read from the SAME fitted model whose accuracy was
# reported above, rather than from a second, separately trained forest.
feature_imp = pd.Series(clf.feature_importances_, index=iris.feature_names).sort_values(ascending=False)
print(feature_imp)