In [10]:
import numpy as np
import pandas as pd

# Importing Data

In [12]:
# Read the crop-recommendation CSV from the working directory into `crop`.
crop=pd.read_csv("Crop_recommendation.csv")
# Preview the first rows to confirm the file was parsed as expected.
crop.head()

# Ask Yourself Six Questions

In [14]:
# (rows, columns) of the loaded dataset.
crop.shape

In [15]:
# Column names, non-null counts and dtypes.
crop.info()

In [16]:
# Number of missing values in each column.
crop.isnull().sum()

In [17]:
# Number of rows that exactly repeat an earlier row.
crop.duplicated().sum()

In [18]:
# Summary statistics of the numeric columns.
crop.describe()

# Exploring Data

In [21]:
import pandas as pd

# Correlation matrix of the numeric columns of `crop` (DataFrame.corr default method).
# "number" selects every numeric dtype; the builtin float/int selectors would
# skip narrower dtypes such as float32 or int16.
numeric_cols = crop.select_dtypes(include="number")
corr = numeric_cols.corr()

print(corr)


In [23]:
import seaborn as sns
# Annotated heatmap of the correlation matrix held in `corr`.
sns.heatmap(data=corr, cmap='coolwarm', annot=True, cbar=True)

In [25]:
# Number of rows per crop label (shows how balanced the classes are).
crop['label'].value_counts()


In [26]:
import matplotlib.pyplot as plt
# Distribution of column 'N'. `sns.distplot` is deprecated in seaborn, so the
# same density histogram with a KDE curve is drawn with `histplot` instead.
sns.histplot(crop['N'], kde=True, stat='density')
plt.show()

In [27]:
import seaborn as sns
import matplotlib.pyplot as plt

# Count histogram of column 'N' with a kernel-density curve overlaid.
sns.histplot(data=crop, x='N', kde=True)
plt.show()

# Encoding

In [28]:
# Crop names listed in the order that defines their integer code
# (rice -> 1, maize -> 2, ..., coffee -> 22).
crop_names = [
    'rice', 'maize', 'jute', 'cotton', 'coconut', 'papaya', 'orange', 'apple',
    'muskmelon', 'watermelon', 'grapes', 'mango', 'banana', 'pomegranate',
    'lentil', 'blackgram', 'mungbean', 'mothbeans', 'pigeonpeas',
    'kidneybeans', 'chickpea', 'coffee',
]
crop_dict = {name: code for code, name in enumerate(crop_names, start=1)}

# Series.map returns NaN for any value missing from the mapping; stop here
# rather than silently carrying NaN targets into training.
unmapped_labels = set(crop['label'].dropna().unique()) - set(crop_dict)
if unmapped_labels:
    raise ValueError(f"Labels without an integer code: {sorted(unmapped_labels)}")

# Integer-encoded copy of the 'label' column.
crop['crop_num']=crop['label'].map(crop_dict)

In [29]:
# Number of rows per integer crop code.
crop['crop_num'].value_counts()

In [35]:
# NOTE: the drop below is commented out, so the text 'label' column is still
# present in `crop` next to 'crop_num'.
#crop.drop(['label'], axis=1,inplace=True)
crop.head()

# Train Test Split

In [50]:
# Features: every column except the target. 'crop_num' is derived from 'label',
# so 'label' has to be dropped as well or the target leaks into the features.
x=crop.drop(columns=['label', 'crop_num'])
# Target: the integer crop code.
y=crop['crop_num']

In [51]:
# Display the feature frame.
x

In [53]:
# (rows, columns) of the feature frame.
x.shape

In [54]:
# Length of the target vector; should match the row count of `x`.
y.shape

In [62]:
from sklearn.model_selection import train_test_split

In [64]:
# Split the crop data into training and test sets for classification.
import pandas as pd
from sklearn.model_selection import train_test_split

# Reload the raw CSV so the split does not depend on columns added to `crop` earlier.
data = pd.read_csv('Crop_recommendation.csv')

# The quantity to predict is the crop name in 'label'. 'temperature' is one of
# the input features and must stay in X rather than being used as the target.
X = data.drop('label', axis=1)
y = data['label']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [65]:
# (rows, columns) of the training features.
X_train.shape

In [66]:
# (rows, columns) of the test features.
X_test.shape

In [67]:
# Length of the training target vector.
y_train.shape

In [68]:
 y_test.shape  # length of the test target vector

# Scale the features using MinMaxScaler

In [83]:
# Print the dtype of every X_train column (to see which columns are numeric).
print(X_train.dtypes)

In [84]:
# Preview the first rows of `crop`.
crop.head()

In [76]:
# Print the Python type of X_train.
print(type(X_train))

In [85]:
 X_train  # display the training features

In [88]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Toy example: scale numeric columns with MinMaxScaler and one-hot encode a
# categorical column through a single ColumnTransformer.
# Every working name is prefixed with `demo_` so this illustration does not
# overwrite the real `crop` DataFrame, the `X` feature frame or `data`.
demo_records = {
    'feature1': [1, 2, 3, 4],
    'feature2': [5, 6, 7, 8],
    'category': ['apple', 'orange', 'banana', 'apple']
}
demo_frame = pd.DataFrame(demo_records)

# Columns routed to each transformer
demo_numeric_features = ['feature1', 'feature2']
demo_categorical_features = ['category']

# One transformer per column group
demo_preprocessor = ColumnTransformer(
    transformers=[
        ('num', MinMaxScaler(), demo_numeric_features),
        ('cat', OneHotEncoder(), demo_categorical_features)
    ])

# Learn the scaling range / categories and apply them in one step
demo_preprocessed = demo_preprocessor.fit_transform(demo_frame)

# To transform other data with the same fitted rules, call
# demo_preprocessor.transform(other_frame) instead of fitting again.

# The transformer output carries no column names, so rebuild them: the numeric
# names first, then the names generated by the one-hot encoder.
demo_onehot_columns = demo_preprocessor.named_transformers_['cat'].get_feature_names_out(demo_categorical_features).tolist()
demo_columns = demo_numeric_features + demo_onehot_columns
X_preprocessed_df = pd.DataFrame(demo_preprocessed, columns=demo_columns)

print(X_preprocessed_df)


In [89]:
# Display the training features.
X_train


# Standardization

In [91]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Toy train/test example: fit the preprocessing on the training frame only,
# then reuse the fitted transformer on the test frame.
# The frames are called `demo_train` / `demo_test` so the real X_train / X_test
# produced by train_test_split are not overwritten by this illustration.
data_train = {
    'feature1': [1, 2, 3, 4],
    'feature2': [5, 6, 7, 8],
    'category': ['apple', 'orange', 'banana', 'apple']
}
data_test = {
    'feature1': [2, 3, 4, 5],
    'feature2': [6, 7, 8, 9],
    'category': ['banana', 'orange', 'apple', 'banana']
}

demo_train = pd.DataFrame(data_train)
demo_test = pd.DataFrame(data_test)

# Columns routed to each sub-pipeline
demo_numeric_features = ['feature1', 'feature2']
demo_categorical_features = ['category']

# Numeric columns: fill gaps with the column mean, then scale with MinMaxScaler
demo_numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler())
])

# Categorical column: fill gaps with the literal 'missing', then one-hot encode.
# handle_unknown='ignore' keeps transform() from failing on unseen categories.
demo_categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

demo_preprocessor = ColumnTransformer(
    transformers=[
        ('num', demo_numeric_transformer, demo_numeric_features),
        ('cat', demo_categorical_transformer, demo_categorical_features)
    ])

# Fit on the training frame only; the test frame is transformed with the
# parameters learned from training.
X_train_preprocessed = demo_preprocessor.fit_transform(demo_train)
X_test_preprocessed = demo_preprocessor.transform(demo_test)

# The transformer output carries no column names, so rebuild them: the numeric
# names first, then the names generated by the fitted one-hot step.
demo_onehot_step = demo_preprocessor.named_transformers_['cat'].named_steps['onehot']
demo_columns = demo_numeric_features + demo_onehot_step.get_feature_names_out(demo_categorical_features).tolist()

X_train_preprocessed_df = pd.DataFrame(X_train_preprocessed, columns=demo_columns)
X_test_preprocessed_df = pd.DataFrame(X_test_preprocessed, columns=demo_columns)

print("Preprocessed Training Data:")
print(X_train_preprocessed_df)
print("\nPreprocessed Test Data:")
print(X_test_preprocessed_df)


In [92]:
# Display X_train as it stands after the previous cell.
X_train

# Training Models

In [97]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Load the dataset and list its columns so the target column can be checked
data = pd.read_csv('Crop_recommendation.csv')
print(data.columns)

# Column holding the crop name to predict
target_column = 'label'

# Store the target as a categorical column
data[target_column] = data[target_column].astype('category')

# Features are every column except the target
X = data.drop(target_column, axis=1)
y = data[target_column]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Route columns by dtype: object columns are treated as categorical, the rest as numeric
categorical_cols = X.select_dtypes(include=['object']).columns
numeric_cols = X.select_dtypes(exclude=['object']).columns

# Numeric columns: median imputation followed by standardisation
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Categorical columns: most-frequent imputation followed by one-hot encoding
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_cols),
        ('cat', categorical_transformer, categorical_cols)])

# Fit the preprocessing on the training rows only, then apply it to both sets.
# From here on X_train / X_test hold the transformed arrays.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Seed shared by the randomised estimators so repeated runs print the same accuracies
model_seed = 42

# One instance of every classifier to compare.
# NOTE: ExtraTreeClassifier is a single randomised tree from sklearn.tree.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Naive Bayes': GaussianNB(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=model_seed),
    'Random Forest': RandomForestClassifier(random_state=model_seed),
    'Bagging': BaggingClassifier(random_state=model_seed),
    'AdaBoost': AdaBoostClassifier(random_state=model_seed),
    'Gradient Boosting': GradientBoostingClassifier(random_state=model_seed),
    'Extra Trees': ExtraTreeClassifier(random_state=model_seed),
}

# Fit each model on the training set and report its accuracy on the test set
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name} with accuracy: {accuracy_score(y_test, y_pred)}")


In [98]:
# Random forest kept as `rfc` for the prediction helper and the pickling cell.
# A fixed random_state makes the fitted forest and its accuracy reproducible.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train,y_train)
ypred = rfc.predict(X_test)
# Accuracy on the held-out test set (displayed as the cell output)
accuracy_score(y_test,ypred)

# Predictive System

In [105]:
def recommendation(N,P,k,temperature,humidity,ph,rainfal):
    """Predict the crop for one set of feature values with the trained `rfc`.

    The values are placed in a one-row DataFrame whose columns carry the
    dataset's feature names ('N', 'P', 'K', 'temperature', 'humidity', 'ph',
    'rainfall'); `k` fills column 'K' and `rainfal` fills column 'rainfall'.
    The row is passed through the already-fitted module-level `preprocessor`,
    i.e. the same transformation `rfc` was trained behind.

    Returns:
        A one-element array holding the predicted label.
    """
    features = pd.DataFrame(
        [[N, P, k, temperature, humidity, ph, rainfal]],
        columns=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'],
    )
    # Only transform here: re-fitting a scaler on a single row would discard
    # the statistics learned from the training data.
    transformed_features = preprocessor.transform(features)
    prediction = rfc.predict(transformed_features).reshape(1,-1)

    return prediction[0]

In [106]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score
import joblib  # For saving and loading models

class CropRecommendationSystem:
    """Train, persist, evaluate and query a fixed set of scikit-learn classifiers.

    `train_models` writes every fitted model to ``'<model name>.pkl'`` relative
    to the current working directory; `evaluate_models` and `predict` read the
    models back from those files.
    """

    def __init__(self, data_path, target_column):
        """Remember the CSV path and target column and create the unfitted models.

        Args:
            data_path: path of the CSV file read by `load_data`.
            target_column: name of the column to predict.
        """
        self.data_path = data_path
        self.target_column = target_column
        self.models = {
            'Logistic Regression': LogisticRegression(max_iter=1000),
            'Naive Bayes': GaussianNB(),
            'Support Vector Machine': SVC(),
            'K-Nearest Neighbors': KNeighborsClassifier(),
            'Decision Tree': DecisionTreeClassifier(),
            'Random Forest': RandomForestClassifier(),
            'Bagging': BaggingClassifier(),
            'AdaBoost': AdaBoostClassifier(),
            'Gradient Boosting': GradientBoostingClassifier(),
            'Extra Trees': ExtraTreeClassifier(),
        }
        # Set by preprocess_data(); predict() needs it to be fitted.
        self.preprocessor = None

    @staticmethod
    def _model_path(name):
        """Return the file name under which the model called `name` is stored."""
        return f'{name}.pkl'

    def load_data(self):
        """Read the CSV and split it into train and test parts.

        Returns:
            ``(X_train, X_test, y_train, y_test)`` from an 80/20 split with
            ``random_state=42``; the target column is cast to ``category``.
        """
        data = pd.read_csv(self.data_path)
        data[self.target_column] = data[self.target_column].astype('category')
        X = data.drop(self.target_column, axis=1)
        y = data[self.target_column]
        return train_test_split(X, y, test_size=0.2, random_state=42)

    def preprocess_data(self, X_train, X_test):
        """Fit the preprocessing on `X_train` and apply it to both frames.

        Object-dtype columns are imputed with the most frequent value and
        one-hot encoded; all other columns are imputed with the median and
        standardised. The fitted transformer is kept in `self.preprocessor`.

        Returns:
            ``(X_train, X_test)`` in transformed form.
        """
        categorical_cols = X_train.select_dtypes(include=['object']).columns
        numeric_cols = X_train.select_dtypes(exclude=['object']).columns

        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())])

        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        self.preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_cols),
                ('cat', categorical_transformer, categorical_cols)])

        # Fit on the training data only so the test data does not influence
        # the learned imputation values and scaling statistics.
        X_train = self.preprocessor.fit_transform(X_train)
        X_test = self.preprocessor.transform(X_test)
        return X_train, X_test

    def train_models(self, X_train, y_train):
        """Fit every model in `self.models` and save each one to its .pkl file."""
        for name, model in self.models.items():
            model.fit(X_train, y_train)
            joblib.dump(model, self._model_path(name))  # Save the model to a file

    def evaluate_models(self, X_test, y_test):
        """Load each saved model, print its test accuracy and return the scores.

        Returns:
            dict mapping model name to accuracy on ``(X_test, y_test)``.
        """
        results = {}
        for name in self.models:
            loaded_model = joblib.load(self._model_path(name))  # Load the model from a file
            y_pred = loaded_model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            results[name] = accuracy
            print(f"{name} with accuracy: {accuracy}")
        return results

    def predict(self, X_new):
        """Predict with every saved model on raw (untransformed) feature rows.

        Args:
            X_new: rows in the same column layout as the training features.

        Returns:
            dict mapping model name to that model's predictions for `X_new`.

        Raises:
            RuntimeError: if `preprocess_data` has not been called yet, so
                there is no fitted preprocessor to apply.
        """
        if self.preprocessor is None:
            raise RuntimeError("preprocess_data() must be called before predict()")
        X_new = self.preprocessor.transform(X_new)
        predictions = {}
        for name in self.models:
            loaded_model = joblib.load(self._model_path(name))
            predictions[name] = loaded_model.predict(X_new)
        return predictions

# Usage example: run the whole load / preprocess / train / evaluate cycle.
data_path = 'Crop_recommendation.csv'
target_column = 'label'
system = CropRecommendationSystem(data_path, target_column)

# Raw split first, then swap the feature frames for their preprocessed form.
X_train, X_test, y_train, y_test = system.load_data()
X_train, X_test = system.preprocess_data(X_train, X_test)

system.train_models(X_train, y_train)
results = system.evaluate_models(X_test, y_test)

# Predict on new data (example): a single row with the seven feature columns.
X_new = pd.DataFrame({
    'N': [90],
    'P': [42],
    'K': [43],
    'temperature': [20.87],
    'humidity': [82.00],
    'ph': [6.5],
    'rainfall': [202.93],
})
predictions = system.predict(X_new)
print(predictions)


In [111]:
import pickle
# Persist the trained forest together with the fitted `preprocessor` it was
# trained behind; both are needed to score raw feature values later.
# This notebook defines no scaler objects named `ms` / `sc`, so the fitted
# ColumnTransformer is what gets saved. `with` closes each file handle even
# if pickling fails.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(rfc, model_file)
with open('preprocessor.pkl', 'wb') as preprocessor_file:
    pickle.dump(preprocessor, preprocessor_file)

In [114]:
# Example input features, keyed by the dataset's feature column names.
# NOTE: `predict_crop` is defined in a later cell of this notebook, so that
# cell has to be executed before this one.
input_features = dict(
    N=90,
    P=42,
    K=43,
    temperature=20.87,
    humidity=82.00,
    ph=6.8,
    rainfall=202.93,
)

# Predict the best crop for these values and report it
predicted_crop = predict_crop(input_features)
print(f"Predicted crop: {predicted_crop}")


In [115]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Load the dataset and list its columns so the target column can be checked
data = pd.read_csv('Crop_recommendation.csv')
print(data.columns)

# Column holding the crop name to predict
target_column = 'label'

# Store the target as a categorical column
data[target_column] = data[target_column].astype('category')

# Features are every column except the target
X = data.drop(target_column, axis=1)
y = data[target_column]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Route columns by dtype: object columns are treated as categorical, the rest as numeric
categorical_cols = X.select_dtypes(include=['object']).columns
numeric_cols = X.select_dtypes(exclude=['object']).columns

# Numeric columns: median imputation followed by standardisation
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Categorical columns: most-frequent imputation followed by one-hot encoding
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_cols),
        ('cat', categorical_transformer, categorical_cols)])

# Fit the preprocessing on the training rows only, then apply it to both sets.
# From here on X_train / X_test hold the transformed arrays.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Seed shared by the randomised estimators so the ranking (and therefore the
# selected best model) is the same on every run
model_seed = 42

# One instance of every classifier to compare.
# NOTE: ExtraTreeClassifier is a single randomised tree from sklearn.tree.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Naive Bayes': GaussianNB(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=model_seed),
    'Random Forest': RandomForestClassifier(random_state=model_seed),
    'Bagging': BaggingClassifier(random_state=model_seed),
    'AdaBoost': AdaBoostClassifier(random_state=model_seed),
    'Gradient Boosting': GradientBoostingClassifier(random_state=model_seed),
    'Extra Trees': ExtraTreeClassifier(random_state=model_seed),
}

# Fit every model, print its test accuracy and remember the most accurate one.
# Starting below any possible accuracy guarantees best_model is set even if
# every model scores 0; on ties the first model in `models` is kept.
best_model = None
best_accuracy = -1.0
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} with accuracy: {accuracy}")
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

print(f"Best model: {best_model} with accuracy: {best_accuracy}")

# Prediction helper built on the best model selected above
def predict_crop(input_features):
    """Return the label predicted by `best_model` for one set of feature values.

    Args:
        input_features: mapping of feature column name to value; it becomes a
            one-row DataFrame and is passed through the fitted `preprocessor`.

    Returns:
        The first (and only) element of the model's prediction.
    """
    single_row = pd.DataFrame([input_features])
    return best_model.predict(preprocessor.transform(single_row))[0]

# Example input features, keyed by the dataset's feature column names
input_features = dict(
    N=90,
    P=42,
    K=43,
    temperature=20.87,
    humidity=82.00,
    ph=6.8,
    rainfall=202.93,
)

# Run the helper on the example and report the predicted crop
predicted_crop = predict_crop(input_features)
print(f"Predicted crop: {predicted_crop}")
