In [None]:
#data-set

import pandas as pd

# Read the crop recommendation CSV from the Kaggle input directory
data = pd.read_csv(
    '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
)

# Wrap the loaded table in a DataFrame named `df`
df = pd.DataFrame(data=data)

# Print the frame as plain text
print(df)

In [None]:
#Data preprocessing and Model training

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Read the crop recommendation CSV and build the working frame from it
data = pd.read_csv(
    '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
)
df = pd.DataFrame(data)
# Row-wise total of the three nutrient columns, stored as 'Sum'
nutrient_columns = ['N', 'P', 'K']
df['Sum'] = df[nutrient_columns].sum(axis='columns')

# Define recommended actions based on rules (example rules)
def get_recommended_action(row):
    """Return the first rule-based action that applies to one data row.

    Rules are checked in a fixed order and the first match wins:
    N below 3, P below 3, rainfall below 150, humidity below 60, then
    N, P and K all above 4. If no rule matches, 'No Specific Action'
    is returned.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for 'N', 'P', 'K', 'rainfall' and
        'humidity'.

    Returns
    -------
    str
        The action label chosen for the row.
    """
    if row['N'] < 3:
        action = 'Add Fertilizer'
    elif row['P'] < 3:
        action = 'Add Phosphorus'
    elif row['rainfall'] < 150:
        action = 'Increase Watering'
    elif row['humidity'] < 60:
        action = 'Provide More Humidity'
    elif all(row[nutrient] > 4 for nutrient in ('N', 'P', 'K')):
        action = 'Optimal Nutrient Levels'
    else:
        action = 'No Specific Action'
    return action
    

# Handling Missing Values: drop incomplete rows from the working frame before
# labelling, so the features and the target below are taken from the same rows.
df = df.dropna().reset_index(drop=True)

# Apply the rules to generate recommended actions
df['Recommended_Action'] = df.apply(get_recommended_action, axis=1)
print(df.columns)

# Encoding Categorical Variables: map each action string to an integer class id
label_encoder = LabelEncoder()
df['Recommended_Action_Encoded'] = label_encoder.fit_transform(df['Recommended_Action'])

# Define features (X) and target (y) from the same frame so they stay row-aligned
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['Recommended_Action_Encoded']

# Split first so the held-out rows do not influence the scaler statistics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling: fit on the training split only, then apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Training: train a Random Forest Classifier (seeded so reruns match)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)


In [None]:
#Nitrogen-plot

import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of the 'N' column with a KDE overlay, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['N'], bins=20, kde=True, ax=ax)
ax.set_title('Distribution of N')
ax.set_xlabel('N')
ax.set_ylabel('Frequency')
plt.show()


In [None]:
#phosphorus plot

import matplotlib.pyplot as plt

# Box plot of the 'P' column, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
ax.boxplot(df['P'])
ax.set_title('Box Plot of P')
ax.set_ylabel('P')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of the 'P' column with a KDE overlay
p_values = df['P']
plt.figure(figsize=(8, 6))
# histplot returns the Axes it drew on; title and labels are set through it
ax = sns.histplot(p_values, bins=20, kde=True)
ax.set(title='Distribution of P', xlabel='P', ylabel='Frequency')
plt.show()


In [None]:
#pair-plot
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise scatter plots for a subset of the numeric columns, "ticks" style
sns.set(style="ticks")
pair_columns = ['N', 'P', 'K', 'temperature', 'humidity']
pair_frame = df[pair_columns]
sns.pairplot(pair_frame, kind='scatter')
plt.show()


In [None]:
# Joint plot of rainfall vs. humidity, coloured by the 'label' column and
# restricted to rows with temperature below 40 and rainfall above 40
row_filter = (df['temperature'] < 40) & (df['rainfall'] > 40)
sns.jointplot(data=df[row_filter], x="rainfall", y="humidity", hue="label", height=10)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of the 'K' column with a KDE overlay, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['K'], bins=20, kde=True, ax=ax)
# Title and both axis labels set in a single call
ax.set(title='Distribution of K', xlabel='K', ylabel='Frequency')
plt.show()

In [None]:
#Correlation matrix
import numpy as np
import pandas as pd 
# Keep only the numeric columns; the correlation matrix is computed from these
numeric_df = df.select_dtypes(include=[np.number])
correlations = numeric_df.corr()

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 9))

# Annotated heatmap of the pairwise correlations
sns.heatmap(correlations, annot=True, cmap='viridis')
# Label both axes in one call
ax.set(xlabel='features', ylabel='features')

plt.title('Correlation between different features', fontsize=15, c='black')
plt.show()


In [None]:
#Accuracy by kNN algorithm

import numpy as np
import pandas as pd

# Re-read the CSV so this cell starts from the file contents
data = pd.read_csv(
    '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
)
df = pd.DataFrame(data)
# Define recommended actions based on rules (example rules)
def get_recommended_action(row):
    """Pick the first matching rule-based action for one data row.

    The rules are tried in order (first match wins): N below 3, P below 3,
    rainfall below 45, humidity below 60, then N, P and K all above 4.
    When none applies the result is 'No Specific Action'.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for 'N', 'P', 'K', 'rainfall' and
        'humidity'.

    Returns
    -------
    str
        The action label chosen for the row.
    """
    # (predicate, action) pairs; predicates are only evaluated until one is true
    ordered_rules = (
        (lambda r: r['N'] < 3, 'Add Fertilizer'),
        (lambda r: r['P'] < 3, 'Add Phosphorus'),
        (lambda r: r['rainfall'] < 45, 'Increase Watering'),
        (lambda r: r['humidity'] < 60, 'Provide More Humidity'),
        (lambda r: r['N'] > 4 and r['P'] > 4 and r['K'] > 4, 'Optimal Nutrient Levels'),
    )
    for applies, action in ordered_rules:
        if applies(row):
            return action
    return 'No Specific Action'
# Label every row with its rule-based action, then separate features and target
df['Recommended_Action'] = df.apply(get_recommended_action, axis=1)
features = df[['N', 'P','K','temperature', 'humidity', 'ph', 'rainfall']]
target = df['Recommended_Action']

# Running lists of accuracy scores and the matching model names
acc = []
model = []

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(features,target,test_size = 0.2,random_state =2)


from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import classification_report

knn = KNeighborsClassifier()

# Fit on the training split only; fitting on the test split would make the
# score below a measure of memorisation rather than generalisation.
knn.fit(x_train,y_train)

# Predict the held-out rows the model has not seen
predicted_values = knn.predict(x_test)

x = metrics.accuracy_score(y_test, predicted_values)
acc.append(x)
model.append('K Nearest Neighbours')
print("KNN Accuracy is: ", x)

print(classification_report(y_test,predicted_values))

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Read the CSV again and rebuild the working frame for this cell
csv_path = '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
data = pd.read_csv(csv_path)
df = pd.DataFrame(data)
# Add a 'Sum' column holding N + P + K for each row
df['Sum'] = df[['N', 'P', 'K']].sum(axis='columns')

# Define recommended actions based on rules (example rules)
def get_recommend_action(row):
    """Recommend a fertilizer action by comparing N, P and K to fixed reference levels.

    Each nutrient value is divided by its reference level (N: 4, P: 2, K: 1).
    A quotient above 1 means the nutrient exceeds its reference level; a
    quotient below 1 means it falls short.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for 'N', 'P' and 'K'.

    Returns
    -------
    str
        ``"Reduce <names> Fertilizer"`` when at least one nutrient exceeds its
        reference level; otherwise ``"Increase <names> Fertilizer"`` when at
        least one falls short; otherwise ``'Optimal Nutrient Levels'``.
        Surplus takes precedence: nutrients that fall short are not reported
        when any nutrient is in surplus.
    """
    optimal_ratio = {'N': 4, 'P': 2, 'K': 1}

    # Quotient of the current level over the reference level, per nutrient
    current_ratio = {
        nutrient: row[nutrient] / reference
        for nutrient, reference in optimal_ratio.items()
    }

    exceeding_nutrients = [name for name, quotient in current_ratio.items() if quotient > 1]
    below_optimal_nutrients = [name for name, quotient in current_ratio.items() if quotient < 1]

    # A surplus calls for less fertilizer, so the advice is to reduce it
    if exceeding_nutrients:
        return f"Reduce {', '.join(exceeding_nutrients)} Fertilizer"
    if below_optimal_nutrients:
        return f"Increase {', '.join(below_optimal_nutrients)} Fertilizer"
    return 'Optimal Nutrient Levels'


# Handling Missing Values: drop incomplete rows from the working frame before
# labelling, so the features and the target below are taken from the same rows.
df = df.dropna().reset_index(drop=True)

# Apply the ratio-based rules defined in this cell (get_recommend_action)
# to generate recommended actions
df['Recommended_Action'] = df.apply(get_recommend_action, axis=1)
print(df.columns)

# Encoding Categorical Variables: map each action string to an integer class id
label_encoder = LabelEncoder()
df['Recommended_Action_Encoded'] = label_encoder.fit_transform(df['Recommended_Action'])

# Define features (X) and target (y) from the same frame so they stay row-aligned
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['Recommended_Action_Encoded']

# Split first so the held-out rows do not influence the scaler statistics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling: fit on the training split only, then apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Training: train a Random Forest Classifier (seeded so reruns match)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)


In [None]:
import pandas as pd

# Read the crop recommendation CSV and wrap it in the working DataFrame
csv_path = '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
data = pd.read_csv(csv_path)
df = pd.DataFrame(data)

# Define recommended actions based on rules (example rules)
def recommend_nutrient_action_individual(row):
    """Classify N, P and K by their share of the row's total against a 4:2:1 target.

    Each nutrient's share is its value divided by N + P + K; the target share
    is its weight in ``{'N': 4, 'P': 2, 'K': 1}`` divided by the weight total.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for 'N', 'P' and 'K'.

    Returns
    -------
    tuple of (list, list, list)
        Names of the nutrients whose share is above the target, below the
        target, and exactly on the target, in that order. When N + P + K is
        zero, shares are undefined and all three nutrients are reported as
        below the target.
    """
    optimal_ratio = {'N': 4, 'P': 2, 'K': 1}
    # Loop-invariant: total of the target weights
    optimal_total = sum(optimal_ratio.values())

    nutrient_total = row['N'] + row['P'] + row['K']
    if nutrient_total == 0:
        # Avoid dividing by zero: with no nutrients present, every one is lacking
        return [], list(optimal_ratio), []

    # Initialize lists to store nutrient names
    exceeding_nutrients = []
    below_optimal_nutrients = []
    optimal_nutrients = []

    # Sort each nutrient into exactly one of the three lists
    for nutrient, weight in optimal_ratio.items():
        share = row[nutrient] / nutrient_total
        target_share = weight / optimal_total
        if share > target_share:
            exceeding_nutrients.append(nutrient)
        elif share < target_share:
            below_optimal_nutrients.append(nutrient)
        else:
            optimal_nutrients.append(nutrient)

    return exceeding_nutrients, below_optimal_nutrients, optimal_nutrients

# Run the classifier on every row; each result is a tuple of three lists
nutrient_lists = df.apply(recommend_nutrient_action_individual, axis=1)
# Spread each tuple across the three new columns
df[['Reduce', 'Increase', 'Opt_N']] = nutrient_lists.apply(pd.Series)

# Print the resulting frame
print(df)








In [None]:
import pandas as pd

# Load the crop recommendation CSV into the working DataFrame
data = pd.read_csv(
    '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
)
df = pd.DataFrame(data=data)

# Define recommended actions based on rules (example rules)
def recommend_nutrient_action_individual(row):
    """List which of N, P and K are above or below their 4:2:1 target share.

    Each nutrient's share is its value divided by N + P + K; the target share
    is its weight in ``{'N': 4, 'P': 2, 'K': 1}`` divided by the weight total.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for 'N', 'P' and 'K'.

    Returns
    -------
    tuple of (list, list)
        Names of the nutrients whose share is above the target, and names of
        those below it. Nutrients exactly on target appear in neither list.
        When N + P + K is zero, shares are undefined and all three nutrients
        are reported as below the target.
    """
    optimal_ratio = {'N': 4, 'P': 2, 'K': 1}
    optimal_total = sum(optimal_ratio.values())

    nutrient_total = row['N'] + row['P'] + row['K']
    if nutrient_total == 0:
        # Avoid dividing by zero: with no nutrients present, every one is lacking
        return [], list(optimal_ratio)

    # Actual and target share of the total, per nutrient
    shares = {nutrient: row[nutrient] / nutrient_total for nutrient in optimal_ratio}
    targets = {nutrient: weight / optimal_total for nutrient, weight in optimal_ratio.items()}

    exceeding_nutrients = [name for name in optimal_ratio if shares[name] > targets[name]]
    below_optimal_nutrients = [name for name in optimal_ratio if shares[name] < targets[name]]

    return exceeding_nutrients, below_optimal_nutrients

# Define recommended actions for water supply based on rainfall and crop-specific optimal conditions
def recommend_water_action(row):
    """Compare a row's rainfall with the range listed for its crop.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['label']`` (the crop name) and ``row['rainfall']``.

    Returns
    -------
    str
        'Optimal' when rainfall lies strictly between the crop's two bounds,
        'Increase' when it is at or below the lower bound, 'Reduce' in every
        other case, and 'No Recommendation' when the crop has no entry in
        the table.
    """
    # (lower, upper) rainfall bounds per crop.
    # NOTE(review): keys must equal the values of the 'label' column exactly;
    # confirm spellings such as 'mungbeans' against the dataset.
    rainfall_bounds = {
        'rice': (200, 250),
        'coffee': (140, 200),
        'maize': (65, 105),
        'chickpea': (70, 90),
        'kidneybeans': (90, 140),
        'apple': (90, 140),
        'orange': (90, 140),
        'pigeonpeas': (100, 200),
        'mothbeans': (30, 70),
        'mungbeans': (30, 70),
        'lentil': (30, 70),
        'watermelon': (30, 70),
        'blackgram': (60, 80),
        'grapes': (60, 80),
        'pomegranate': (90, 120),
        'banana': (90, 120),
        'mango': (90, 120),
        'muskmelon': (20, 35),
        'papaya': (150, 250),
        'coconut': (145, 225),
        'cotton': (65, 90),
        'jute': (150, 200),
    }

    bounds = rainfall_bounds.get(row['label'])
    if not bounds:
        # Crop is not in the table above
        return 'No Recommendation'

    observed = row['rainfall']
    lower, upper = bounds
    if lower < observed < upper:
        return 'Optimal'
    if observed <= lower:
        return 'Increase'
    return 'Reduce'

def pH_action(row):
    """Compare a row's soil pH with the preferred range listed for its crop.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['ph']`` (the pH reading) and ``row['label']``
        (the crop name).

    Returns
    -------
    str
        'Increase' when pH is below the crop's lower limit, 'Optimal pH' when
        it lies within the limits (inclusive), 'Reduce' otherwise, and
        'No Recommendation' when the crop has no entry in the table.
    """
    pH = row['ph']  # the pH column of the DataFrame is named 'ph'

    # Preferred (lower, upper) pH limits per crop. Crop names are spelled as in
    # recommend_water_action ('pomegranate', 'blackgram') so the same 'label'
    # values match in both tables.
    # NOTE(review): 'peas' and 'beans' do not correspond to any crop name used
    # elsewhere in this notebook; confirm which labels they are meant to cover.
    shared_range = (6.5, 7.0)
    preferred_ph = {
        'apple': shared_range,
        'banana': shared_range,
        'pomegranate': shared_range,
        'muskmelon': shared_range,
        'peas': shared_range,
        'beans': shared_range,
        'lentil': shared_range,
        'coconut': shared_range,
        'rice': (6.5, 7.5),
        'coffee': (4.9, 6.5),
        'maize': (5.8, 6.0),
        'blackgram': (6.5, 7.8),
        'cotton': (5.8, 6.5),
        'jute': (5.0, 7.4),
        # Add more crops and their pH ranges as needed
    }

    limits = preferred_ph.get(row['label'])
    if limits is None:
        # Crop is not in the preferences table
        return 'No Recommendation'

    lower_limit, upper_limit = limits
    # Compare the pH with the preferred range and recommend an action
    if pH < lower_limit:
        return 'Increase'
    if lower_limit <= pH <= upper_limit:
        return 'Optimal pH'
    return 'Reduce'

# Nutrient advice: each row yields a pair of lists, spread over two columns
nutrient_lists = df.apply(recommend_nutrient_action_individual, axis=1)
df[['Reduce_Nutrient', 'Increase_Nutrient']] = nutrient_lists.apply(pd.Series)
# Water and pH advice: one string per row
df['Water_Action'] = df.apply(recommend_water_action, axis=1)
df['pH_Action'] = df.apply(pH_action, axis=1)
# Print the resulting frame
print(df)


In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Read the crop recommendation CSV and build the working frame for this cell
csv_path = '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
data = pd.read_csv(csv_path)
df = pd.DataFrame(data)

# Define recommended actions based on rules (example rules)
def recommend_nutrient_action_individual(row):
    """List which of N, P and K are above or below their 4:2:1 target share.

    Each nutrient's share is its value divided by N + P + K; the target share
    is its weight in ``{'N': 4, 'P': 2, 'K': 1}`` divided by the weight total.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for 'N', 'P' and 'K'.

    Returns
    -------
    tuple of (list, list)
        Names of the nutrients whose share is above the target, and names of
        those below it. Nutrients exactly on target appear in neither list.
        When N + P + K is zero, shares are undefined and all three nutrients
        are reported as below the target.
    """
    optimal_ratio = {'N': 4, 'P': 2, 'K': 1}
    # Loop-invariant: total of the target weights
    optimal_total = sum(optimal_ratio.values())

    nutrient_total = row['N'] + row['P'] + row['K']
    if nutrient_total == 0:
        # Avoid dividing by zero: with no nutrients present, every one is lacking
        return [], list(optimal_ratio)

    exceeding_nutrients = []
    below_optimal_nutrients = []

    for nutrient, weight in optimal_ratio.items():
        share = row[nutrient] / nutrient_total
        target_share = weight / optimal_total
        if share > target_share:
            exceeding_nutrients.append(nutrient)
        elif share < target_share:
            below_optimal_nutrients.append(nutrient)
        # A share exactly on target is left out of both lists

    return exceeding_nutrients, below_optimal_nutrients
# Run the nutrient rules on every row and spread each pair of lists over two columns
nutrient_lists = df.apply(recommend_nutrient_action_individual, axis=1)
df[['Reduce', 'Increase']] = nutrient_lists.apply(pd.Series)

# Build one 'Action' label per row by concatenating the text of both lists
df['Action'] = df[['Reduce', 'Increase']].apply(
    lambda lists: ''.join(str(item) for item in lists), axis=1
)

# Turn the 'Action' strings into integer class ids
label_encoder = LabelEncoder()
df['Recommended_Action_Encoded'] = label_encoder.fit_transform(df['Action'])

# The list columns and the raw label are no longer needed
df = df.drop(columns=['Reduce', 'Increase', 'Action'])

# Features (X) and encoded target (y)
feature_columns = ['N', 'P', 'K', 'temperature', 'humidity', 'rainfall']
X = df[feature_columns]
y = df['Recommended_Action_Encoded']

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear-kernel SVM, built and fitted in one step (fit returns the estimator)
svm_clf = SVC(kernel='linear').fit(X_train, y_train)

# Class ids predicted for the held-out rows
y_pred_svm = svm_clf.predict(X_test)

# The same predictions mapped back to their 'Action' strings
y_pred_labels_svm = label_encoder.inverse_transform(y_pred_svm)

# Per-class metrics computed on the encoded ids
report = classification_report(y_test, y_pred_svm, zero_division=1)

print("SVM Classification Report:")
print(report)






In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load your dataset
data = pd.read_csv('/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv')

# Rebuild the working frame from the fresh read instead of relying on a `df`
# left over from an earlier cell
df = pd.DataFrame(data)

# Encode the target column: each distinct 'label' string becomes an integer id.
# The encoder is fitted once, on the strings, so it can decode predictions later.
label_encoder = LabelEncoder()
df['Recommended_Action_Encoded'] = label_encoder.fit_transform(df['label'])

# Define features (X) and target (y)
X = df[['N', 'P', 'K', 'temperature', 'humidity']]
y = df['Recommended_Action_Encoded']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model (seeded so reruns give the same forest)
clf = RandomForestClassifier(random_state=42)

# Define hyperparameters to tune
param_dist = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Perform randomized search (seeded so the sampled candidates are repeatable)
randomized_search = RandomizedSearchCV(
    estimator=clf,
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)
randomized_search.fit(X_train, y_train)

# Get the best parameters
best_params = randomized_search.best_params_
print("Best Hyperparameters:", best_params)

# Train a model with the best hyperparameters
best_clf = RandomForestClassifier(**best_params, random_state=42)
best_clf.fit(X_train, y_train)

# Predict encoded class ids for the held-out rows
y_pred = best_clf.predict(X_test)

# Decode both predictions and ground truth back to the original 'label' strings
y_pred_labels = label_encoder.inverse_transform(y_pred)
y_test_labels = label_encoder.inverse_transform(y_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("RFC Accuracy:", accuracy)
# Print the classification report, keyed by the decoded label strings
print("Classification Report:")
print(classification_report(y_test_labels, y_pred_labels, zero_division=1))


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the crop recommendation data
csv_path = '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
data = pd.read_csv(csv_path)
df = pd.DataFrame(data)

# Features: three nutrient columns plus temperature and rainfall; target: 'label'
feature_columns = ['N', 'P', 'K', 'temperature', 'rainfall']
X = df[feature_columns]
y = df['label']

# Hold out 15% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Build and fit the random forest in one step (fit returns the estimator)
rfc = RandomForestClassifier().fit(X_train, y_train)

# Labels predicted for the held-out rows
y_pred_rfc = rfc.predict(X_test)

# Overall accuracy on the held-out rows
accuracy = accuracy_score(y_test, y_pred_rfc)
print("RFC Accuracy:", accuracy)

# Per-class precision / recall / F1
report = classification_report(y_test, y_pred_rfc, zero_division=1)
print("RFC Classification Report:")
print(report)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the dataset and wrap it in the working frame
data = pd.read_csv(
    '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
)
df = pd.DataFrame(data=data)

# Select the feature columns (X) and the 'label' target (y)
X = df.loc[:, ['N', 'P', 'K', 'temperature', 'rainfall']]
y = df.loc[:, 'label']

# NOTE(review): 70% of the rows are held out for testing here - confirm this
# split size is intentional.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.7, random_state=42
)

# Random forest with default settings, fitted on the training rows
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)

# Score the held-out rows
y_pred_rfc = rfc.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_rfc)

print("RFC Accuracy:", accuracy)

# Per-class metrics for the same predictions
report = classification_report(y_test, y_pred_rfc, zero_division=1)
print("RFC Classification Report:")
print(report)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the crop recommendation data into the working frame
csv_path = '/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'
data = pd.read_csv(csv_path)
df = pd.DataFrame(data)

# Features (X) and target (y)
feature_columns = ['N', 'P', 'K', 'temperature', 'humidity']
X = df[feature_columns]
y = df['label']

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the label -> integer mapping from the training labels, then encode both splits
label_encoder = LabelEncoder().fit(y_train)
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# K-nearest-neighbours classifier with default settings
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train_encoded)

# Encoded labels predicted for the held-out rows
y_pred_knn = knn_clf.predict(X_test)

# Overall accuracy on the held-out rows
accuracy = accuracy_score(y_test_encoded, y_pred_knn)
print("KNN Accuracy:", accuracy)

# Per-class metrics on the encoded labels
report = classification_report(y_test_encoded, y_pred_knn, zero_division=1)
print("KNN Classification Report:")
print(report)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Sample data
data = pd.read_csv('/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv')

# Create a DataFrame
df = pd.DataFrame(data)

# Define features (X) and target (y)
X = df[['N', 'P', 'K', 'temperature', 'humidity','rainfall']]
y = df['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Encode the target variable
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Fresh, unfitted estimator for the search, so this cell does not depend on a
# classifier created in another cell
knn_clf = KNeighborsClassifier()

# Define hyperparameters to tune
param_dist = {
    'n_neighbors': range(1,9),  # Number of neighbors to consider
    'weights': ['uniform', 'distance'],  # Weight function used in prediction
    'p': [1, 2],  # Power parameter for the Minkowski metric
}

# Perform randomized search for hyperparameter tuning (seeded so the sampled
# candidates are repeatable)
randomized_search = RandomizedSearchCV(
    estimator=knn_clf,
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)
randomized_search.fit(X_train, y_train_encoded)

# Get the best parameters
best_params = randomized_search.best_params_
print("Best Hyperparameters:", best_params)

# Evaluate the tuned model itself: best_estimator_ is the winning configuration
# refitted on the whole training split, and it predicts this cell's X_test
best_knn = randomized_search.best_estimator_
y_pred_knn = best_knn.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test_encoded, y_pred_knn)
# Print the accuracy
print("KNN Accuracy:", accuracy)

# Print the KNN classification report
report = classification_report(y_test_encoded, y_pred_knn, zero_division=1)
print("KNN Classification Report:")
print(report)
