#**Step 1: Install required libraries**

In [None]:
!pip install pandas numpy scikit-learn

#**Step 2: Upload the CSV**

In [None]:
# Upload step for Google Colab (this import is not available outside Colab).
# The return value is kept in `uploaded`; no cell shown in this section reads it again.
from google.colab import files
uploaded = files.upload()

#**Step 3: Load the dataset**

In [None]:
import pandas as pd

# Load the dataset into df. The file name is hard-coded and relative to the
# working directory, presumably the file uploaded in the previous step.
df = pd.read_csv('data_core.csv')
# Show the first rows as the cell output for a quick sanity check.
df.head()


# **Step 4: Solve the Probable Questions**

## **1.What is the average soil moisture across all samples?**

In [None]:
# Mean of the 'Moisture' column over all rows of df (shown as the cell output).
df['Moisture'].mean()

# **Explanation:-**
Gives the average water content of the soil samples, which helps with irrigation planning.

##**2. Which soil type appears most frequently?**

In [None]:
# mode() returns the most frequent value(s) of 'Soil Type'; [0] takes the first
# entry, so only one soil type is reported even if several are tied.
df['Soil Type'].mode()[0]

# **Explanation:-**
Most common soil type helps understand dataset composition and agricultural patterns.

## **3. What crop type is most commonly grown?**

In [None]:
# First entry of the mode() result for 'Crop Type', i.e. one most frequent crop.
df['Crop Type'].mode()[0]

#**Explanation:-**
Identifies the crop that appears most often in the dataset, which indicates farmers’ crop preferences.

## **4. What is the correlation between Temperature and Soil Moisture?**

In [None]:
# Correlation matrix of the two selected columns; the off-diagonal entry is the
# temperature/moisture correlation. 'Temparature' is spelled this way throughout
# the notebook, presumably matching the CSV header.
df[['Temparature', 'Moisture']].corr()

# **Explanation:-**
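The off-diagonal value is the correlation coefficient: a negative value means moisture tends to fall as temperature rises, while a value near 0 means there is little linear relationship.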

## **5. Which fertilizer is applied to the highest number of records?**

In [None]:
# Count the rows per fertilizer, then take the label with the highest count.
df['Fertilizer Name'].value_counts().idxmax()

# **Explanation:-**
Identifies the most used fertilizer in the dataset.

## **6. What is the average nitrogen content for each soil type?**

In [None]:
# One row per soil type: the mean of 'Nitrogen' within that group.
df.groupby('Soil Type')['Nitrogen'].mean()

# **Explanation:-**
Shows how nitrogen levels vary among different soil types, helpful for fertility assessment.

##**7. Which crop type requires the highest moisture on average?**

In [None]:
# Mean 'Moisture' per crop type; idxmax() returns the crop label with the largest mean.
df.groupby('Crop Type')['Moisture'].mean().idxmax()

#**Explanation:-**
Finds which crop needs more water, helping irrigation decision-making.

##**8. What are the optimal temperature ranges for each crop? (mean ± std)**

In [None]:
# Per-crop mean and standard deviation of temperature.
temp_stats = df.groupby('Crop Type')['Temparature'].agg(['mean','std'])

# The question asks for a range, so turn mean ± std into explicit bounds.
# A crop with only one sample has std = NaN, so its bounds are NaN as well.
temp_stats['low'] = temp_stats['mean'] - temp_stats['std']
temp_stats['high'] = temp_stats['mean'] + temp_stats['std']

# Displayed as the cell output: mean, std, low, high per crop type.
temp_stats

#**Explanation:-**
Gives average temperature and variation for each crop → helps define ideal growing conditions.

##**9. Identify samples where Nitrogen = 0 (deficiency detection).**

In [None]:
# Boolean-mask filter: keep only the rows whose 'Nitrogen' value is exactly 0.
df[df['Nitrogen'] == 0]

#**Explanation:-**
Finds samples with nitrogen deficiency, important for recommending fertilizer.

##**10. Predict Fertilizer Name using ML Classification**

In [None]:
# Decision Tree Model: predict 'Fertilizer Name' from the remaining columns.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# Work on a copy so the integer encoding below never touches df.
df_ml = df.copy()
# Replace each categorical column with integer codes (one encoder per column).
for col in ['Soil Type','Crop Type','Fertilizer Name']:
    df_ml[col] = LabelEncoder().fit_transform(df_ml[col])

# Columns that other cells of this notebook add to df. They are derived from
# the raw measurements (or are themselves a fertilizer suggestion) and
# 'Moisture_Class' holds strings, so they must not end up in the features when
# this cell is re-run after those cells. errors='ignore' because they only
# exist once those cells have been executed.
derived_cols = ['imbalance', 'Moisture_Class', 'Recommended_Fertilizer']

X = df_ml.drop(columns=['Fertilizer Name'] + derived_cols, errors='ignore')
y = df_ml['Fertilizer Name']

# Fixed seeds make the 80/20 split, the fitted tree and therefore the reported
# accuracy reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
# Accuracy on the held-out 20% (shown as the cell output).
model.score(X_test, y_test)

#**Explanation:-**
Builds a simple ML model to predict the correct fertilizer based on soil and crop features.

##**11. Which soil type has the highest nutrient imbalance?**

#**Definition:-**
Imbalance = |N−P| + |P−K| + |K−N|

In [None]:
# Pairwise absolute gaps between the three nutrient columns.
n_p_gap = (df['Nitrogen'] - df['Phosphorous']).abs()
p_k_gap = (df['Phosphorous'] - df['Potassium']).abs()
k_n_gap = (df['Potassium'] - df['Nitrogen']).abs()

# Per-sample imbalance = |N-P| + |P-K| + |K-N|, stored as a new column on df.
df['imbalance'] = n_p_gap + p_k_gap + k_n_gap

# Soil type whose samples have the largest mean imbalance (cell output).
mean_imbalance_by_soil = df.groupby('Soil Type')['imbalance'].mean()
mean_imbalance_by_soil.idxmax()

#**Explanation:-**
Finds the soil type whose samples have the largest average gap between nitrogen, phosphorus and potassium levels, which may indicate poorer soil health.

##**12. Find the hottest sample (max temperature).**

In [None]:
# idxmax() gives the index label of the highest 'Temparature' value;
# df.loc[...] then returns that full row.
df.loc[df['Temparature'].idxmax()]

#**Explanation:-**
Returns the record with highest temperature, showing extreme climate conditions.

##**13. Which fertilizer is used for high moisture soils (>60%)?**

In [None]:
# Keep rows with 'Moisture' strictly above 60, then count rows per fertilizer.
df[df['Moisture'] > 60]['Fertilizer Name'].value_counts()

#**Explanation:-**
Finds fertilizers commonly applied in wet soil conditions.

##**14. Relationship between humidity and fertilizer choice.**

In [None]:
# One row per fertilizer: the mean 'Humidity' of the samples that use it.
df.groupby('Fertilizer Name')['Humidity'].mean()

#**Explanation:-**
Shows average humidity for each fertilizer type → helps understand usage patterns.

##**15. Predict Crop Type using ML Classification**

In [None]:
# Re-imported here so the cell does not depend on the import lines of the
# fertilizer-model cell (it still needs df_ml from that cell).
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Target: the label-encoded crop type.
y = df_ml['Crop Type']

# Features: every other column of df_ml, mirroring the fertilizer model.
# The extra names are columns other cells may have added to df before df_ml
# was copied; errors='ignore' because they are usually absent.
X_crop = df_ml.drop(
    columns=['Crop Type', 'imbalance', 'Moisture_Class', 'Recommended_Fertilizer'],
    errors='ignore',
)

# Separate variable names so the fertilizer model and its split stay available.
Xc_train, Xc_test, yc_train, yc_test = train_test_split(
    X_crop, y, test_size=0.2, random_state=42
)

crop_model = DecisionTreeClassifier(random_state=42)
crop_model.fit(Xc_train, yc_train)
# Accuracy on the held-out 20% (shown as the cell output).
crop_model.score(Xc_test, yc_test)

#**Explanation:-**
ML model predicts the best crop based on soil parameters.

## **16. Cluster soil samples using KMeans (Unsupervised ML)**

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Work on a copy so the encoding and the 'Cluster' column never touch df.
df2 = df.copy()

# Identify every non-numeric column (object, string, category, ...)
cat_cols = df2.select_dtypes(exclude='number').columns

# Encode all categorical columns as integer codes
for col in cat_cols:
    df2[col] = LabelEncoder().fit_transform(df2[col])

# KMeans is distance based, so put all features on a comparable scale first;
# otherwise the columns with the largest numeric range dominate the clusters.
# NOTE(review): the integer codes of the categorical columns are still treated
# as ordinary numbers here - confirm that this is acceptable for the analysis.
scaled_features = StandardScaler().fit_transform(df2)

# n_init is set explicitly so the result does not depend on the default of the
# installed scikit-learn version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
# The labels are written back to df2 after clustering, so 'Cluster' is never
# part of the features.
df2['Cluster'] = kmeans.fit_predict(scaled_features)

# Preview a few original-scale columns next to the assigned cluster.
df2[['Temparature','Humidity','Moisture','Cluster']].head()


# **Explanation:-**
Groups samples into clusters to identify natural soil categories without labels.

##**17. What crop type requires the highest nitrogen on average?**

In [None]:
# Mean 'Nitrogen' per crop type; idxmax() returns the crop label with the largest mean.
df.groupby('Crop Type')['Nitrogen'].mean().idxmax()

# **Explanation:-**
Shows which crop needs more nitrogen, important for fertilizer management.

##**18. Does any soil type consistently require Potassium?**

In [None]:
# One row per soil type: the mean of 'Potassium' within that group.
# Only the mean is shown; spread within a soil type is not visible here.
df.groupby('Soil Type')['Potassium'].mean()

#**Explanation:-**
Finds soil types with higher potassium demand.

##**19. Which crop is grown in the most humid conditions?**

In [None]:
# Mean 'Humidity' per crop type; idxmax() returns the crop label with the largest mean.
df.groupby('Crop Type')['Humidity'].mean().idxmax()

#**Explanation:-**
Identifies crops suited for high humidity environments.

## **20. Classify soil as Dry / Normal / Wet**

In [None]:
def moisture_label(m):
    """Return the moisture class for a single moisture reading.

    Below 40 is "Dry", 40 up to and including 60 is "Normal", and anything
    else is "Wet". A value that fails both comparisons (e.g. NaN) therefore
    also ends up as "Wet".
    """
    if m < 40:
        label = "Dry"
    elif m <= 60:
        label = "Normal"
    else:
        label = "Wet"
    return label

# Apply moisture_label to every 'Moisture' value and store the resulting labels
# in a new 'Moisture_Class' column on df.
df['Moisture_Class'] = df['Moisture'].apply(moisture_label)

#**Explanation:-**
Creates a simple rule-based classification of soil moisture levels.

##**21. Build a simple fertilizer recommendation system**

In [None]:
def recommend(row):
    """Return a fertilizer name for one sample using fixed nutrient thresholds.

    `row` is indexed by column name; 'Nitrogen' is always read and
    'Phosphorous' is read only when nitrogen is not below 10.

    Rules, checked in order:
      1. Nitrogen < 10                     -> "Urea"
      2. Phosphorous < 10                  -> "DAP"
      3. Nitrogen < 20 and Phosphorous < 20 -> "14-35-14"
      4. otherwise                         -> "28-28"
    """
    nitrogen = row['Nitrogen']
    if nitrogen < 10:
        return "Urea"

    # Looked up only after the nitrogen rule has been ruled out.
    phosphorous = row['Phosphorous']
    if phosphorous < 10:
        return "DAP"

    both_moderate = nitrogen < 20 and phosphorous < 20
    return "14-35-14" if both_moderate else "28-28"

# Call recommend once per row (axis=1) and store the returned fertilizer name
# in a new 'Recommended_Fertilizer' column on df.
df['Recommended_Fertilizer'] = df.apply(recommend, axis=1)

#**Explanation:-**
A basic rule-based model suggesting fertilizers depending on nutrient deficiency.