## **Project Samarth**


# Step 1: Install Libraries


In [21]:
import requests
import pandas as pd
from transformers import pipeline

# Step 3: Data Discovery & Integration
# For sourcing
# crop_url = "/content/drive/MyDrive/PROJECT SAMARTH/crop_production.csv"  # Example: Replace with real URL
# climate_url = "/content/drive/MyDrive/PROJECT SAMARTH/weather-1.csv"  #

# For now, using local CSVs (upload to Colab: crop_production.csv, climate_data.csv)
# Assume crop_production.csv has columns:State', 'District', 'Year', 'Season', 'Crop', 'Area', 'Production
# Assume climate_data.csv has columns:State', 'District', 'Temperature', 'Condition', 'Humidity ', 'Wind Speed', 'Year

In [22]:
crop_df = pd.read_csv('/content/drive/MyDrive/PROJECT SAMARTH/crop_production.csv')  # Upload your file
climate_df = pd.read_csv('/content/drive/MyDrive/PROJECT SAMARTH/weather-1.csv')  # Upload your file

  climate_df = pd.read_csv('/content/drive/MyDrive/PROJECT SAMARTH/weather-1.csv')  # Upload your file


# Clean and Integrate


In [23]:
# Clean and Integrate
crop_df['State'] = crop_df['State'].str.lower().str.strip()  # Normalize
climate_df['State'] = climate_df['State'].str.lower().str.strip()
integrated_df = pd.merge(crop_df, climate_df, on=['State', 'Year'], how='outer')  # Merge on State and Year
integrated_df.fillna(0, inplace=True)  # Handle missing values

In [24]:
print("Crop DF columns:", crop_df.columns.tolist())
print("Climate DF columns:", climate_df.columns.tolist())

Crop DF columns: ['State', 'District', 'Year', 'Season', 'Crop', 'Area', 'Production']
Climate DF columns: ['State', 'District', 'Temperature', 'Condition', 'Humidity ', 'Wind Speed', 'Year']


In [25]:
print(integrated_df.head())

                         State District_x  Year       Season  \
0  andaman and nicobar islands          0  1999            0   
1  andaman and nicobar islands          0  1999            0   
2  andaman and nicobar islands          0  1999            0   
3  andaman and nicobar islands   NICOBARS  2000  Kharif        
4  andaman and nicobar islands   NICOBARS  2000  Kharif        

                  Crop    Area  Production                District_y  \
0                    0     0.0         0.0                   Nicobar   
1                    0     0.0         0.0  North and Middle Andaman   
2                    0     0.0         0.0             South Andaman   
3             Arecanut  1254.0      2000.0                         0   
4  Other Kharif pulses     2.0         1.0                         0   

   Temperature          Condition  Humidity   Wind Speed  
0         27.2  Light rain shower       80.0        27.7  
1         27.2  Light rain shower       80.0        27.7  
2    

# Step 4: Q&A System
# Simple NLP for entity extraction (using Hugging Face)

In [26]:
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


In [27]:
def parse_question(question):
    # Full list of Indian states/UTs (lowercased)
    states = [
        'andhra pradesh', 'arunachal pradesh', 'assam', 'bihar', 'chhattisgarh', 'goa', 'gujarat', 'haryana',
        'himachal pradesh', 'jharkhand', 'karnataka', 'kerala', 'madhya pradesh', 'maharashtra', 'manipur',
        'meghalaya', 'mizoram', 'nagaland', 'odisha', 'punjab', 'rajasthan', 'sikkim', 'tamil nadu', 'telangana',
        'tripura', 'uttar pradesh', 'uttarakhand', 'west bengal', 'delhi', 'jammu and kashmir', 'ladakh',
        'puducherry', 'chandigarh', 'dadra and nagar haveli and daman and diu', 'lakshadweep', 'andaman and nicobar islands'
    ]
    question_lower = question.lower().replace('state of ', '').replace('state ', '')  # Handle variations like "State of Karnataka"
    found_states = [s for s in states if s in question_lower]
    print(f"Debug: Question lower: {question_lower}")  # Temporary debug
    print(f"Debug: Found states: {found_states}")  # Temporary debug
    state1 = found_states[0] if len(found_states) > 0 else None
    state2 = found_states[1] if len(found_states) > 1 else None
    crop = None  # Extend for crops if needed
    region = None
    return {'state1': state1, 'state2': state2, 'crop': crop, 'region': region}


In [28]:
def answer_question(question):
    try:
        parsed = parse_question(question)
        state1 = parsed.get('state1', '').lower().strip() if parsed.get('state1') else None
        state2 = parsed.get('state2', '').lower().strip() if parsed.get('state2') else None

        if not state1 or not state2:
            return "Error: Could not parse states from question. Try: 'Compare production in Karnataka and Maharashtra'."

        n = 5  # Last 5 years
        max_year = int(integrated_df['Year'].max())
        years = list(range(max_year - n + 1, max_year + 1))

        data1 = integrated_df[(integrated_df['State'] == state1) & (integrated_df['Year'].isin(years))]
        data2 = integrated_df[(integrated_df['State'] == state2) & (integrated_df['Year'].isin(years))]

        if data1.empty or data2.empty:
            return f"Error: No data found for {state1.title()} or {state2.title()} in the last {n} years."

        question_lower = question.lower()

        # Check if the question is about rainfall
        if "rainfall" in question_lower:
            if 'Rainfall_mm' in data1.columns and 'Rainfall_mm' in data2.columns:
                avg_rainfall1 = data1['Rainfall_mm'].mean()
                avg_rainfall2 = data2['Rainfall_mm'].mean()
                answer = f"Average rainfall in {state1.title()} (last {n} years): {avg_rainfall1:.2f} mm. Average rainfall in {state2.title()} (last {n} years): {avg_rainfall2:.2f} mm. Sources: Climate data from relevant sources."
            else:
                answer = "Rainfall data not available in the expected format."
        # Check if the question is about temperature
        elif "temperature" in question_lower:
            if 'Temperature' in data1.columns and 'Temperature' in data2.columns:
                avg_temp1 = data1['Temperature'].mean()
                avg_temp2 = data2['Temperature'].mean()
                answer = f"Average temperature in {state1.title()} (last {n} years): {avg_temp1:.2f}°C. Average temperature in {state2.title()} (last {n} years): {avg_temp2:.2f}°C. Sources: Climate data from relevant sources."
            else:
                answer = "Temperature data not available in the expected format."
        # Check if the question is about humidity
        elif "humidity" in question_lower:
            if 'Humidity ' in data1.columns and 'Humidity ' in data2.columns: # Note the space after Humidity
                avg_humidity1 = data1['Humidity '].mean()
                avg_humidity2 = data2['Humidity '].mean()
                answer = f"Average humidity in {state1.title()} (last {n} years): {avg_humidity1:.2f}%. Average humidity in {state2.title()} (last {n} years): {avg_humidity2:.2f}%. Sources: Climate data from relevant sources."
            else:
                 answer = "Humidity data not available in the expected format."
        # Check if the question is about production
        elif "production" in question_lower or "crop" in question_lower:
            # For production: Compare top crops by volume and average production
            top_crop1 = data1.groupby('Crop')['Production'].sum().idxmax() if not data1['Crop'].empty else "No crop data"
            top_prod1 = data1.groupby('Crop')['Production'].sum().max() if not data1['Crop'].empty else 0
            top_crop2 = data2.groupby('Crop')['Production'].sum().idxmax() if not data2['Crop'].empty else "No crop data"
            top_prod2 = data2.groupby('Crop')['Production'].sum().max() if not data2['Crop'].empty else 0
            answer = f"Top produced crop in {state1.title()}: {top_crop1} (Volume: {top_prod1:.2f}). In {state2.title()}: {top_crop2} (Volume: {top_prod2:.2f}). Sources: Crop data from Ministry of Agriculture & Farmers Welfare."
        else:
            answer = "I can provide information about crop production, rainfall, temperature, or humidity. Please rephrase your question."

        return answer
    except Exception as e:
        return f"Error processing question: {str(e)}. Check data or question format."

In [29]:
print("Crop DF columns:", crop_df.columns.tolist())
print("Climate DF columns:", climate_df.columns.tolist())
print("Integrated DF columns:", integrated_df.columns.tolist())
print(integrated_df.head())

Crop DF columns: ['State', 'District', 'Year', 'Season', 'Crop', 'Area', 'Production']
Climate DF columns: ['State', 'District', 'Temperature', 'Condition', 'Humidity ', 'Wind Speed', 'Year']
Integrated DF columns: ['State', 'District_x', 'Year', 'Season', 'Crop', 'Area', 'Production', 'District_y', 'Temperature', 'Condition', 'Humidity ', 'Wind Speed']
                         State District_x  Year       Season  \
0  andaman and nicobar islands          0  1999            0   
1  andaman and nicobar islands          0  1999            0   
2  andaman and nicobar islands          0  1999            0   
3  andaman and nicobar islands   NICOBARS  2000  Kharif        
4  andaman and nicobar islands   NICOBARS  2000  Kharif        

                  Crop    Area  Production                District_y  \
0                    0     0.0         0.0                   Nicobar   
1                    0     0.0         0.0  North and Middle Andaman   
2                    0     0.0         0.0 

Test a function  to check if it outputs a response.

---



Humidity

In [30]:
print(answer_question("Compare humidity in Andhra Pradesh and Assam"))

Debug: Question lower: compare humidity in andhra pradesh and assam
Debug: Found states: ['andhra pradesh', 'assam']
Average humidity in Andhra Pradesh (last 5 years): 50.61%. Average humidity in Assam (last 5 years): 39.14%. Sources: Climate data from relevant sources.


In [31]:
print(answer_question("Compare Humidity in Karnataka and Maharashtra"))

Debug: Question lower: compare humidity in karnataka and maharashtra
Debug: Found states: ['karnataka', 'maharashtra']
Average humidity in Karnataka (last 5 years): 41.25%. Average humidity in Maharashtra (last 5 years): 17.75%. Sources: Climate data from relevant sources.


production

In [32]:
print(answer_question("Compare production in Karnataka and Maharashtra"))

Debug: Question lower: compare production in karnataka and maharashtra
Debug: Found states: ['karnataka', 'maharashtra']
Top produced crop in Karnataka: Sugarcane (Volume: 551404144.00). In Maharashtra: Sugarcane (Volume: 706435600.00). Sources: Crop data from Ministry of Agriculture & Farmers Welfare.


In [33]:
print(answer_question("Compare production in Andhra Pradesh and Assam"))

Debug: Question lower: compare production in andhra pradesh and assam
Debug: Found states: ['andhra pradesh', 'assam']
Top produced crop in Andhra Pradesh: Coconut  (Volume: 21283845445.00). In Assam: Rice (Volume: 66992165.00). Sources: Crop data from Ministry of Agriculture & Farmers Welfare.


Rainfall

In [34]:
print(answer_question("Compare rainfall in Karnataka and Maharashtra"))

Debug: Question lower: compare rainfall in karnataka and maharashtra
Debug: Found states: ['karnataka', 'maharashtra']
Rainfall data not available in the expected format.


Temperature

In [35]:
print(answer_question("Compare temperature in Karnataka and Maharashtra"))

Debug: Question lower: compare temperature in karnataka and maharashtra
Debug: Found states: ['karnataka', 'maharashtra']
Average temperature in Karnataka (last 5 years): 18.38°C. Average temperature in Maharashtra (last 5 years): 12.90°C. Sources: Climate data from relevant sources.


In [36]:
print(answer_question("Compare temperature in Andhra Pradesh and Assam"))

Debug: Question lower: compare temperature in andhra pradesh and assam
Debug: Found states: ['andhra pradesh', 'assam']
Average temperature in Andhra Pradesh (last 5 years): 18.95°C. Average temperature in Assam (last 5 years): 14.80°C. Sources: Climate data from relevant sources.


# Step 5: Front-End with Gradio



In [37]:
!pip install gradio



 OUTPUT :                                                                     
 In the web interface: Enter a sample question (e.g., "Compare average Temperature in Karnataka and Maharashtra for the last 5 years"). Click "Get Answer". Verify the response appears with data and citations (e.g., "Average temperature in Karnataka (last 5 years): 18.38°C. Average temperature in Maharashtra (last 5 years): 12.90°C. Sources: Climate data from relevant sources."). Test 2-3 sample questions to ensure end-to-end functionality.

In [38]:
import gradio as gr
import pandas as pd
from transformers import pipeline

# Copy your data loading and functions here (e.g., integrated_df, parse_question, answer_question)

def gradio_interface(question):
    return answer_question(question)

iface = gr.Interface(fn=gradio_interface, inputs="text", outputs="text", title="Project Samarth: Agricultural Q&A System")
iface.launch(share=True)  # Generates a public link automatically

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d2bcb50b49b19d7e33.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Deployment

In [39]:
!pip install huggingface_hub gradio
from huggingface_hub import login
login()



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [40]:
import pandas as pd
import transformers
import gradio

print(f"Pandas version: {pd.__version__}")
print(f"Transformers version: {transformers.__version__}")
print(f"Gradio version: {gradio.__version__}")

Pandas version: 2.2.2
Transformers version: 4.57.1
Gradio version: 5.49.1


In [41]:
!rm -rf project-samarth

In [42]:
!pip install -q huggingface_hub gradio

In [43]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [44]:
from huggingface_hub import create_repo

# replace yourusername with your actual HF username
create_repo(
    "project-samarth",
    repo_type="space",
    space_sdk="gradio",
    private=False
)

HfHubHTTPError: 409 Client Error: Conflict for url: https://huggingface.co/api/repos/create (Request ID: Root=1-6904bc50-608fae8c6f3f8e1d78245a23;f6a198f7-71b3-44a8-81f6-41f1faf89bb7)

You already created this space repo: Prajwalchavan/project-samarth

In [None]:
!git clone https://huggingface.co/spaces/prajwalchavan/project-samarth

In [None]:
!cp app.py requirements.txt crop_production.csv weather-1.csv project-samarth/

In [None]:
!git config --global user.email "prajwalchavan354@gmail.com"
!git config --global user.name "Prajwal Chavan"

In [None]:
%cd project-samarth
!git add .
!git commit -m "Initial Project Samarth upload"
!git push