# India Agriculture & Development Analysis

A comprehensive analysis of agriculture and development metrics across all Indian states using machine learning.


## 1. Data Loading and Exploration


In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data_loader import IndiaDataLoader
from src.preprocessing import DataPreprocessor
from src.visualization import IndiaDataVisualizer


In [None]:
# Create the loader that provides the agriculture and development tables
loader = IndiaDataLoader()

# Read each source table
agri_data = loader.load_agriculture_data()
dev_data = loader.load_development_data()

# Combine both tables into the single frame used by the rest of the notebook
merged_data = loader.merge_data(agri_data, dev_data)

# Report the size of every table, then announce the preview below
print(
    f"Agriculture data shape: {agri_data.shape}",
    f"Development data shape: {dev_data.shape}",
    f"Merged data shape: {merged_data.shape}",
    "\nFirst few rows:",
    sep="\n",
)

# Kept as the trailing expression so the notebook renders the preview
merged_data.head()


## 2. Data Preprocessing


In [None]:
# Set up the preprocessing helper; later cells reuse it to build model inputs
preprocessor = DataPreprocessor()

# Handle missing values first, then pass the result straight into feature creation
processed_data = preprocessor.create_features(
    preprocessor.handle_missing_values(merged_data)
)

print(f"Processed data shape: {processed_data.shape}")
print("\nNew features:")

# Columns present after preprocessing that the merged input did not have
added_columns = [
    name for name in processed_data.columns if name not in merged_data.columns
]
print(added_columns)


## 3. Exploratory Data Analysis


In [None]:
from pathlib import Path

# Initialize visualizer
viz = IndiaDataVisualizer()

# This is the first cell that writes into ../results, so make sure the folder
# exists before a figure is saved there.
# NOTE(review): the visualizer may already create it; exist_ok=True makes this
# call harmless in that case.
Path('../results').mkdir(parents=True, exist_ok=True)

# Correlation heatmap
viz.plot_correlation_heatmap(processed_data, save_path='../results/correlation_heatmap.png')
plt.show()


In [None]:
# Draw the agriculture-vs-development figure; the output path is handed to the
# visualizer through save_path
agri_dev_plot_path = '../results/agri_dev_analysis.png'
viz.plot_agriculture_vs_development(processed_data, save_path=agri_dev_plot_path)
plt.show()


## 4. Machine Learning Models


In [None]:
from src.models.regression_models import AgricultureDevelopmentRegressor

# Split the processed frame into features and the agricultural-GDP target
X, y = preprocessor.prepare_for_ml(
    processed_data,
    target_column='Agricultural_GDP_crores',
)

# Fit the regressor and keep whatever train() returns for later inspection
regressor = AgricultureDevelopmentRegressor()
results = regressor.train(X, y)

# Ask the regressor which of its models came out best
best_name, best_model = regressor.get_best_model()
print(f"\nBest model: {best_name}")

# Feature importances are only requested for these two model names
models_with_importance = ('random_forest', 'gradient_boosting')
if best_name in models_with_importance:
    importance = regressor.get_feature_importance(best_name)
    print("\nTop 10 Important Features:")
    print(importance)


In [None]:
from src.models.classification_models import DevelopmentClassifier

# Create development categories; these class labels are built from the HDI column
classifier = DevelopmentClassifier()
categories = classifier.create_development_categories(processed_data, target_column='HDI')

# Prepare data for classification.
# prepare_for_ml is unpacked as an (X, y) pair in the regression cell; also
# tolerate a bare feature matrix in case no pair is returned when
# target_column is None.
clf_prepared = preprocessor.prepare_for_ml(processed_data, target_column=None)
X_clf = clf_prepared[0] if isinstance(clf_prepared, tuple) else clf_prepared

# The labels are derived from HDI, so HDI must not also be a feature: a model
# that sees it would only learn the bucketing thresholds (target leakage).
# NOTE(review): presumably HDI survives prepare_for_ml because no target was
# named; errors='ignore' keeps this a no-op if it was already removed.
X_clf = X_clf.drop(columns=['HDI'], errors='ignore')

if hasattr(categories, 'loc'):
    # pandas labels: select by row label so that rows removed during
    # preparation cannot leave features and labels misaligned
    y_clf = categories.loc[X_clf.index]
else:
    # Non-pandas labels: keep the original handling unchanged
    y_clf = categories[X_clf.index] if len(categories) == len(X_clf) else categories

# Train classification models
clf_results = classifier.train(X_clf, y_clf)

# Get best model
best_clf_name, best_clf_model = classifier.get_best_model()
print(f"\nBest classification model: {best_clf_name}")


In [None]:
from src.models.clustering_models import StateClustering

# Prepare data for clustering.
# prepare_for_ml is unpacked as an (X, y) pair in the regression cell, so take
# only the feature matrix when a tuple comes back; a bare matrix is used as is.
cluster_prepared = preprocessor.prepare_for_ml(processed_data, target_column=None)
X_cluster = cluster_prepared[0] if isinstance(cluster_prepared, tuple) else cluster_prepared

# Find optimal clusters
clusterer = StateClustering()
optimal = clusterer.find_optimal_clusters(X_cluster, max_clusters=8)
print("Optimal Clusters Analysis:")
print(optimal)

# Perform K-Means clustering
# NOTE(review): n_clusters is fixed at 3 and does not use `optimal` above —
# confirm this is intentional.
labels = clusterer.cluster_kmeans(X_cluster, n_clusters=3)

# Analyze clusters
clustered_df, summary = clusterer.analyze_clusters(processed_data, labels)
print("\nCluster Summary:")
print(summary)


## 5. Results and Insights


In [None]:
# Top-10 state rankings: one chart per metric, skipped when the column is absent
ranking_charts = (
    ('Agricultural_GDP_crores', '../results/top_agri_states.png'),
    ('HDI', '../results/top_hdi_states.png'),
)
for metric_column, chart_path in ranking_charts:
    if metric_column not in processed_data.columns:
        continue
    viz.plot_top_states(processed_data, metric_column, top_n=10,
                        save_path=chart_path)
    plt.show()


In [None]:
# Build the dashboard; the HTML output path is handed over through save_path
dashboard_path = '../results/dashboard.html'
viz.create_dashboard(processed_data, save_path=dashboard_path)

# Closing message for the notebook run
print("\nAnalysis complete! Check the results/ directory for outputs.")
