### Research Question 3: Clustering States into PMJDY Performance Profiles

**Objective**: Can we identify distinct state profiles based on financial inclusion metrics?

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from scipy.spatial.distance import pdist, squareform
from scipy.stats import f_oneway, kruskal
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Set random seed for reproducibility
np.random.seed(42)

### STEP 1: DATA LOADING AND FEATURE SELECTION

In [7]:
print("STEP 1: DATA LOADING AND FEATURE SELECTION")
print("-" * 50)

# Load the preprocessed full dataset
df = pd.read_csv('ml_preprocessed_full.csv')
print(f" Loaded preprocessed dataset: {df.shape}")

# Define clustering features based on methodology
clustering_features = [
    'Jan25_Op_Rate',           # Operative Account Rate (%)
    'RuPay_Penetration',       # RuPay Card Penetration (%)
    'Rural_Percent',           # Rural Dominance Index
    'Avg_Balance_Rs',          # Average Balance per Account (₹)
    'CAGR_2020_25'            # Account Growth Rate 2020-2025 (%)
]

# Check for availability of features
available_features = []
missing_features = []

for feature in clustering_features:
    if feature in df.columns:
        available_features.append(feature)
    else:
        missing_features.append(feature)

if missing_features:
    print(f"\n Missing features: {missing_features}")
    print("Attempting to find alternative column names...")
    
    # Try alternative names
    alternatives = {
        'RuPay_Penetration': ['Jul25_RuPay_Penetration', 'RuPay_Cards', 'High_RuPay_Flag'],
        'Avg_Balance_Rs': ['Jul25_Avg_Balance', 'Balance_Rupees', 'High_Balance_Flag']
    }
    
    for missing in missing_features:
        if missing in alternatives:
            for alt in alternatives[missing]:
                if alt in df.columns:
                    available_features.append(alt)
                    print(f"  → Using '{alt}' instead of '{missing}'")
                    break

# Update clustering features list
clustering_features = available_features

print(f"\n Selected {len(clustering_features)} clustering features:")
for i, feature in enumerate(clustering_features, 1):
    print(f"  {i}. {feature}")

# Extract state names for labeling
if 'State_Name_Std' in df.columns:
    state_names = df['State_Name_Std'].values
elif 'State/UT' in df.columns:
    state_names = df['State/UT'].values
else:
    state_names = [f"State_{i}" for i in range(len(df))]

# Prepare clustering data
X = df[clustering_features].copy()

print(f"\n Clustering data shape: {X.shape}")
print(f" Number of states: {len(state_names)}")

STEP 1: DATA LOADING AND FEATURE SELECTION
--------------------------------------------------
 Loaded preprocessed dataset: (36, 61)

 Selected 5 clustering features:
  1. Jan25_Op_Rate
  2. RuPay_Penetration
  3. Rural_Percent
  4. Avg_Balance_Rs
  5. CAGR_2020_25

 Clustering data shape: (36, 5)
 Number of states: 36


### STEP 2: DATA PREPROCESSING FOR CLUSTERING

In [10]:
print("STEP 2: DATA PREPROCESSING FOR CLUSTERING")
print("-" * 50)

# Check for missing values
missing_values = X.isnull().sum()
if missing_values.sum() > 0:
    print("\nHandling missing values:")
    print(missing_values[missing_values > 0])
    X = X.fillna(X.median())
    print(" Missing values imputed with median")

# Handle outliers using IQR method
print("\nHandling outliers using IQR method:")
Q1 = X.quantile(0.25)
Q3 = X.quantile(0.75)
IQR = Q3 - Q1

# Cap outliers at 1.5×IQR
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

outliers_count = 0
for col in X.columns:
    outliers_mask = (X[col] < lower_bound[col]) | (X[col] > upper_bound[col])
    outliers_count += outliers_mask.sum()
    X.loc[X[col] < lower_bound[col], col] = lower_bound[col]
    X.loc[X[col] > upper_bound[col], col] = upper_bound[col]

print(f" Capped {outliers_count} outlier values")

# Standardize features (Z-score normalization)
print("\nStandardizing features:")
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=clustering_features)

print(" Features standardized using Z-score normalization")
print("\nFeature statistics after scaling:")
print(X_scaled_df.describe().round(3))

### STEP 3: OPTIMAL K DETERMINATION

print("STEP 3: DETERMINING OPTIMAL NUMBER OF CLUSTERS")
print("-" * 50)

# Test different values of k
k_range = range(2, 8)
metrics = {
    'k': [],
    'inertia': [],
    'silhouette': [],
    'calinski': [],
    'davies_bouldin': []
}

print("\nEvaluating different k values:")
print("-" * 40)

for k in k_range:
    kmeans = KMeans(n_clusters=k, n_init=50, random_state=42)
    labels = kmeans.fit_predict(X_scaled)
    
    metrics['k'].append(k)
    metrics['inertia'].append(kmeans.inertia_)
    metrics['silhouette'].append(silhouette_score(X_scaled, labels))
    metrics['calinski'].append(calinski_harabasz_score(X_scaled, labels))
    metrics['davies_bouldin'].append(davies_bouldin_score(X_scaled, labels))
    
    print(f"k={k}: Silhouette={metrics['silhouette'][-1]:.3f}, "
          f"Davies-Bouldin={metrics['davies_bouldin'][-1]:.3f}")

# Gap Statistic for k=3 validation
print("\n" + "-" * 40)
print("Gap Statistic Analysis for k=3:")

def calculate_gap_statistic(X, k, n_refs=100):
    """Calculate gap statistic for given k"""
    kmeans = KMeans(n_clusters=k, n_init=50, random_state=42)
    kmeans.fit(X)
    
    # Observed within-cluster sum of squares
    obs_inertia = kmeans.inertia_
    
    # Generate reference datasets
    ref_inertias = []
    for _ in range(n_refs):
        # Random data with same shape and bounds
        random_data = np.random.uniform(X.min(axis=0), X.max(axis=0), X.shape)
        kmeans_ref = KMeans(n_clusters=k, n_init=10, random_state=42)
        kmeans_ref.fit(random_data)
        ref_inertias.append(kmeans_ref.inertia_)
    
    # Calculate gap
    gap = np.log(np.mean(ref_inertias)) - np.log(obs_inertia)
    sdk = np.std(np.log(ref_inertias)) * np.sqrt(1 + 1/n_refs)
    
    return gap, sdk

gap_3, sdk_3 = calculate_gap_statistic(X_scaled, k=3, n_refs=100)
gap_4, sdk_4 = calculate_gap_statistic(X_scaled, k=4, n_refs=100)

print(f"Gap(k=3) = {gap_3:.3f} ± {sdk_3:.3f}")
print(f"Gap(k=4) = {gap_4:.3f} ± {sdk_4:.3f}")

if gap_3 > gap_4 - sdk_4:
    print(" Gap statistic supports k=3")
else:
    print(" Gap statistic suggests k>3 might be better")

STEP 2: DATA PREPROCESSING FOR CLUSTERING
--------------------------------------------------

Handling outliers using IQR method:
 Capped 11 outlier values

Standardizing features:
 Features standardized using Z-score normalization

Feature statistics after scaling:
       Jan25_Op_Rate  RuPay_Penetration  Rural_Percent  Avg_Balance_Rs  \
count         36.000             36.000         36.000          36.000   
mean           0.000             -0.000          0.000           0.000   
std            1.014              1.014          1.014           1.014   
min           -2.165             -2.464         -2.423          -1.769   
25%           -0.500             -0.544         -0.575          -0.418   
50%            0.108              0.113          0.027          -0.054   
75%            0.610              0.737          0.656           0.482   
max            1.966              1.509          1.918           1.833   

       CAGR_2020_25  
count        36.000  
mean          0.000  


### STEP 4: K-MEANS CLUSTERING WITH K=3

In [12]:
print("STEP 4: K-MEANS CLUSTERING (K=3)")
print("-" * 50)

# Perform K-means with k=3 and multiple initializations
print("\nPerforming K-means with 100 random initializations...")

best_silhouette = -1
best_kmeans = None
best_labels = None

for i in range(100):
    kmeans = KMeans(n_clusters=3, n_init=1, max_iter=300, 
                    tol=1e-4, random_state=i)
    labels = kmeans.fit_predict(X_scaled)
    silhouette = silhouette_score(X_scaled, labels)
    
    if silhouette > best_silhouette:
        best_silhouette = silhouette
        best_kmeans = kmeans
        best_labels = labels

print(f" Best silhouette score: {best_silhouette:.3f}")

# Cluster statistics
print("\nCluster Distribution:")
print("-" * 30)
unique, counts = np.unique(best_labels, return_counts=True)
for cluster, count in zip(unique, counts):
    print(f"Cluster {cluster}: {count} states ({count/len(best_labels)*100:.1f}%)")

# Check minimum cluster size
min_cluster_size = min(counts)
if min_cluster_size < 10:
    print(f"\n Warning: Smallest cluster has only {min_cluster_size} states")
else:
    print(f"\n All clusters have adequate size (min: {min_cluster_size} states)")

STEP 4: K-MEANS CLUSTERING (K=3)
--------------------------------------------------

Performing K-means with 100 random initializations...
 Best silhouette score: 0.218

Cluster Distribution:
------------------------------
Cluster 0: 15 states (41.7%)
Cluster 1: 15 states (41.7%)
Cluster 2: 6 states (16.7%)



### STEP 5: CLUSTER STABILITY ANALYSIS

In [16]:
print("STEP 5: CLUSTER STABILITY ANALYSIS")
print("-" * 50)

print("\nPerforming bootstrap stability analysis (1000 iterations)...")

n_bootstrap = 1000
n_states = len(X_scaled)
stability_matrix = np.zeros((n_states, n_states))

for i in range(n_bootstrap):
    # Bootstrap sample
    idx = np.random.choice(n_states, n_states, replace=True)
    X_boot = X_scaled[idx]
    
    # Cluster bootstrap sample
    kmeans_boot = KMeans(n_clusters=3, n_init=10, random_state=i)
    labels_boot = kmeans_boot.fit_predict(X_boot)
    
    # Update co-occurrence matrix
    for j in range(n_states):
        for k in range(j+1, n_states):
            if idx[j] == idx[k]:  # Same original state
                continue
            if labels_boot[j] == labels_boot[k]:
                stability_matrix[idx[j], idx[k]] += 1
                stability_matrix[idx[k], idx[j]] += 1

# Normalize by number of co-occurrences
stability_matrix = stability_matrix / n_bootstrap

# Calculate average stability for each cluster
cluster_stability = []
for cluster in range(3):
    cluster_mask = best_labels == cluster
    if cluster_mask.sum() > 1:
        cluster_pairs = stability_matrix[np.ix_(cluster_mask, cluster_mask)]
        np.fill_diagonal(cluster_pairs, 0)
        avg_stability = cluster_pairs.sum() / (cluster_mask.sum() * (cluster_mask.sum() - 1))
        cluster_stability.append(avg_stability)
    else:
        cluster_stability.append(0)

print("\nCluster Stability (Jaccard similarity):")
for i, stability in enumerate(cluster_stability):
    print(f"  Cluster {i}: {stability:.3f}")

overall_stability = np.mean(cluster_stability)
print(f"\nOverall stability: {overall_stability:.3f}")

if overall_stability > 0.65:
    print(" Clusters show good stability")
else:
    print(" Clusters show moderate stability")

STEP 5: CLUSTER STABILITY ANALYSIS
--------------------------------------------------

Performing bootstrap stability analysis (1000 iterations)...

Cluster Stability (Jaccard similarity):
  Cluster 0: 0.682
  Cluster 1: 0.738
  Cluster 2: 0.797

Overall stability: 0.739
 Clusters show good stability


### STEP 6: HIERARCHICAL CLUSTERING VALIDATION

In [18]:
print("STEP 6: HIERARCHICAL CLUSTERING VALIDATION")
print("-" * 50)

# Perform hierarchical clustering
print("\nPerforming hierarchical clustering with Ward's method...")

linkage_matrix = linkage(X_scaled, method='ward')
hierarchical_labels = fcluster(linkage_matrix, 3, criterion='maxclust') - 1

# Calculate cophenetic correlation
from scipy.cluster.hierarchy import cophenet
cophenetic_corr, cophenetic_distances = cophenet(linkage_matrix, pdist(X_scaled))
print(f" Cophenetic correlation: {cophenetic_corr:.3f}")

# Compare with K-means results
from sklearn.metrics import adjusted_rand_score
agreement = adjusted_rand_score(best_labels, hierarchical_labels)
print(f" Agreement between K-means and Hierarchical: {agreement:.3f}")

if agreement > 0.5:
    print(" Good agreement between clustering methods")
else:
    print(" Moderate agreement between methods")

STEP 6: HIERARCHICAL CLUSTERING VALIDATION
--------------------------------------------------

Performing hierarchical clustering with Ward's method...
 Cophenetic correlation: 0.539
 Agreement between K-means and Hierarchical: 0.519
 Good agreement between clustering methods


### STEP 7: CLUSTER CHARACTERIZATION

In [20]:
print("STEP 7: CLUSTER CHARACTERIZATION")
print("-" * 50)

# Calculate cluster centroids
cluster_centers = pd.DataFrame(
    best_kmeans.cluster_centers_,
    columns=clustering_features
)

# Transform back to original scale for interpretation
cluster_centers_original = pd.DataFrame(
    scaler.inverse_transform(cluster_centers),
    columns=clustering_features
)

print("\nCluster Centroids (Original Scale):")
print("-" * 50)
print(cluster_centers_original.round(2))

# Statistical tests for feature differences
print("\n" + "-" * 50)
print("Statistical Significance of Feature Differences:")
print("-" * 50)

for feature in clustering_features:
    groups = [X[feature].values[best_labels == i] for i in range(3)]
    
    # ANOVA for normally distributed features
    f_stat, p_value = f_oneway(*groups)
    
    # Kruskal-Wallis for non-parametric alternative
    h_stat, p_value_kw = kruskal(*groups)
    
    print(f"\n{feature}:")
    print(f"  ANOVA: F={f_stat:.2f}, p={p_value:.4f}")
    print(f"  Kruskal-Wallis: H={h_stat:.2f}, p={p_value_kw:.4f}")
    
    if p_value < 0.05:
        print(f"   Significant difference between clusters")
    else:
        print(f"   No significant difference")

STEP 7: CLUSTER CHARACTERIZATION
--------------------------------------------------

Cluster Centroids (Original Scale):
--------------------------------------------------
   Jan25_Op_Rate  RuPay_Penetration  Rural_Percent  Avg_Balance_Rs  \
0           0.70               0.56           0.80            0.37   
1           0.58               0.79           0.54           -0.42   
2           0.21               0.76           0.74            0.48   

   CAGR_2020_25  
0          0.49  
1          0.11  
2         -1.46  

--------------------------------------------------
Statistical Significance of Feature Differences:
--------------------------------------------------

Jan25_Op_Rate:
  ANOVA: F=25.89, p=0.0000
  Kruskal-Wallis: H=17.40, p=0.0002
   Significant difference between clusters

RuPay_Penetration:
  ANOVA: F=6.24, p=0.0050
  Kruskal-Wallis: H=9.16, p=0.0102
   Significant difference between clusters

Rural_Percent:
  ANOVA: F=19.68, p=0.0000
  Kruskal-Wallis: H=21.50, p=0.000

### STEP 8: CLUSTER PROFILING

In [25]:
print("STEP 8: CLUSTER PROFILING")
print("-" * 50)

# Create detailed cluster profiles
cluster_profiles = []

for cluster_id in range(3):
    cluster_mask = best_labels == cluster_id
    cluster_states = state_names[cluster_mask]
    
    profile = {
        'cluster_id': cluster_id,
        'n_states': cluster_mask.sum(),
        'states': ', '.join(cluster_states[:5]) + ('...' if len(cluster_states) > 5 else ''),
        'characteristics': {}
    }
    
    # Calculate mean values for each feature
    for feature in clustering_features:
        profile['characteristics'][feature] = {
            'mean': X[feature].values[cluster_mask].mean(),
            'std': X[feature].values[cluster_mask].std()
        }
    
    cluster_profiles.append(profile)

# Assign cluster names based on characteristics
cluster_names = []
for i, profile in enumerate(cluster_profiles):
    chars = profile['characteristics']
    
    # Determine cluster type based on key metrics
    op_rate = chars.get('Jan25_Op_Rate', {}).get('mean', 0)
    growth = chars.get('CAGR_2020_25', {}).get('mean', 0)
    
    if i == 0:  # Customize based on actual characteristics
        if op_rate > 80:
            cluster_names.append("High Performers")
        elif growth > 10:
            cluster_names.append("Growth Markets")
        else:
            cluster_names.append("Emerging States")
    elif i == 1:
        if op_rate > 70 and op_rate <= 80:
            cluster_names.append("Moderate Performers")
        else:
            cluster_names.append("Development States")
    else:
        cluster_names.append("Priority Intervention States")

print("\nCluster Profiles:")
print("=" * 50)

for i, (profile, name) in enumerate(zip(cluster_profiles, cluster_names)):
    print(f"\nCluster {i}: {name}")
    print(f"  Number of states: {profile['n_states']}")
    print(f"  Sample states: {profile['states']}")
    print(f"  Key Characteristics:")
    
    for feature, stats in profile['characteristics'].items():
        print(f"    {feature}: {stats['mean']:.2f} (±{stats['std']:.2f})")

STEP 8: CLUSTER PROFILING
--------------------------------------------------

Cluster Profiles:

Cluster 0: Emerging States
  Number of states: 15
  Sample states: ANDAMAN AND NICOBAR ISLANDS, ARUNACHAL PRADESH, ASSAM, CHHATTISGARH, HIMACHAL PRADESH...
  Key Characteristics:
    Jan25_Op_Rate: 0.70 (±0.15)
    RuPay_Penetration: 0.56 (±0.21)
    Rural_Percent: 0.80 (±0.11)
    Avg_Balance_Rs: 0.37 (±1.02)
    CAGR_2020_25: 0.49 (±0.85)

Cluster 1: Development States
  Number of states: 15
  Sample states: ANDHRA PRADESH, BIHAR, CHANDIGARH, DELHI, GUJARAT...
  Key Characteristics:
    Jan25_Op_Rate: 0.58 (±0.12)
    RuPay_Penetration: 0.79 (±0.15)
    Rural_Percent: 0.54 (±0.13)
    Avg_Balance_Rs: -0.42 (±0.87)
    CAGR_2020_25: 0.11 (±0.51)

Cluster 2: Priority Intervention States
  Number of states: 6
  Sample states: DAMAN & DIU, GOA, JAMMU & KASHMIR, LADAKH, MANIPUR...
  Key Characteristics:
    Jan25_Op_Rate: 0.21 (±0.12)
    RuPay_Penetration: 0.76 (±0.10)
    Rural_Percent: 0.74

### STEP 9: ALTERNATIVE CLUSTERING METHODS

In [28]:
print("STEP 9: VALIDATION WITH ALTERNATIVE METHODS")
print("-" * 50)

# Gaussian Mixture Model
print("\nGaussian Mixture Model (k=3):")
gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=42)
gmm_labels = gmm.fit_predict(X_scaled)
gmm_silhouette = silhouette_score(X_scaled, gmm_labels)
print(f"  Silhouette score: {gmm_silhouette:.3f}")
print(f"  Agreement with K-means: {adjusted_rand_score(best_labels, gmm_labels):.3f}")

# DBSCAN (density-based)
print("\nDBSCAN (density-based clustering):")
dbscan = DBSCAN(eps=2.5, min_samples=3)
dbscan_labels = dbscan.fit_predict(X_scaled)
n_clusters_dbscan = len(set(dbscan_labels)) - (1 if -1 in dbscan_labels else 0)
n_noise = list(dbscan_labels).count(-1)
print(f"  Number of clusters: {n_clusters_dbscan}")
print(f"  Noise points: {n_noise}")

if n_clusters_dbscan > 1 and n_noise < len(dbscan_labels):
    # Only calculate silhouette if we have more than 1 cluster
    dbscan_silhouette = silhouette_score(X_scaled[dbscan_labels != -1], 
                                         dbscan_labels[dbscan_labels != -1])
    print(f"  Silhouette score: {dbscan_silhouette:.3f}")
elif n_clusters_dbscan == 1:
    print("   DBSCAN found only 1 cluster - parameters may need adjustment")
else:
    print("   DBSCAN could not find meaningful clusters with current parameters")

STEP 9: VALIDATION WITH ALTERNATIVE METHODS
--------------------------------------------------

Gaussian Mixture Model (k=3):
  Silhouette score: 0.135
  Agreement with K-means: 0.488

DBSCAN (density-based clustering):
  Number of clusters: 1
  Noise points: 0
   DBSCAN found only 1 cluster - parameters may need adjustment


### STEP 10: FINAL RESULTS AND RECOMMENDATIONS

In [31]:
print("STEP 10: FINAL RESULTS AND RECOMMENDATIONS")
print("=" * 80)

print("\n" + "=" * 50)
print("CLUSTERING ANALYSIS SUMMARY")
print("=" * 50)

print(f"\n Optimal number of clusters: 3")
print(f" Best silhouette score: {best_silhouette:.3f}")
print(f" Cluster stability: {overall_stability:.3f}")
print(f" Method agreement: {agreement:.3f}")

print("\n" + "-" * 50)
print("FINAL CLUSTER ASSIGNMENTS:")
print("-" * 50)

# Create final results dataframe
results_df = pd.DataFrame({
    'State': state_names,
    'Cluster': best_labels,
    'Cluster_Name': [cluster_names[label] for label in best_labels]
})

# Add original features for reference
for feature in clustering_features:
    results_df[feature] = X[feature].values

# Sort by cluster
results_df = results_df.sort_values(['Cluster', 'State'])

print("\nCluster Assignments by State:")
for cluster in range(3):
    cluster_data = results_df[results_df['Cluster'] == cluster]
    print(f"\n{cluster_names[cluster]} (Cluster {cluster}):")
    for _, row in cluster_data.iterrows():
        print(f"  - {row['State']}")

# Save results
results_df.to_csv('rq3_cluster_results.csv', index=False)
print("\n Results saved to 'rq3_cluster_results.csv'")

print("\n" + "=" * 80)
print("POLICY RECOMMENDATIONS")
print("=" * 80)

for i, name in enumerate(cluster_names):
    cluster_mask = best_labels == i
    print(f"\n{name} (Cluster {i}):")
    
    # Get cluster characteristics
    op_rate = X['Jan25_Op_Rate'].values[cluster_mask].mean() if 'Jan25_Op_Rate' in X.columns else 0
    growth = X['CAGR_2020_25'].values[cluster_mask].mean() if 'CAGR_2020_25' in X.columns else 0
    rural = X['Rural_Percent'].values[cluster_mask].mean() if 'Rural_Percent' in X.columns else 0
    
    if op_rate > 80:
        print("  → Best practice states - document and share success strategies")
        print("  → Focus on maintaining momentum and innovation")
    elif op_rate > 70:
        print("  → Target for optimization programs")
        print("  → Focus on improving RuPay adoption and transaction frequency")
    else:
        print("  → Priority intervention required")
        print("  → Intensive support for account activation")
        print("  → Financial literacy campaigns needed")
    
    if rural > 70:
        print("  → Rural-focused initiatives required")
        print("  → Mobile banking and BC network expansion")

STEP 10: FINAL RESULTS AND RECOMMENDATIONS

CLUSTERING ANALYSIS SUMMARY

 Optimal number of clusters: 3
 Best silhouette score: 0.218
 Cluster stability: 0.739
 Method agreement: 0.519

--------------------------------------------------
FINAL CLUSTER ASSIGNMENTS:
--------------------------------------------------

Cluster Assignments by State:

Emerging States (Cluster 0):
  - ANDAMAN AND NICOBAR ISLANDS
  - ARUNACHAL PRADESH
  - ASSAM
  - CHHATTISGARH
  - HIMACHAL PRADESH
  - JHARKHAND
  - KERALA
  - LAKSHADWEEP
  - MEGHALAYA
  - MIZORAM
  - ODISHA
  - TRIPURA
  - UTTAR PRADESH
  - UTTARAKHAND
  - WEST BENGAL

Development States (Cluster 1):
  - ANDHRA PRADESH
  - BIHAR
  - CHANDIGARH
  - DELHI
  - GUJARAT
  - HARYANA
  - KARNATAKA
  - MADHYA PRADESH
  - MAHARASHTRA
  - NAGALAND
  - PUDUCHERRY
  - PUNJAB
  - RAJASTHAN
  - TAMIL NADU
  - TELANGANA

Priority Intervention States (Cluster 2):
  - DAMAN & DIU
  - GOA
  - JAMMU & KASHMIR
  - LADAKH
  - MANIPUR
  - SIKKIM

 Results saved to 