In [55]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import networkx as nx
from collections import defaultdict
import math
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import pearsonr
%matplotlib notebook
# pd.options.display.float_format = '{:.5f}'.format

The goal here is to take a target value of maize production, and calculate how much/ to what values the factors considered need to be changed to get that value.

We also consider sensitivity values for each of the 7 factors from 0 - 1, which determines how significant we consider that factor is with regards to our calculations.

MP/TP * Factor

Proportional allocation based on factor to total production

The above formula will be used to get the estimated proportion of factors that contribute to Maize

Comparing the new data and the old data after partitioning the factors for Maize production

In [56]:
import plotly.graph_objs as go
import numpy as np

# Data
parameters = ['Irrigated by Wells', 'Irrigated by TubeWells',
              'LiftIrrigation', 'Area (Maize)', 'Tractors Count',
              'Total Loan', 'Total NPK']

new_values = [9738.9, 2745.4, 189.96, 44841, 4319.6, 1389086, 11493]
old_values = [1850.9, 8337.5,242.24, 39104, 2576.8, 15354, 8126.9]
temp = [0,0,0,0,0,0,0]

new_values_scaled = []
old_values_scaled = []

# Scale the values for each parameter individually
for i, param in enumerate(parameters):
    min_val = min(new_values[i], old_values[i],temp[i])
    max_val = max(new_values[i], old_values[i],temp[i])
    new_values_scaled.append((new_values[i] - min_val) / (max_val - min_val))
    old_values_scaled.append((old_values[i] - min_val) / (max_val - min_val))

# Create traces
trace1 = go.Bar(
    y=parameters,  # Switched x to y for horizontal bars
    x=new_values,  # Switched x to y for horizontal bars
    name='NEW',
    text=new_values,
    textposition='auto',
    orientation='h'  # Set orientation to horizontal
)
trace2 = go.Bar(
    y=parameters,  # Switched x to y for horizontal bars
    x=old_values,  # Switched x to y for horizontal bars
    name='OLD',
    text=old_values,
    textposition='auto',
    orientation='h'  # Set orientation to horizontal
)

data = [trace1, trace2]
layout = go.Layout(
    title='Comparison of Parameters between NEW and OLD datasets (Individually Scaled)',
    xaxis=dict(
        title='Scaled Values'
    ),
    yaxis=dict(
        tickangle=-45,
        tickmode='array',
        tickvals=list(range(len(parameters))),
        ticktext=parameters
    ),
    barmode='group',
    width=900,  # Set the width of the plot
    height=600  # Set the height of the plot
)

fig = go.Figure(data=data, layout=layout)
fig.show()


These are the data files generated for all the combinations of the sensitivity for all the factors, they are too big, so they are stored in 3 parts. (They take a long time to calculate)

In [57]:
viz_df1 = pd.read_csv("x_col_new_all_comb_10_20_part_1.csv")
viz_df2 = pd.read_csv("x_col_new_all_comb_10_20_part_2.csv")
viz_df3 = pd.read_csv("x_col_new_all_comb_10_20_part_3.csv")

In [66]:
Viz_df = pd.concat([viz_df1, viz_df2,viz_df3], axis=0)

In [67]:
Viz_df

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179),Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182),...,Sensitivity (AreaUnderCereals_Maize_193),Sensitivity (MotorVehicles_Tractors_514),Sensitivity (TotalAgricultureLoan),Sensitivity (TotalNPK_315),Maize_Production,New Stability,New Stress,Percentage Change,Relative Change,Abbreviation
0,BENGALURU,0.000000,175.723235,0.000000,1129.000000,275.164389,9.802510e+01,438.180210,0.0,0.0,...,0.0,0.0,0.0,0.0,1986.727350,0.978939,0.021061,-30.992450,-0.309925,BLR
1,BENGALURU(R),116.814760,3197.193185,0.157645,14867.000000,154.649500,3.429301e+03,3036.080250,0.0,0.0,...,0.0,0.0,0.0,0.0,40824.817100,0.980141,0.019859,-4.128838,-0.041288,BLR(R)
2,RAMANAGARA,0.000000,709.859804,0.118964,1987.000000,339.256359,9.280358e+02,222.225258,0.0,0.0,...,0.0,0.0,0.0,0.0,2981.768487,0.923336,0.076664,-71.466330,-0.714663,RGA
3,CHITRADURGA,0.000000,28105.507250,32.509127,92194.000000,5647.169517,3.764733e+04,10902.406570,0.0,0.0,...,0.0,0.0,0.0,0.0,283801.723800,0.870357,0.129643,4.348460,0.043485,CDA
4,DAVANAGERE,221.834115,20213.730880,402.912659,173400.000000,8341.220655,2.734483e+04,25702.835480,0.0,0.0,...,0.0,0.0,0.0,0.0,561073.411500,0.409416,0.590584,-5.470629,-0.054706,DVG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421870,KALABURAGI,31707.723927,0.000000,0.000000,14107.908818,2539.600513,1.833599e+06,5799.019427,1.0,1.0,...,1.0,1.0,1.0,1.0,132033.173592,0.911311,0.090146,803.037915,8.030379,KLB
421871,YADGIRI,31595.098263,0.000000,0.000000,12434.908818,2364.520103,1.831851e+06,5457.535282,1.0,1.0,...,1.0,1.0,1.0,1.0,124671.346026,0.898440,0.105729,4564.098243,45.640982,YDR
421872,RAICHUR,31559.409135,0.000000,0.000000,14073.908818,2338.026505,1.831720e+06,5150.465332,1.0,1.0,...,1.0,1.0,1.0,1.0,126429.240238,0.752912,0.247164,17386.755220,173.867552,RCR
421873,KOPPAL,31551.972088,0.000000,0.000000,59683.908818,4764.310514,1.842332e+06,16086.468216,1.0,1.0,...,1.0,1.0,1.0,1.0,306817.777322,0.954826,0.041604,61.269147,0.612691,KPL


In [75]:
Another = pd.DataFrame()
Another['District'] = Viz_df['District']
Another['NetAreaIrrigated_Wells_NetAreaIrrigated']=Viz_df['NetAreaIrrigated_Wells_NetAreaIrrigated']
Another['NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea']=Viz_df["NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea"]

Another['NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No']=Viz_df["NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No"]
Another['AreaUnderCereals_Maize']=Viz_df["AreaUnderCereals_Maize"]
Another['MotorVehicles_Tractors']=Viz_df["MotorVehicles_Tractors"]
Another['TotalAgricultureLoan']=Viz_df["TotalAgricultureLoan"]
Another['TotalNPK']=Viz_df["TotalNPK"]


In [78]:
Another

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.000000,175.723235,0.000000,1129.000000,275.164389,9.802510e+01,438.180210,1986.727350
1,BENGALURU(R),116.814760,3197.193185,0.157645,14867.000000,154.649500,3.429301e+03,3036.080250,40824.817100
2,RAMANAGARA,0.000000,709.859804,0.118964,1987.000000,339.256359,9.280358e+02,222.225258,2981.768487
3,CHITRADURGA,0.000000,28105.507250,32.509127,92194.000000,5647.169517,3.764733e+04,10902.406570,283801.723800
4,DAVANAGERE,221.834115,20213.730880,402.912659,173400.000000,8341.220655,2.734483e+04,25702.835480,561073.411500
...,...,...,...,...,...,...,...,...,...
421870,KALABURAGI,31707.723927,0.000000,0.000000,14107.908818,2539.600513,1.833599e+06,5799.019427,132033.173592
421871,YADGIRI,31595.098263,0.000000,0.000000,12434.908818,2364.520103,1.831851e+06,5457.535282,124671.346026
421872,RAICHUR,31559.409135,0.000000,0.000000,14073.908818,2338.026505,1.831720e+06,5150.465332,126429.240238
421873,KOPPAL,31551.972088,0.000000,0.000000,59683.908818,4764.310514,1.842332e+06,16086.468216,306817.777322


In [77]:
Another['Maize_Production'] = Viz_df['Maize_Production']

In [65]:
Viz_df

Unnamed: 0,District,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179),Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182),Sensitivity (NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No_183),Sensitivity (AreaUnderCereals_Maize_193),Sensitivity (MotorVehicles_Tractors_514),Sensitivity (TotalAgricultureLoan),Sensitivity (TotalNPK_315),New Stability,New Stress,Percentage Change,Relative Change,Abbreviation
0,BENGALURU,1129.000000,275.164389,9.802510e+01,438.180210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.978939,0.021061,-30.992450,-0.309925,BLR
1,BENGALURU(R),14867.000000,154.649500,3.429301e+03,3036.080250,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.980141,0.019859,-4.128838,-0.041288,BLR(R)
2,RAMANAGARA,1987.000000,339.256359,9.280358e+02,222.225258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.923336,0.076664,-71.466330,-0.714663,RGA
3,CHITRADURGA,92194.000000,5647.169517,3.764733e+04,10902.406570,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.870357,0.129643,4.348460,0.043485,CDA
4,DAVANAGERE,173400.000000,8341.220655,2.734483e+04,25702.835480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.409416,0.590584,-5.470629,-0.054706,DVG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421870,KALABURAGI,14107.908818,2539.600513,1.833599e+06,5799.019427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.911311,0.090146,803.037915,8.030379,KLB
421871,YADGIRI,12434.908818,2364.520103,1.831851e+06,5457.535282,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.898440,0.105729,4564.098243,45.640982,YDR
421872,RAICHUR,14073.908818,2338.026505,1.831720e+06,5150.465332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.752912,0.247164,17386.755220,173.867552,RCR
421873,KOPPAL,59683.908818,4764.310514,1.842332e+06,16086.468216,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.954826,0.041604,61.269147,0.612691,KPL


In [6]:
df1 = pd.read_csv("KAG 2016-17/Agriculture/Agriculture_KAG_2016_17.csv")
import re
def remove_pattern(col_name):
    return re.sub(r'_[0-9]{3}$', '', col_name)

df1.columns = map(remove_pattern, df1.columns)

In [7]:
df1['AreaUnderCereals_Maize'][20:]

20     30900
21     34163
22    170696
23      4576
24     99739
25      1382
26      2634
27       961
28      2600
29     48210
Name: AreaUnderCereals_Maize, dtype: int64

In [8]:
df1

Unnamed: 0,District,NetAreaIrrigated_Canals_Length,NetAreaIrrigated_Canals_GrossIrrigatedArea,NetAreaIrrigated_Canals_NetAreaIrrigated,NetAreaIrrigated_Tanks_No,NetAreaIrrigated_Tanks_GrossIrrigatedArea,NetAreaIrrigated_Tanks_NetAreaIrrigated,NetAreaIrrigated_Wells_No,NetAreaIrrigated_Wells_GrossIrrigatedArea,NetAreaIrrigated_Wells_NetAreaIrrigated,...,SowingSeedsDistributed_Safflower,SowingSeedsDistributed_Groundnut,SowingSeedsDistributed_Sunflower,SowingSeedsDistributed_Soyabean,TotalFoodGrains,TotalGourdVarietyVegetables,RRB_AgricultureLoan,DCCBank_AgricultureLoan,KSCARD_PLDBank_AgricultureLoan,TotalAgricultureLoan
0,BENGALURU,0,0,0,833,210,164,43,0,0,...,0.0,8.25,0.0,0.0,72752,11088,3450.06,923.83,840.78,5214.67
1,BENGALURU(R),0,0,0,1031,0,0,6017,831,741,...,0.0,10.65,0.0,0.0,131795,12614,10539.94,10040.5,1172.91,21753.35
2,RAMANAGARA,178,8918,7895,808,1843,1688,12323,0,0,...,0.0,217.8,0.0,0.0,175873,10071,9921.94,20868.05,413.86,31203.85
3,CHITRADURGA,142,2131,1820,368,0,0,12031,0,0,...,0.0,32035.0,348.0,0.0,421955,6113,105785.02,17273.65,853.14,123911.81
4,DAVANAGERE,203,96330,84789,413,1534,1534,3534,860,860,...,0.0,49693.5,623.1,0.0,1141132,11822,80725.12,24202.58,1081.95,106009.65
5,KOLAR,0,0,0,2980,0,0,27875,0,0,...,0.0,3015.5,0.0,0.0,87015,14692,70194.91,5775.47,1715.02,77685.4
6,CHIKKABALLAPURA,0,0,0,1981,0,0,733,0,0,...,0.0,6536.9,0.0,0.0,171962,19918,48782.34,8939.4,1827.19,59548.93
7,SHIVAMOGGA,378,50465,42502,5977,53747,50951,7673,6942,6400,...,0.0,30.0,0.0,0.0,613998,238,69036.55,31135.21,1844.32,102016.08
8,TUMAKURU,87,4099,4099,2232,8998,8998,39556,901,901,...,0.0,31094.2,30.0,0.0,450563,4382,104566.89,41077.53,1802.15,147446.57
9,CHIKKAMAGALURU,50,7640,6210,1561,6674,6384,426,173,173,...,0.0,1485.3,79.54,0.0,253087,13083,32999.48,45035.2,1322.43,79357.11


In [9]:
df1['Total_Production'] = df1['TotalCerealsandMinorMillets_Production'] + df1['TotalOilSeeds_Production'] + df1['TotalFoodGrains'] + df1['TotalGourdVarietyVegetables']
df1['Ratio'] = df1['Maize_Production']/df1['Total_Production']

In [10]:
df2=pd.read_csv("MaizeCaseFile_3.csv")

In [11]:
df2

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021,2879
1,BENGALURU(R),116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025,42583
2,RAMANAGARA,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258,10450
3,CHITRADURGA,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571,271975
4,DAVANAGERE,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548,593544
5,KOLAR,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906,1759
6,CHIKKABALLAPURA,0.0,14559.991862,6.070277,59377.0,3298.472645,17213.260595,8196.896749,105491
7,SHIVAMOGGA,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.926996,10428.246256,226532
8,TUMAKURU,64.863914,10417.677296,0.647919,24207.0,1910.209867,10614.829758,2746.313635,66868
9,CHIKKAMAGALURU,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037,61353


In [12]:
# df.columns

In [13]:
cols = ['District', 'NetAreaIrrigated_Wells_NetAreaIrrigated',
       'NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea',
       'NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No',
       'AreaUnderCereals_Maize', 'MotorVehicles_Tractors',
       'TotalAgricultureLoan', 'TotalNPK', 'Maize_Production']
df = df1[cols]

Taking the portions of the factor we estimate correspond to maize

In [14]:
df['NetAreaIrrigated_Wells_NetAreaIrrigated'] = df1['Ratio']*df['NetAreaIrrigated_Wells_NetAreaIrrigated']
df['NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea'] = df1['Ratio']*df['NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea']
df['NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No'] = df1['Ratio']*df['NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No']
df['MotorVehicles_Tractors'] = df1['Ratio']*df['MotorVehicles_Tractors']
df['TotalAgricultureLoan'] = df1['Ratio']*df['TotalAgricultureLoan']
df['TotalNPK'] = df1['Ratio']*df['TotalNPK']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [15]:
df2

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021,2879
1,BENGALURU(R),116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025,42583
2,RAMANAGARA,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258,10450
3,CHITRADURGA,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571,271975
4,DAVANAGERE,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548,593544
5,KOLAR,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906,1759
6,CHIKKABALLAPURA,0.0,14559.991862,6.070277,59377.0,3298.472645,17213.260595,8196.896749,105491
7,SHIVAMOGGA,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.926996,10428.246256,226532
8,TUMAKURU,64.863914,10417.677296,0.647919,24207.0,1910.209867,10614.829758,2746.313635,66868
9,CHIKKAMAGALURU,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037,61353


In [16]:
# df.fillna(0, inplace=True)


In [17]:
df

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.0,175.723235,0.0,1129,275.164389,98.025105,438.18021,2879
1,BENGALURU(R),116.81476,3197.193185,0.157645,14867,154.6495,3429.301433,3036.08025,42583
2,RAMANAGARA,0.0,709.859804,0.118964,1987,339.256359,928.035816,222.225258,10450
3,CHITRADURGA,0.0,28105.507253,32.509127,92194,5647.169517,37647.333954,10902.406571,271975
4,DAVANAGERE,221.834115,20213.730878,402.912659,173400,8341.220655,27344.83354,25702.83548,593544
5,KOLAR,0.0,200.386108,0.0,640,167.701482,772.943145,220.782906,1759
6,CHIKKABALLAPURA,0.0,14559.991862,6.070277,59377,3298.472645,17213.260595,8196.896749,105491
7,SHIVAMOGGA,1178.978184,5958.813645,764.862097,43827,3305.007438,18792.926996,10428.246256,226532
8,TUMAKURU,64.863914,10417.677296,0.647919,24207,1910.209867,10614.829758,2746.313635,66868
9,CHIKKAMAGALURU,20.792453,3145.789961,489.764426,25831,1908.819293,9537.739741,8156.29037,61353


In [18]:
df2.columns

Index(['District', 'NetAreaIrrigated_Wells_NetAreaIrrigated',
       'NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea',
       'NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No',
       'AreaUnderCereals_Maize', 'MotorVehicles_Tractors',
       'TotalAgricultureLoan', 'TotalNPK', 'Maize_Production'],
      dtype='object')

In [19]:
x_col_indices = [1, 2, 3, 4, 5, 6, 7]
x_col = df2.columns[x_col_indices]
x_col

Index(['NetAreaIrrigated_Wells_NetAreaIrrigated',
       'NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea',
       'NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No',
       'AreaUnderCereals_Maize', 'MotorVehicles_Tractors',
       'TotalAgricultureLoan', 'TotalNPK'],
      dtype='object')

In [20]:
# filtered_df = df2[(df2['Maize_Production'] != 0) & (~df2['Maize_Production'].isnull())]

# x = filtered_df[x_col]
x = df2[x_col]
x

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
0,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021
1,116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025
2,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258
3,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571
4,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548
5,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906
6,0.0,14559.991862,6.070277,59377.0,3298.472645,17213.260595,8196.896749
7,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.926996,10428.246256
8,64.863914,10417.677296,0.647919,24207.0,1910.209867,10614.829758,2746.313635
9,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037


In [21]:
# x = df[x_col]
x.head()

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
0,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021
1,116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025
2,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258
3,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571
4,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548


In [22]:
x.corr()

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
NetAreaIrrigated_Wells_NetAreaIrrigated,1.0,0.584103,0.352209,0.346496,0.629435,0.695026,0.711341
NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,0.584103,1.0,0.431232,0.737304,0.86259,0.931092,0.838434
NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,0.352209,0.431232,1.0,0.687381,0.451851,0.461385,0.586761
AreaUnderCereals_Maize,0.346496,0.737304,0.687381,1.0,0.809383,0.701799,0.827175
MotorVehicles_Tractors,0.629435,0.86259,0.451851,0.809383,1.0,0.914843,0.917063
TotalAgricultureLoan,0.695026,0.931092,0.461385,0.701799,0.914843,1.0,0.904839
TotalNPK,0.711341,0.838434,0.586761,0.827175,0.917063,0.904839,1.0


In [23]:
p_values = x.corr(method=lambda i, j: pearsonr(i, j)[1])  # Calculate p-values
p_values

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
NetAreaIrrigated_Wells_NetAreaIrrigated,1.0,0.0005607037,0.05199,0.05618962,0.000148522,1.431952e-05,7.262428e-06
NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,0.000561,1.0,0.015432,2.231155e-06,4.410148e-10,3.125824e-14,3.920724e-09
NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,0.05199,0.01543199,1.0,1.939168e-05,0.01071687,0.008986717,0.0005214883
AreaUnderCereals_Maize,0.05619,2.231155e-06,1.9e-05,1.0,3.534026e-08,1.086114e-05,9.64384e-09
MotorVehicles_Tractors,0.000149,4.410148e-10,0.010717,3.534026e-08,1.0,6.054842e-13,4.188684e-13
TotalAgricultureLoan,1.4e-05,3.125824e-14,0.008987,1.086114e-05,6.054842e-13,1.0,2.837568e-12
TotalNPK,7e-06,3.920724e-09,0.000521,9.64384e-09,4.188684e-13,2.837568e-12,1.0


In [24]:
checker = df2[df2.columns[x_col_indices]]
p_values = checker.corr(method=lambda i, j: pearsonr(i, j)[1])
p_values

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
NetAreaIrrigated_Wells_NetAreaIrrigated,1.0,0.0005607037,0.05199,0.05618962,0.000148522,1.431952e-05,7.262428e-06
NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,0.000561,1.0,0.015432,2.231155e-06,4.410148e-10,3.125824e-14,3.920724e-09
NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,0.05199,0.01543199,1.0,1.939168e-05,0.01071687,0.008986717,0.0005214883
AreaUnderCereals_Maize,0.05619,2.231155e-06,1.9e-05,1.0,3.534026e-08,1.086114e-05,9.64384e-09
MotorVehicles_Tractors,0.000149,4.410148e-10,0.010717,3.534026e-08,1.0,6.054842e-13,4.188684e-13
TotalAgricultureLoan,1.4e-05,3.125824e-14,0.008987,1.086114e-05,6.054842e-13,1.0,2.837568e-12
TotalNPK,7e-06,3.920724e-09,0.000521,9.64384e-09,4.188684e-13,2.837568e-12,1.0


In [25]:
checker.corr()

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
NetAreaIrrigated_Wells_NetAreaIrrigated,1.0,0.584103,0.352209,0.346496,0.629435,0.695026,0.711341
NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,0.584103,1.0,0.431232,0.737304,0.86259,0.931092,0.838434
NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,0.352209,0.431232,1.0,0.687381,0.451851,0.461385,0.586761
AreaUnderCereals_Maize,0.346496,0.737304,0.687381,1.0,0.809383,0.701799,0.827175
MotorVehicles_Tractors,0.629435,0.86259,0.451851,0.809383,1.0,0.914843,0.917063
TotalAgricultureLoan,0.695026,0.931092,0.461385,0.701799,0.914843,1.0,0.904839
TotalNPK,0.711341,0.838434,0.586761,0.827175,0.917063,0.904839,1.0


In [26]:
df2.columns

Index(['District', 'NetAreaIrrigated_Wells_NetAreaIrrigated',
       'NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea',
       'NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No',
       'AreaUnderCereals_Maize', 'MotorVehicles_Tractors',
       'TotalAgricultureLoan', 'TotalNPK', 'Maize_Production'],
      dtype='object')

In [27]:
# y = filtered_df['Maize_Production']
y = df2['Maize_Production']
y

0       2879
1      42583
2      10450
3     271975
4     593544
5       1759
6     105491
7     226532
8      66868
9      61353
10         0
11       109
12    333937
13     17921
14     19387
15    137154
16    147886
17    576758
18    226327
19    251699
20     90634
21    138476
22    413487
23     24088
24    104045
25      2791
26     14621
27      2673
28       723
29    190252
30    104045
Name: Maize_Production, dtype: int64

In [28]:
x

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
0,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021
1,116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025
2,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258
3,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571
4,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548
5,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906
6,0.0,14559.991862,6.070277,59377.0,3298.472645,17213.260595,8196.896749
7,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.926996,10428.246256
8,64.863914,10417.677296,0.647919,24207.0,1910.209867,10614.829758,2746.313635
9,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037


In [29]:
x.corr()

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
NetAreaIrrigated_Wells_NetAreaIrrigated,1.0,0.584103,0.352209,0.346496,0.629435,0.695026,0.711341
NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,0.584103,1.0,0.431232,0.737304,0.86259,0.931092,0.838434
NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,0.352209,0.431232,1.0,0.687381,0.451851,0.461385,0.586761
AreaUnderCereals_Maize,0.346496,0.737304,0.687381,1.0,0.809383,0.701799,0.827175
MotorVehicles_Tractors,0.629435,0.86259,0.451851,0.809383,1.0,0.914843,0.917063
TotalAgricultureLoan,0.695026,0.931092,0.461385,0.701799,0.914843,1.0,0.904839
TotalNPK,0.711341,0.838434,0.586761,0.827175,0.917063,0.904839,1.0


In [30]:
x=sm.add_constant(x)
x.head()

Unnamed: 0,const,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
0,1.0,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021
1,1.0,116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025
2,1.0,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258
3,1.0,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571
4,1.0,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548


In [31]:
x

Unnamed: 0,const,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
0,1.0,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021
1,1.0,116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025
2,1.0,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258
3,1.0,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571
4,1.0,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548
5,1.0,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906
6,1.0,0.0,14559.991862,6.070277,59377.0,3298.472645,17213.260595,8196.896749
7,1.0,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.926996,10428.246256
8,1.0,64.863914,10417.677296,0.647919,24207.0,1910.209867,10614.829758,2746.313635
9,1.0,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037


Linear regression on the factors and maize production

In [32]:
model=sm.OLS(y,x).fit()
model.summary()

0,1,2,3
Dep. Variable:,Maize_Production,R-squared:,0.967
Model:,OLS,Adj. R-squared:,0.957
Method:,Least Squares,F-statistic:,96.92
Date:,"Sat, 22 Jun 2024",Prob (F-statistic):,1.51e-15
Time:,19:35:11,Log-Likelihood:,-362.46
No. Observations:,31,AIC:,740.9
Df Residuals:,23,BIC:,752.4
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-5460.0154,8600.392,-0.635,0.532,-2.33e+04,1.23e+04
NetAreaIrrigated_Wells_NetAreaIrrigated,0.7970,1.668,0.478,0.637,-2.655,4.249
NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,-0.9727,1.870,-0.520,0.608,-4.841,2.896
NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,-28.8758,14.614,-1.976,0.060,-59.107,1.355
AreaUnderCereals_Maize,2.1917,0.436,5.022,0.000,1.289,3.094
MotorVehicles_Tractors,10.8178,7.265,1.489,0.150,-4.211,25.847
TotalAgricultureLoan,0.0137,1.524,0.009,0.993,-3.138,3.166
TotalNPK,4.9415,2.372,2.083,0.049,0.035,9.848

0,1,2,3
Omnibus:,4.032,Durbin-Watson:,2.674
Prob(Omnibus):,0.133,Jarque-Bera (JB):,3.094
Skew:,-0.236,Prob(JB):,0.213
Kurtosis:,4.474,Cond. No.,94400.0


In [33]:
y.mean()

134853.12903225806

In [34]:
model.params.values

array([-5.46001544e+03,  7.96998390e-01, -9.72700128e-01, -2.88757924e+01,
        2.19165686e+00,  1.08178108e+01,  1.37291391e-02,  4.94149678e+00])

In [35]:
coef=model.params.values[1:]
# coef

In [36]:
coef

array([ 7.96998390e-01, -9.72700128e-01, -2.88757924e+01,  2.19165686e+00,
        1.08178108e+01,  1.37291391e-02,  4.94149678e+00])

In [37]:
x.drop('const',axis=1,inplace=True)

In [38]:
x

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK
0,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021
1,116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025
2,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258
3,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571
4,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548
5,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906
6,0.0,14559.991862,6.070277,59377.0,3298.472645,17213.260595,8196.896749
7,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.926996,10428.246256
8,64.863914,10417.677296,0.647919,24207.0,1910.209867,10614.829758,2746.313635
9,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037


The sensitivity Values

In [39]:
import numpy as np
import pandas as pd
from itertools import product

arr = np.arange(0, 1.25, 0.25)
columns=['Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179)', 'Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182)' , 'Sensitivity (NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No_183)','Sensitivity (AreaUnderCereals_Maize_193)','Sensitivity (MotorVehicles_Tractors_514)', 'Sensitivity (TotalAgricultureLoan)' , 'Sensitivity (TotalNPK_315)']
# Generate all possible combinations of elements in arr
combinations = product(arr, repeat=len(columns))

# alpha_df = pd.DataFrame(combinations, columns=['Sensitivity (TotalNPK)', 'Sensitivity (RRB_AgricultureLoan)', 'Sensitivity (KSCARD_PLDBank_AgricultureLoan)', 'Sensitivity (SowingSeedsDistributed_Maize)'])

In [40]:
alpha_df = pd.DataFrame(combinations, columns=columns)
alpha_df.head()

Unnamed: 0,Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179),Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182),Sensitivity (NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No_183),Sensitivity (AreaUnderCereals_Maize_193),Sensitivity (MotorVehicles_Tractors_514),Sensitivity (TotalAgricultureLoan),Sensitivity (TotalNPK_315)
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.25
2,0.0,0.0,0.0,0.0,0.0,0.0,0.5
3,0.0,0.0,0.0,0.0,0.0,0.0,0.75
4,0.0,0.0,0.0,0.0,0.0,0.0,1.0


Computing the new values for all the factors for all the combinations of sensitivity values

In [41]:
import pandas as pd
import numpy as np

x_new_all_comb = pd.DataFrame()
target=160000

def computing_new_x(alpha_x):
    temp_df = (x - np.array(alpha_x) * (y.mean() - target) / coef)
    alpha = pd.DataFrame(np.tile(alpha_x, (len(temp_df), 1)),columns=alpha_x.index)
    temp_df = pd.concat([temp_df, alpha], axis=1)
    return temp_df

for index, row in alpha_df.iterrows():
    x_new_all_comb = pd.concat([x_new_all_comb, computing_new_x(row)], ignore_index=True)

KeyboardInterrupt: 

In [42]:
x_new_all_comb

Unnamed: 0,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179),Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182),Sensitivity (NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No_183),Sensitivity (AreaUnderCereals_Maize_193),Sensitivity (MotorVehicles_Tractors_514),Sensitivity (TotalAgricultureLoan),Sensitivity (TotalNPK_315)
0,0.000000,175.723235,0.000000,1129.000000,275.164389,98.025105,438.180210,0.0,0.00,0.00,0.00,0.00,0.00,0.0
1,116.814760,3197.193185,0.157645,14867.000000,154.649500,3429.301433,3036.080250,0.0,0.00,0.00,0.00,0.00,0.00,0.0
2,0.000000,709.859804,0.118964,1987.000000,339.256359,928.035816,222.225258,0.0,0.00,0.00,0.00,0.00,0.00,0.0
3,0.000000,28105.507253,32.509127,92194.000000,5647.169517,37647.333954,10902.406571,0.0,0.00,0.00,0.00,0.00,0.00,0.0
4,221.834115,20213.730878,402.912659,173400.000000,8341.220655,27344.833540,25702.835480,0.0,0.00,0.00,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167581,155.751838,-5821.401575,-651.648426,11239.431613,796.165219,459867.289684,710.101614,0.0,0.25,0.75,0.75,0.25,0.25,0.0
167582,43.126175,-6374.283061,-643.543795,9566.431613,621.084808,458119.648871,368.617469,0.0,0.25,0.75,0.75,0.25,0.25,0.0
167583,7.437047,-6449.211637,-650.442794,11205.431613,594.591211,457988.590962,61.547519,0.0,0.25,0.75,0.75,0.25,0.25,0.0
167584,0.000000,5880.075960,-651.652349,56815.431613,3020.875220,468600.369025,10997.550403,0.0,0.25,0.75,0.75,0.25,0.25,0.0


In [79]:
x_new_all_comb = Viz_df

In [80]:
x_new_all_comb[:31]

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179),Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182),...,Sensitivity (AreaUnderCereals_Maize_193),Sensitivity (MotorVehicles_Tractors_514),Sensitivity (TotalAgricultureLoan),Sensitivity (TotalNPK_315),Maize_Production,New Stability,New Stress,Percentage Change,Relative Change,Abbreviation
0,BENGALURU,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021,0.0,0.0,...,0.0,0.0,0.0,0.0,1986.72735,0.978939,0.021061,-30.99245,-0.309925,BLR
1,BENGALURU(R),116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025,0.0,0.0,...,0.0,0.0,0.0,0.0,40824.8171,0.980141,0.019859,-4.128838,-0.041288,BLR(R)
2,RAMANAGARA,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258,0.0,0.0,...,0.0,0.0,0.0,0.0,2981.768487,0.923336,0.076664,-71.46633,-0.714663,RGA
3,CHITRADURGA,0.0,28105.50725,32.509127,92194.0,5647.169517,37647.33395,10902.40657,0.0,0.0,...,0.0,0.0,0.0,0.0,283801.7238,0.870357,0.129643,4.34846,0.043485,CDA
4,DAVANAGERE,221.834115,20213.73088,402.912659,173400.0,8341.220655,27344.83354,25702.83548,0.0,0.0,...,0.0,0.0,0.0,0.0,561073.4115,0.409416,0.590584,-5.470629,-0.054706,DVG
5,KOLAR,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.872247,0.127753,-100.0,-1.0,KLR
6,CHIKKABALLAPURA,0.0,14559.99186,6.070277,59377.0,3298.472645,17213.2606,8196.896749,0.0,0.0,...,0.0,0.0,0.0,0.0,186759.7192,0.750927,0.249073,77.038533,0.770385,CKA
7,SHIVAMOGGA,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.927,10428.24626,0.0,0.0,...,0.0,0.0,0.0,0.0,151193.3368,0.891715,0.108285,-33.257404,-0.332574,SMG
8,TUMAKURU,64.863914,10417.6773,0.647919,24207.0,1910.209867,10614.82976,2746.313635,0.0,0.0,...,0.0,0.0,0.0,0.0,71874.05483,0.897377,0.102623,7.486473,0.074865,TKR
9,CHIKKAMAGALURU,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037,0.0,0.0,...,0.0,0.0,0.0,0.0,95051.47185,0.830322,0.169678,54.925549,0.549255,CMG


# Neg values

In [46]:
import numpy as np
import pandas as pd

def clip(col):
    return np.maximum(col, 0)

# Assuming x_new_all_comb is your DataFrame
numeric_cols = x_new_all_comb.select_dtypes(include=[np.number])  # Select only numeric columns
x_new_all_comb[numeric_cols.columns] = numeric_cols.apply(clip)   # Apply clip function only to numeric columns

print(x_new_all_comb)

            District  NetAreaIrrigated_Wells_NetAreaIrrigated  \
0          BENGALURU                                 0.000000   
1       BENGALURU(R)                               116.814760   
2         RAMANAGARA                                 0.000000   
3        CHITRADURGA                                 0.000000   
4         DAVANAGERE                               221.834115   
...              ...                                      ...   
421870    KALABURAGI                             31707.723927   
421871       YADGIRI                             31595.098263   
421872       RAICHUR                             31559.409135   
421873        KOPPAL                             31551.972088   
421874  Vijayanagara                             31758.972088   

        NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea  \
0                                              175.723235                  
1                                             3197.193185          

In [47]:
x_new_all_comb=sm.add_constant(x_new_all_comb)
x_new_all_comb

Unnamed: 0,const,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179),...,Sensitivity (AreaUnderCereals_Maize_193),Sensitivity (MotorVehicles_Tractors_514),Sensitivity (TotalAgricultureLoan),Sensitivity (TotalNPK_315),Maize_Production,New Stability,New Stress,Percentage Change,Relative Change,Abbreviation
0,1.0,BENGALURU,0.000000,175.723235,0.000000,1129.000000,275.164389,9.802510e+01,438.180210,0.0,...,0.0,0.0,0.0,0.0,1986.727350,0.978939,0.021061,0.000000,0.000000,BLR
1,1.0,BENGALURU(R),116.814760,3197.193185,0.157645,14867.000000,154.649500,3.429301e+03,3036.080250,0.0,...,0.0,0.0,0.0,0.0,40824.817100,0.980141,0.019859,0.000000,0.000000,BLR(R)
2,1.0,RAMANAGARA,0.000000,709.859804,0.118964,1987.000000,339.256359,9.280358e+02,222.225258,0.0,...,0.0,0.0,0.0,0.0,2981.768487,0.923336,0.076664,0.000000,0.000000,RGA
3,1.0,CHITRADURGA,0.000000,28105.507250,32.509127,92194.000000,5647.169517,3.764733e+04,10902.406570,0.0,...,0.0,0.0,0.0,0.0,283801.723800,0.870357,0.129643,4.348460,0.043485,CDA
4,1.0,DAVANAGERE,221.834115,20213.730880,402.912659,173400.000000,8341.220655,2.734483e+04,25702.835480,0.0,...,0.0,0.0,0.0,0.0,561073.411500,0.409416,0.590584,0.000000,0.000000,DVG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421870,1.0,KALABURAGI,31707.723927,0.000000,0.000000,14107.908818,2539.600513,1.833599e+06,5799.019427,1.0,...,1.0,1.0,1.0,1.0,132033.173592,0.911311,0.090146,803.037915,8.030379,KLB
421871,1.0,YADGIRI,31595.098263,0.000000,0.000000,12434.908818,2364.520103,1.831851e+06,5457.535282,1.0,...,1.0,1.0,1.0,1.0,124671.346026,0.898440,0.105729,4564.098243,45.640982,YDR
421872,1.0,RAICHUR,31559.409135,0.000000,0.000000,14073.908818,2338.026505,1.831720e+06,5150.465332,1.0,...,1.0,1.0,1.0,1.0,126429.240238,0.752912,0.247164,17386.755220,173.867552,RCR
421873,1.0,KOPPAL,31551.972088,0.000000,0.000000,59683.908818,4764.310514,1.842332e+06,16086.468216,1.0,...,1.0,1.0,1.0,1.0,306817.777322,0.954826,0.041604,61.269147,0.612691,KPL


Computing the new maize production values for all the new values of the factors

In [54]:
y_new_all_comb=sm.add_constant(x_new_all_comb.iloc[:,:8]).dot(model.params.values)
y_new_all_comb

NameError: name 'model_params' is not defined

In [None]:
model.params.values

array([-5.46001544e+03,  7.96998390e-01, -9.72700128e-01, -2.88757924e+01,
        2.19165686e+00,  1.08178108e+01,  1.37291391e-02,  4.94149678e+00])

In [None]:
x_new_all_comb.iloc[1,:len(columns)+1]

const                                                                        1.0
District                                                            BENGALURU(R)
NetAreaIrrigated_Wells_NetAreaIrrigated                                116.81476
NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea     3197.193185
NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No                 0.157645
AreaUnderCereals_Maize                                                   14867.0
MotorVehicles_Tractors                                                  154.6495
TotalAgricultureLoan                                                 3429.301433
Name: 1, dtype: object

In [None]:
y_new_all_comb=np.maximum(y_new_all_comb,0)
y_new_all_comb

NameError: name 'y_new_all_comb' is not defined

Technical functions

In [None]:
def init_graph(G,node_adj_frame):
    G.add_nodes_from([i for i in range(len(node_adj_frame))])
    labels = {}
    labels = node_adj_frame.columns
    for i in range(len(node_adj_frame)):
        snode = node_adj_frame[labels[0]][i]-1
        temp = node_adj_frame[labels[2]][i]
        if ',' in str(temp):
            sedge_arr = temp.split(',')
            for j in range(0, len(sedge_arr)):
                k = int(sedge_arr[j])
                G.add_edge(snode, k-1)
        elif np.isnan(temp):
            print("ERROR: Not found in the adjacency excel sheet")
        else:
            G.add_edge(snode, int(temp)-1)
    return

def init_graph_attr(G, AdjFile, values):
    node_adj_frame = pd.read_excel(AdjFile)
    node_list = node_adj_frame["District_Name"].tolist()
    node_list.insert(80, "")
    nodeAttr = {}
    init_graph(G, node_adj_frame)
    
    values = values.fillna(0)
    capability_vector = list(zip(*[values]))
    
    node_attri_dict = dict(zip(node_list[:31], capability_vector))
    node_attri_dict = dict((k, v) for k, v in node_attri_dict.items())

    for i in range(len(node_adj_frame)):
        temp = {}
        temp["capabilityvector"] = node_attri_dict[node_list[i]]
        temp["nodeStress"] = 0
        temp["name"] = node_list[i]
        nodeAttr[i] = temp

    nx.set_node_attributes(G, nodeAttr)

def addList(l1,l2):
    for i in range(len(l1)):
        l1[i] = l1[i] + l2[i]
    return l1
def divList(l1,k):
    for i in range(len(l1)):
        l1[i] = l1[i]/k
    return l1

def l2_normalization(l1,l2):
    k = 0
    for i in range(len(l1)):
        k+= (l1[i] - l2[i])**2
    return math.sqrt(k)

def get_node_stress(G,dim):
    stress_dict = {}
    for n in G.nodes():
        centroid = [0]*dim
        neighList = list(G.neighbors(n))
        for nei in neighList:
            try:
                centroid = addList(centroid,list(G.nodes[nei]["capabilityvector"]))
            except(KeyError):
                pass
        try:
            G.nodes[n]["nodeStress"] = l2_normalization(divList(centroid,len(neighList)),list(G.nodes[n]["capabilityvector"]))
        except(KeyError):
            pass
        try:
            stress_dict[G.nodes[n]["name"]]=G.nodes[n]["nodeStress"]
        except(KeyError):
            pass
    return stress_dict

def get_node_stability(G,dim):
    satbility_dict = {}
    for n in G.nodes():
        centroid = [0]*dim
        neighList = list(G.neighbors(n))
        for nei in neighList:
            try:
                centroid = addList(centroid,list(G.nodes[nei]["capabilityvector"]))
            except(KeyError):
                pass
        try:
            G.nodes[n]["nodeStability"] = 1 - l2_normalization(divList(centroid,len(neighList)),list(G.nodes[n]["capabilityvector"]))
        except(KeyError):
            pass
        try:
            satbility_dict[G.nodes[n]["name"]]=G.nodes[n]["nodeStability"]
        except(KeyError):
            pass
    return satbility_dict

In [None]:
x_new_all_comb['Maize_Production']=y_new_all_comb

NameError: name 'y_new_all_comb' is not defined

In [None]:
x_new_all_comb.drop("const",axis=1,inplace=True)
x_new_all_comb

In [None]:
x_new_all_comb.columns

In [81]:
df = Another

In [82]:
district_values = df1['District'].tolist()

# Repeat the district values to fill 2343750 rows
district_values_repeated = district_values * ((2421875 // len(district_values)) + 1)

# Trim the excess values to match the length of df
district_values_repeated = district_values_repeated[:len(df)]

# Add the 'District' column to df
df.insert(0, 'District', district_values_repeated)

ValueError: cannot insert District, already exists

In [83]:
df

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.000000,175.723235,0.000000,1129.000000,275.164389,9.802510e+01,438.180210,1986.727350
1,BENGALURU(R),116.814760,3197.193185,0.157645,14867.000000,154.649500,3.429301e+03,3036.080250,40824.817100
2,RAMANAGARA,0.000000,709.859804,0.118964,1987.000000,339.256359,9.280358e+02,222.225258,2981.768487
3,CHITRADURGA,0.000000,28105.507250,32.509127,92194.000000,5647.169517,3.764733e+04,10902.406570,283801.723800
4,DAVANAGERE,221.834115,20213.730880,402.912659,173400.000000,8341.220655,2.734483e+04,25702.835480,561073.411500
...,...,...,...,...,...,...,...,...,...
421870,KALABURAGI,31707.723927,0.000000,0.000000,14107.908818,2539.600513,1.833599e+06,5799.019427,132033.173592
421871,YADGIRI,31595.098263,0.000000,0.000000,12434.908818,2364.520103,1.831851e+06,5457.535282,124671.346026
421872,RAICHUR,31559.409135,0.000000,0.000000,14073.908818,2338.026505,1.831720e+06,5150.465332,126429.240238
421873,KOPPAL,31551.972088,0.000000,0.000000,59683.908818,4764.310514,1.842332e+06,16086.468216,306817.777322


In [84]:
x_new_all_comb['Maize_Production'].mean()

204524.99163244438

In [85]:
df

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.000000,175.723235,0.000000,1129.000000,275.164389,9.802510e+01,438.180210,1986.727350
1,BENGALURU(R),116.814760,3197.193185,0.157645,14867.000000,154.649500,3.429301e+03,3036.080250,40824.817100
2,RAMANAGARA,0.000000,709.859804,0.118964,1987.000000,339.256359,9.280358e+02,222.225258,2981.768487
3,CHITRADURGA,0.000000,28105.507250,32.509127,92194.000000,5647.169517,3.764733e+04,10902.406570,283801.723800
4,DAVANAGERE,221.834115,20213.730880,402.912659,173400.000000,8341.220655,2.734483e+04,25702.835480,561073.411500
...,...,...,...,...,...,...,...,...,...
421870,KALABURAGI,31707.723927,0.000000,0.000000,14107.908818,2539.600513,1.833599e+06,5799.019427,132033.173592
421871,YADGIRI,31595.098263,0.000000,0.000000,12434.908818,2364.520103,1.831851e+06,5457.535282,124671.346026
421872,RAICHUR,31559.409135,0.000000,0.000000,14073.908818,2338.026505,1.831720e+06,5150.465332,126429.240238
421873,KOPPAL,31551.972088,0.000000,0.000000,59683.908818,4764.310514,1.842332e+06,16086.468216,306817.777322


In [None]:
df2

Calculating the Stability 

In [86]:
dim = 1
G = nx.Graph()
adjacency_file = "Karnataka_District_Adjacency_File - Copy.xlsx"

column = df2["Maize_Production"]
column_values = column.values.reshape(-1, 1)
scaler = MinMaxScaler()
normalized_column_values = scaler.fit_transform(column_values)

column_to_pass = pd.Series(normalized_column_values.flatten(), name="Maize_Production")
init_graph_attr(G,adjacency_file , column_to_pass )

initialstability= get_node_stability(G,dim)
# df["Initial Stability"] = df["District"].map(initialstability)

In [87]:
df.head()

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021,1986.72735
1,BENGALURU(R),116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025,40824.8171
2,RAMANAGARA,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258,2981.768487
3,CHITRADURGA,0.0,28105.50725,32.509127,92194.0,5647.169517,37647.33395,10902.40657,283801.7238
4,DAVANAGERE,221.834115,20213.73088,402.912659,173400.0,8341.220655,27344.83354,25702.83548,561073.4115


In [None]:
# df.to_excel("MaizeCaseFile_2.xlsx",index=False)

In [92]:
df2

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Maize_Production
0,BENGALURU,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021,2879
1,BENGALURU(R),116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025,42583
2,RAMANAGARA,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258,10450
3,CHITRADURGA,0.0,28105.507253,32.509127,92194.0,5647.169517,37647.333954,10902.406571,271975
4,DAVANAGERE,221.834115,20213.730878,402.912659,173400.0,8341.220655,27344.83354,25702.83548,593544
5,KOLAR,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906,1759
6,CHIKKABALLAPURA,0.0,14559.991862,6.070277,59377.0,3298.472645,17213.260595,8196.896749,105491
7,SHIVAMOGGA,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.926996,10428.246256,226532
8,TUMAKURU,64.863914,10417.677296,0.647919,24207.0,1910.209867,10614.829758,2746.313635,66868
9,CHIKKAMAGALURU,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037,61353


In [88]:
x_new_all_comb[0:31]

Unnamed: 0,District,NetAreaIrrigated_Wells_NetAreaIrrigated,NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea,NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No,AreaUnderCereals_Maize,MotorVehicles_Tractors,TotalAgricultureLoan,TotalNPK,Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179),Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182),...,Sensitivity (AreaUnderCereals_Maize_193),Sensitivity (MotorVehicles_Tractors_514),Sensitivity (TotalAgricultureLoan),Sensitivity (TotalNPK_315),Maize_Production,New Stability,New Stress,Percentage Change,Relative Change,Abbreviation
0,BENGALURU,0.0,175.723235,0.0,1129.0,275.164389,98.025105,438.18021,0.0,0.0,...,0.0,0.0,0.0,0.0,1986.72735,0.978939,0.021061,-30.99245,-0.309925,BLR
1,BENGALURU(R),116.81476,3197.193185,0.157645,14867.0,154.6495,3429.301433,3036.08025,0.0,0.0,...,0.0,0.0,0.0,0.0,40824.8171,0.980141,0.019859,-4.128838,-0.041288,BLR(R)
2,RAMANAGARA,0.0,709.859804,0.118964,1987.0,339.256359,928.035816,222.225258,0.0,0.0,...,0.0,0.0,0.0,0.0,2981.768487,0.923336,0.076664,-71.46633,-0.714663,RGA
3,CHITRADURGA,0.0,28105.50725,32.509127,92194.0,5647.169517,37647.33395,10902.40657,0.0,0.0,...,0.0,0.0,0.0,0.0,283801.7238,0.870357,0.129643,4.34846,0.043485,CDA
4,DAVANAGERE,221.834115,20213.73088,402.912659,173400.0,8341.220655,27344.83354,25702.83548,0.0,0.0,...,0.0,0.0,0.0,0.0,561073.4115,0.409416,0.590584,-5.470629,-0.054706,DVG
5,KOLAR,0.0,200.386108,0.0,640.0,167.701482,772.943145,220.782906,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.872247,0.127753,-100.0,-1.0,KLR
6,CHIKKABALLAPURA,0.0,14559.99186,6.070277,59377.0,3298.472645,17213.2606,8196.896749,0.0,0.0,...,0.0,0.0,0.0,0.0,186759.7192,0.750927,0.249073,77.038533,0.770385,CKA
7,SHIVAMOGGA,1178.978184,5958.813645,764.862097,43827.0,3305.007438,18792.927,10428.24626,0.0,0.0,...,0.0,0.0,0.0,0.0,151193.3368,0.891715,0.108285,-33.257404,-0.332574,SMG
8,TUMAKURU,64.863914,10417.6773,0.647919,24207.0,1910.209867,10614.82976,2746.313635,0.0,0.0,...,0.0,0.0,0.0,0.0,71874.05483,0.897377,0.102623,7.486473,0.074865,TKR
9,CHIKKAMAGALURU,20.792453,3145.789961,489.764426,25831.0,1908.819293,9537.739741,8156.29037,0.0,0.0,...,0.0,0.0,0.0,0.0,95051.47185,0.830322,0.169678,54.925549,0.549255,CMG


In [89]:
def calculate_and_map_stability(G, df, adjacency_file, column_values, dim):
    G = nx.Graph()
    scaler = MinMaxScaler()
    
    normalized_column_values = scaler.fit_transform(column_values.reshape(-1, 1))
    column_to_pass = pd.Series(normalized_column_values.flatten(), name="Column")

    init_graph_attr(G, adjacency_file, column_to_pass)
    taluka_stress_dict = get_node_stability(G, dim)

    stability_column_name = "New Stability"
    stability_column = df["District"].map(taluka_stress_dict)

    return stability_column, stability_column_name

Cap_Vector = "Maize_Production"

column_values = x_new_all_comb[Cap_Vector].iloc[0:31].values
result_column, result_column_name = calculate_and_map_stability(G, df, adjacency_file, column_values, dim)

df[result_column_name] = result_column

In [90]:
def calculate_and_map_stress(G, df, adjacency_file, column_values, dim):
    G = nx.Graph()
    scaler = MinMaxScaler()
    
    normalized_column_values = scaler.fit_transform(column_values.reshape(-1, 1))
    column_to_pass = pd.Series(normalized_column_values.flatten(), name="Column")

    init_graph_attr(G, adjacency_file, column_to_pass)
    taluka_stress_dict = get_node_stress(G, dim)

    stress_column_name = "New Stress"
    stress_column = df["District"].map(taluka_stress_dict)

    return stress_column, stress_column_name

Cap_Vector = "Maize_Production"

column_values = x_new_all_comb[Cap_Vector].iloc[0:31].values 
result_column, result_column_name = calculate_and_map_stress(G, df, adjacency_file, column_values, dim)

df[result_column_name] = result_column

In [93]:
def calculate_and_map_stability(G, df, adjacency_file, column_values, dim):
    G = nx.Graph()

    scaler = MinMaxScaler()
    normalized_column_values = scaler.fit_transform(column_values.reshape(-1, 1))

    column_to_pass = pd.Series(normalized_column_values.flatten(), name="Column")

    init_graph_attr(G, adjacency_file, column_to_pass)

    stability_dict = get_node_stability(G, dim)
    stability_column = df["District"].map(stability_dict)

    return stability_column

x_new_all_comb["New Stability"] = np.nan
Cap_Vector = "Maize_Production"

chunk_size = 31
for i in range(0, len(x_new_all_comb), chunk_size):

    chunk_values = x_new_all_comb[Cap_Vector].iloc[i:i + chunk_size].values

    result_column = calculate_and_map_stability(G, df2, adjacency_file, chunk_values, dim)
    
    x_new_all_comb["New Stability"].iloc[i:i + chunk_size] = result_column

x_new_all_comb["New Stability"] = pd.to_numeric(x_new_all_comb["New Stability"])


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




In [None]:
def calculate_and_map_stress(G, df, adjacency_file, column_values, dim):
    G = nx.Graph()

    scaler = MinMaxScaler()
    normalized_column_values = scaler.fit_transform(column_values.reshape(-1, 1))

    column_to_pass = pd.Series(normalized_column_values.flatten(), name="Column")

    init_graph_attr(G, adjacency_file, column_to_pass)

    stress_dict = get_node_stress(G, dim)
    stress_column = df["District"].map(stress_dict)

    return stress_column

x_new_all_comb["New Stress"] = np.nan
Cap_Vector = "Maize_Production"

chunk_size = 31
for i in range(0, len(x_new_all_comb), chunk_size):

    chunk_values = x_new_all_comb[Cap_Vector].iloc[i:i + chunk_size].values

    result_column = calculate_and_map_stress(G, df2, adjacency_file, chunk_values, dim)
    
    x_new_all_comb["New Stress"].iloc[i:i + chunk_size] = result_column

x_new_all_comb["New Stress"] = pd.to_numeric(x_new_all_comb["New Stress"])

In [None]:
x_new_all_comb

In [None]:
# x_new_all_comb.to_excel("x_col_new_all_comb_10.xlsx", index =False)

In [None]:
# x_new_all_comb = pd.read_excel('x_col_new_all_comb_10.xlsx')
# x_new_all_comb = x_new_all_comb.dropna()
# x_new_all_comb

In [None]:
df = x_new_all_comb

In [None]:
df = Viz_df

In [None]:
# x_new_all_comb.to_excel("x_col_new_all_comb_10_20.xlsx", index =False)

import math

# Assuming x_new_all_comb is your DataFrame
chunk_size = 1000000  # Define the chunk size

num_chunks = math.ceil(len(df) / chunk_size)

for i in range(num_chunks):
    start_idx = i * chunk_size
    end_idx = min((i + 1) * chunk_size, len(df))
    chunk_df = df.iloc[start_idx:end_idx]
    filename = f"x_col_new_all_comb_10_20_part_{i+1}.csv"
    chunk_df.to_csv(filename, index=False)


In [None]:
df.columns

In [None]:
df2

In [None]:
df = df2

district_column = pd.DataFrame({
    # "KGISDist 1": df["KGISDist 1"].tolist() * (len(Viz_df) // len(df)),
    "District": df["District"].tolist() * (len(Viz_df) // len(df)),
    # "Target Maize Production": [target] * len(Viz_df)
})

district_column = pd.concat([district_column, district_column.head(len(Viz_df) % len(df))])

district_column.reset_index(drop=True, inplace=True)

Viz_df["District"] = district_column["District"]

Viz_df.drop(columns=["District"], inplace=True)


Viz_df.insert(0, "District", district_column["District"])

In [None]:
df = Viz_df

In [None]:
df

In [None]:
df.head()

Calculation of the percentage and relative change of the maize production

In [None]:
# df = df.reset_index(drop=True)

df['Percentage Change'] = df['Maize_Production'] - np.tile(y.values, len(df) // 31 + 1)[:len(df)]
for i in range(0, len(df), 31):
    df.loc[i:i+30, 'Percentage Change'] = df.loc[i:i+30, 'Percentage Change'] * 100 / y.values + 1e-9

df['Percentage Change'] = df['Percentage Change'].replace([np.inf, -np.inf], 0)

df['Relative Change'] = df['Maize_Production'] - np.tile(y.values, len(df) // 31 + 1)[:len(df)]
for i in range(0, len(df), 31):
    df.loc[i:i+30, 'Relative Change'] = df.loc[i:i+30, 'Relative Change'] / y.values + 1e-9

df['Relative Change'] = df['Relative Change'].replace([np.inf, -np.inf], 0)

In [None]:
df2

In [None]:
# df.to_csv("Maize_Prod_150000_FCR.csv", index =False)
# df.to_excel("Maize_Prod_150000.xlsx", index =False)

In [None]:
# x_new_all_comb['Maternal Deaths'].mean()

In [None]:
abbreviation_mapping = {
    'BENGALURU': 'BLR',
    'BENGALURU(R)': 'BLR(R)',
    'RAMANAGARA': 'RGA',
    'CHITRADURGA': 'CDA',
    'DAVANAGERE': 'DVG',
    'KOLAR': 'KLR',
    'CHIKKABALLAPURA': 'CKA',
    'SHIVAMOGGA': 'SMG',
    'TUMAKURU': 'TKR',
    'CHIKKAMAGALURU': 'CMG',
    'DAKSHINA KANNADA': 'DKA',
    'UDUPI': 'UPI',
    'HASSAN': 'HSN',
    'KODAGU': 'KDG',
    'MANDYA': 'MDY',
    'MYSURU': 'MYS',
    'CHAMARAJANAGAR': 'CNR',
    'BELAGAVI': 'BLG',
    'VIJAYAPURA': 'VJP',
    'BAGALKOT': 'BKT',
    'DHARAWAD': 'DWD',
    'GADAG': 'GDG',
    'HAVERI': 'HVR',
    'UTTARA KANNADA': 'UTK',
    'BALLARI': 'BLL',
    'BIDAR': 'BDR',
    'KALABURAGI': 'KLB',
    'YADGIRI': 'YDR',
    'RAICHUR': 'RCR',
    'KOPPAL': 'KPL',
    'Vijayanagara' : 'VJN'
}

df['Abbreviation'] = df['District'].map(abbreviation_mapping)

In [None]:
df

In [None]:
legend_labels = [{'Abbreviation': abb, 'Full Form': full_form} 
                    for abb, full_form in zip(df['Abbreviation'].unique(), df['District'].unique())]

In [None]:
legend_labels

The tolerance was added to the code to get the correct visualisation because pyhton was giving 0.60001 instead of 0.6 in the sensitivity values (and some other random places too).

In [None]:
y = df2['Maize_Production']

In [None]:
df = Viz_df

## Percentage Change vs Stability

Visualization of the distribution of the values

In [None]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import clear_output
import plotly.graph_objects as go

def update_plot(alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7):
    
    tolerance = 1e-5 
    filtered_df = df[(df['Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179)'].between(alpha1 - tolerance, alpha1 + tolerance)) &
                 (df['Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182)'].between(alpha2 - tolerance, alpha2 + tolerance)) &
                 (df['Sensitivity (NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No_183)'].between(alpha3 - tolerance, alpha3 + tolerance)) & 
                 (df['Sensitivity (AreaUnderCereals_Maize_193)'].between(alpha4 - tolerance, alpha4 + tolerance)) &
                 (df['Sensitivity (MotorVehicles_Tractors_514)'].between(alpha5 - tolerance, alpha5 + tolerance)) &
                 (df['Sensitivity (TotalAgricultureLoan)'].between(alpha6 - tolerance, alpha6 + tolerance)) &
                 (df['Sensitivity (TotalNPK_315)'].between(alpha7 - tolerance, alpha7 + tolerance))]

    
    fig = px.scatter(filtered_df, x= 'New Stability', y='Percentage Change', title='Percentage Change vs Stability',
                     labels={'Percentage Change': 'Percentage Change', 'New Stability': 'Stability'},
                     text='Abbreviation', opacity=1,color_discrete_sequence=['darkblue'])

    fig.update_traces(textposition='top center', textfont=dict(size=11))

    fig.update_layout(
        annotations=[
            dict(
                x=0.5,
                y=-0.25,
                showarrow=False,
                text=f'α<sub>IW</sub>: {alpha1:.2f}, α<sub>ITW</sub>: {alpha2:.2f}, α<sub>LI</sub>: {alpha3:.2f},α<sub>AM</sub>: {alpha4:.2f}, α<sub>TC</sub>: {alpha5:.2f}, α<sub>AL</sub>: {alpha6:.2f},α<sub>NPK</sub>: {alpha7:.2f} ',
                xref="paper",
                yref="paper",
                font=dict(size=13)
            )
        ],
        legend_title_text='Districts',
        legend_title=dict(font=dict(size=12)),
    )

    legend_labels = [{'Abbreviation': abb, 'Full Form': full_form} 
                     for abb, full_form in zip(filtered_df['Abbreviation'].unique(), filtered_df['District'].unique())]

    for label in legend_labels:
        fig.add_trace(go.Scatter(
            x=[None],
            y=[None],
            mode='markers',
            marker=dict(color='darkblue'),
            name=f"{label['Abbreviation']}: {label['Full Form']}"
        ))

    avg_stability = filtered_df['New Stability'].mean()
    avg_precent = filtered_df['Percentage Change'].mean()

    fig.add_shape(
        type='line',
        x0=avg_stability, y0=0, x1=avg_stability, y1=1,
        line=dict(color='red', width=1.5, dash='dash'),
        yref='paper'
    )

    fig.add_shape(
        type='line',
        x0=0, y0=avg_precent, x1=1, y1=avg_precent,
        line=dict(color='red', width=1.5, dash='dash'),
        xref='paper'
    )

    fig.add_trace(go.Scatter(
        x=[avg_stability],
        y=[-0.07],
        text=[f'Avg Stability: {avg_stability:.2f}'],
        mode="text",
        showlegend=False,
        textfont=dict(size=10.7)
    ))

    fig.add_trace(go.Scatter(
        x=[0.3],
        y=[avg_precent-0.04],
        text=[f'Avg Percentage Change: {avg_precent:.2f}'],
        mode="text",
        showlegend=False,
        textfont=dict(size=10.7)
    ))

    clear_output()

    display(fig)
    
alpha1_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αIW:')
alpha2_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αITW:')
alpha3_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αLI:')
alpha4_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αAM:')
alpha5_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αTC:')
alpha6_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αAL:')
alpha7_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αNPK:')

interactive_plot = interact(update_plot, alpha1=alpha1_slider, alpha2=alpha2_slider, alpha3=alpha3_slider, alpha4=alpha4_slider, alpha5=alpha5_slider, alpha6=alpha6_slider,alpha7=alpha7_slider, __manual=True)
update_button = widgets.Button(description="Update Plot")
update_button.on_click(lambda _: update_plot(alpha1_slider.value, alpha2_slider.value, alpha3_slider.value, alpha4_slider.value, alpha5_slider.value, alpha6_slider.value, alpha7_slider.value))
widgets.HBox([update_button, alpha1_slider, alpha2_slider, alpha3_slider, alpha4_slider, alpha5_slider, alpha6_slider, alpha7_slider])


## Relative Change vs Stability

In [None]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import clear_output
import plotly.graph_objects as go

def update_plot(alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7):
    
    tolerance = 1e-5 
    filtered_df = df[(df['Sensitivity (NetAreaIrrigated_Wells_NetAreaIrrigated_179)'].between(alpha1 - tolerance, alpha1 + tolerance)) &
                 (df['Sensitivity (NetAreaIrrigatedUnderDifferentSources_TubeWells_NetIrrigatedArea_182)'].between(alpha2 - tolerance, alpha2 + tolerance)) &
                 (df['Sensitivity (NetAreaIrrigatedUnderDifferentSources_LiftIrrigation_No_183)'].between(alpha3 - tolerance, alpha3 + tolerance)) & 
                 (df['Sensitivity (AreaUnderCereals_Maize_193)'].between(alpha4 - tolerance, alpha4 + tolerance)) &
                 (df['Sensitivity (MotorVehicles_Tractors_514)'].between(alpha5 - tolerance, alpha5 + tolerance)) &
                 (df['Sensitivity (TotalAgricultureLoan)'].between(alpha6 - tolerance, alpha6 + tolerance)) &
                 (df['Sensitivity (TotalNPK_315)'].between(alpha7 - tolerance, alpha7 + tolerance))]

    
    fig = px.scatter(filtered_df, x= 'New Stability', y='Relative Change', title='Relative Change vs Stability',
                     labels={'Relative Change': 'Relative Change', 'New Stability': 'Stability'},
                     text='Abbreviation', opacity=1,color_discrete_sequence=['darkblue'])

    fig.update_traces(textposition='top center', textfont=dict(size=11))

    fig.update_layout(
        annotations=[
            dict(
                x=0.5,
                y=-0.25,
                showarrow=False,
                text=f'α<sub>IW</sub>: {alpha1:.2f}, α<sub>ITW</sub>: {alpha2:.2f}, α<sub>LI</sub>: {alpha3:.2f},α<sub>AM</sub>: {alpha4:.2f}, α<sub>TC</sub>: {alpha5:.2f}, α<sub>AL</sub>: {alpha6:.2f},α<sub>NPK</sub>: {alpha7:.2f} ',
                xref="paper",
                yref="paper",
                font=dict(size=13)
            )
        ],
        legend_title_text='Districts',
        legend_title=dict(font=dict(size=12)),
    )

    legend_labels = [{'Abbreviation': abb, 'Full Form': full_form} 
                     for abb, full_form in zip(filtered_df['Abbreviation'].unique(), filtered_df['District'].unique())]

    for label in legend_labels:
        fig.add_trace(go.Scatter(
            x=[None],
            y=[None],
            mode='markers',
            marker=dict(color='darkblue'),
            name=f"{label['Abbreviation']}: {label['Full Form']}"
        ))

    avg_stability = filtered_df['New Stability'].mean()
    avg_precent = filtered_df['Relative Change'].mean()

    fig.add_shape(
        type='line',
        x0=avg_stability, y0=0, x1=avg_stability, y1=1,
        line=dict(color='red', width=1.5, dash='dash'),
        yref='paper'
    )

    fig.add_shape(
        type='line',
        x0=0, y0=avg_precent, x1=1, y1=avg_precent,
        line=dict(color='red', width=1.5, dash='dash'),
        xref='paper'
    )

    fig.add_trace(go.Scatter(
        x=[avg_stability],
        y=[-0.07],
        text=[f'Avg Stability: {avg_stability:.2f}'],
        mode="text",
        showlegend=False,
        textfont=dict(size=10.7)
    ))

    fig.add_trace(go.Scatter(
        x=[0.3],
        y=[avg_precent-0.04],
        text=[f'Avg Relative Change: {avg_precent:.2f}'],
        mode="text",
        showlegend=False,
        textfont=dict(size=10.7)
    ))

    clear_output()

    display(fig)
    
alpha1_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αIW:')
alpha2_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αITW:')
alpha3_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αLI:')
alpha4_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αAM:')
alpha5_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αTC:')
alpha6_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αAL:')
alpha7_slider = widgets.FloatSlider(value=0.5, min=0, max=1, step=0.25, description='αNPK:')

interactive_plot = interact(update_plot, alpha1=alpha1_slider, alpha2=alpha2_slider, alpha3=alpha3_slider, alpha4=alpha4_slider, alpha5=alpha5_slider, alpha6=alpha6_slider,alpha7=alpha7_slider, __manual=True)
update_button = widgets.Button(description="Update Plot")
update_button.on_click(lambda _: update_plot(alpha1_slider.value, alpha2_slider.value, alpha3_slider.value, alpha4_slider.value, alpha5_slider.value, alpha6_slider.value, alpha7_slider.value))
widgets.HBox([update_button, alpha1_slider, alpha2_slider, alpha3_slider, alpha4_slider, alpha5_slider, alpha6_slider, alpha7_slider])
