# Create the Socio-Economic Sensitivity Dimension Matrix

The objective of this notebook is to create the final output for the socioeconomic sensitivity survey. The output is a matrix whose columns are the different indicators of socio-economic sensitivity and whose rows are indexed by resource and fisher (respondent). Demographic information will also be included.

**Economic Dependence**<br>
ED1: Proportion of monthly income from resource during hot season<br>
ED2: Proportion of monthly income from resource during cold season<br>
**Nutritional Dependence** <br>
ND1: Proportion of animal protein from resource during hot season<br>
ND2: Proportion of animal protein from resource during cold season <br>
**Cultural Connection** <br>
CC1: Tradition score <br>
CC2: Social union score <br>
CC3: Identity score <br>

In [89]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
from zipfile import ZipFile
import seaborn as sns
import math

In [90]:
# Base directory of the cleaned survey tables; edit this one value to relocate the data.
DATA_DIR = '/home/rthoms/ocean_dep/sur_B'

# Respondent demographics (the only input read from the clean_up/ subdirectory).
demo_df = pd.read_csv(DATA_DIR + '/clean_up/survey_main_table_clean.csv')
# Indicator tables: income (ed), nutrition (nd), tradition (cc1), social (cc2), identity (cc3).
ed_df = pd.read_csv(DATA_DIR + '/income_table_clean.csv')
nd_df = pd.read_csv(DATA_DIR + '/nutrition_table_clean.csv')
cc1_df = pd.read_csv(DATA_DIR + '/trad_table_clean.csv')
cc2_df = pd.read_csv(DATA_DIR + '/soc_table_clean.csv')
cc3_df = pd.read_csv(DATA_DIR + '/iden_table_clean.csv')


In [91]:
# Display the nutrition table as loaded, before its resource column is renamed.
nd_df

Unnamed: 0.1,Unnamed: 0,nutrition_source,gitonga_s,gitonga_p,portuguese,scientific,family,phylum,category,focal,...,nutrition_prop_hot_raw,nutrition_prop_hot,nutrition_prop_cold_raw,nutrition_prop_cold,ParentGlobalID,ccp,gender,fish_pract,pract_cat,id
0,0,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,2,0.066667,2,0.066667,5242b473-cb0e-43ec-bc8f-6e626f4ee1d3,nhamua,f,glean,glean,13022023NHAMUB02
1,426,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,4,0.133333,4,0.133333,caac8748-ebed-413c-a5d2-a635b5bd8932,nhampossa,m,fence_trap,fish,09022023NHMPB04
2,434,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,5,0.166667,3,0.100000,a9325c3b-f81d-431a-9628-6b0b57ede5e3,nhampossa,f,gill_net,fish,09022023NHMPB05
3,455,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,3,0.100000,3,0.100000,be5b94c1-65c2-44dc-af3c-8a0c1f35f055,madava,m,drag_net,fish,08022023MADB02
4,489,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,8,0.266667,8,0.266667,8407b1b2-9c62-4161-a560-a8c8415ca76a,madava,m,gill_net,fish,08022023MADB06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947,118,99,,,carne,nonfish_meat,nonfish_meat,,,,...,6,0.200000,8,0.266667,074701a4-86f2-4beb-a67e-b12fc841f7fe,kuguana,f,drag_net,fish,07022023CUGB04
948,119,99,,,carne,nonfish_meat,nonfish_meat,,,,...,3,0.100000,0,0.000000,368b8b4c-a618-434a-8ebe-7f7fa8c143fc,kuguana,m,drag_net,fish,07022023CUGB05
949,120,99,,,carne,nonfish_meat,nonfish_meat,,,,...,2,0.066667,2,0.066667,454f8afd-86aa-4418-80e3-4e1ccb572b8c,kuguana,m,fence_trap,fish,07022023CUGB06
950,121,99,,,carne,nonfish_meat,nonfish_meat,,,,...,3,0.100000,0,0.000000,1dfbdb30-77d7-4a1e-aaf1-2a5bccccfb75,kuguana,m,fence_trap,fish,07022023CUGB07


In [92]:
# List the columns available in the demographics table.
demo_df.columns

Index(['Unnamed: 0', 'ParentGlobalID', 'id', 'ccp', 'gender', 'fish_pract',
       'owner', 'catch_proportion', 'pract_cat'],
      dtype='object')

In [93]:
# Keep only the respondent id and the demographic descriptors carried forward.
demo_df = demo_df.loc[:, ['id', 'gender', 'ccp', 'fish_pract', 'pract_cat']]

In [94]:
# The cleaned table holds one row per recorded gear, so a respondent can appear
# several times; retain the first row for each respondent id.
demo_df = demo_df.drop_duplicates(subset='id')
demo_df.shape

(101, 5)

In [95]:
# List the columns available in the income table.
ed_df.columns

Index(['Unnamed: 0', 'focal_resource', 'gitonga_s', 'gitonga_p', 'portuguese',
       'scientific', 'family', 'phylum', 'category', 'cap_income_hot',
       'cap_income_cold', 'prop_income_hot', 'prop_income_cold',
       'est_prop_income_hot', 'est_prop_income_cold', 'id', 'ccp', 'gender',
       'fish_pract', 'pract_cat'],
      dtype='object')

In [96]:
# Number of rows and columns in the income table.
ed_df.shape

(280, 20)

In [97]:
# Sanity check: list every income row whose (id, focal_resource) pair is repeated.
repeated_income_pair = ed_df.duplicated(subset=['id', 'focal_resource'], keep=False)
ed_df_dup = ed_df[repeated_income_pair]
ed_df_dup.sort_values('id')

Unnamed: 0.1,Unnamed: 0,focal_resource,gitonga_s,gitonga_p,portuguese,scientific,family,phylum,category,cap_income_hot,cap_income_cold,prop_income_hot,prop_income_cold,est_prop_income_hot,est_prop_income_cold,id,ccp,gender,fish_pract,pract_cat


In [98]:
# Give the nutrition table the same resource key name as the income table
# so the two can be joined on it.
nd_df = nd_df.rename(columns=dict(nutrition_source='focal_resource'))
nd_df.shape

(952, 22)

In [99]:
# Display the nutrition table again to confirm the column is now named focal_resource.
nd_df

Unnamed: 0.1,Unnamed: 0,focal_resource,gitonga_s,gitonga_p,portuguese,scientific,family,phylum,category,focal,...,nutrition_prop_hot_raw,nutrition_prop_hot,nutrition_prop_cold_raw,nutrition_prop_cold,ParentGlobalID,ccp,gender,fish_pract,pract_cat,id
0,0,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,2,0.066667,2,0.066667,5242b473-cb0e-43ec-bc8f-6e626f4ee1d3,nhamua,f,glean,glean,13022023NHAMUB02
1,426,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,4,0.133333,4,0.133333,caac8748-ebed-413c-a5d2-a635b5bd8932,nhampossa,m,fence_trap,fish,09022023NHMPB04
2,434,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,5,0.166667,3,0.100000,a9325c3b-f81d-431a-9628-6b0b57ede5e3,nhampossa,f,gill_net,fish,09022023NHMPB05
3,455,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,3,0.100000,3,0.100000,be5b94c1-65c2-44dc-af3c-8a0c1f35f055,madava,m,drag_net,fish,08022023MADB02
4,489,1,hili,mihili,pescadinha,Sillago sihama,Sillaginidae,bony fish,fish,1.0,...,8,0.266667,8,0.266667,8407b1b2-9c62-4161-a560-a8c8415ca76a,madava,m,gill_net,fish,08022023MADB06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947,118,99,,,carne,nonfish_meat,nonfish_meat,,,,...,6,0.200000,8,0.266667,074701a4-86f2-4beb-a67e-b12fc841f7fe,kuguana,f,drag_net,fish,07022023CUGB04
948,119,99,,,carne,nonfish_meat,nonfish_meat,,,,...,3,0.100000,0,0.000000,368b8b4c-a618-434a-8ebe-7f7fa8c143fc,kuguana,m,drag_net,fish,07022023CUGB05
949,120,99,,,carne,nonfish_meat,nonfish_meat,,,,...,2,0.066667,2,0.066667,454f8afd-86aa-4418-80e3-4e1ccb572b8c,kuguana,m,fence_trap,fish,07022023CUGB06
950,121,99,,,carne,nonfish_meat,nonfish_meat,,,,...,3,0.100000,0,0.000000,1dfbdb30-77d7-4a1e-aaf1-2a5bccccfb75,kuguana,m,fence_trap,fish,07022023CUGB07


In [100]:
# Display only the nutrition columns that take part in the merge below.
nd_df[['id','focal_resource','nutrition_prop_hot','nutrition_prop_cold','pract_cat']]

Unnamed: 0,id,focal_resource,nutrition_prop_hot,nutrition_prop_cold,pract_cat
0,13022023NHAMUB02,1,0.066667,0.066667,glean
1,09022023NHMPB04,1,0.133333,0.133333,fish
2,09022023NHMPB05,1,0.166667,0.100000,fish
3,08022023MADB02,1,0.100000,0.100000,fish
4,08022023MADB06,1,0.266667,0.266667,fish
...,...,...,...,...,...
947,07022023CUGB04,99,0.200000,0.266667,fish
948,07022023CUGB05,99,0.100000,0.000000,fish
949,07022023CUGB06,99,0.066667,0.066667,fish
950,07022023CUGB07,99,0.100000,0.000000,fish


In [101]:
# Outer-join income and nutrition on respondent, resource and practice category,
# so pairs present in only one of the two tables are kept (with NaN on the other side).
merge_keys = ['id', 'focal_resource', 'pract_cat']
income_part = ed_df[['id','focal_resource','prop_income_hot','prop_income_cold','est_prop_income_hot','est_prop_income_cold','pract_cat']]
nutrition_part = nd_df[['id','focal_resource','nutrition_prop_hot','nutrition_prop_cold','pract_cat']]
df = income_part.merge(nutrition_part, how='outer', on=merge_keys)
df.shape

(976, 9)

In [102]:
# Display the merged income/nutrition frame.
df

Unnamed: 0,id,focal_resource,prop_income_hot,prop_income_cold,est_prop_income_hot,est_prop_income_cold,pract_cat,nutrition_prop_hot,nutrition_prop_cold
0,O1O22023MOR06,1,0.015201,0.015767,0.029990,0.055115,fish,0.166667,0.133333
1,24012023CUGB01,53,0.020040,0.000000,0.129936,0.278330,fish,,
2,24012023CUGB01,1,0.008946,0.229358,0.006961,0.029821,fish,0.333333,0.300000
3,01022023MORB07,56,0.381727,0.000000,0.380788,0.000000,fish,0.200000,0.200000
4,01022023MORB07,54,0.034545,0.022054,0.096920,0.190540,fish,,
...,...,...,...,...,...,...,...,...,...
971,07022023CUGB04,99,,,,,fish,0.200000,0.266667
972,07022023CUGB05,99,,,,,fish,0.100000,0.000000
973,07022023CUGB06,99,,,,,fish,0.066667,0.066667
974,07022023CUGB07,99,,,,,fish,0.100000,0.000000


In [103]:
# Rename the tradition table's resource key to the name used by the merge keys.
cc1_df = cc1_df.rename(columns=dict(resource_id='focal_resource'))
cc1_df.shape

(235, 19)

In [104]:
# Outer-join the tradition scores, then replace every missing value in the frame
# (including NaNs left by the earlier income/nutrition join) with 0.
tradition_part = cc1_df[['id', 'focal_resource', 'tradition', 'pract_cat']]
df = df.merge(tradition_part, how='outer', on=['id', 'focal_resource', 'pract_cat']).fillna(0)
df.shape

(1007, 10)

In [105]:
# List rows whose (id, focal_resource) pair occurs more than once in the merged frame.
repeated_pair = df.duplicated(subset=['id', 'focal_resource'], keep=False)
df_dup = df[repeated_pair]
df_dup

Unnamed: 0,id,focal_resource,prop_income_hot,prop_income_cold,est_prop_income_hot,est_prop_income_cold,pract_cat,nutrition_prop_hot,nutrition_prop_cold,tradition
3,01022023MORB07,56,0.381727,0.000000,0.380788,0.000000,fish,0.200000,0.200000,0.0
5,01022023MORB07,50,0.352619,0.284258,0.296791,0.314349,fish,0.200000,0.200000,0.0
6,01022023MORB07,10,0.217636,0.566065,0.217101,0.260829,fish,0.166667,0.166667,0.0
7,01022023MORB07,1,0.013473,0.035728,0.008400,0.068595,fish,0.100000,0.100000,1.0
17,07022023CUGB04,56,0.133779,0.082645,0.105766,0.474820,fish,0.000000,0.366667,0.0
...,...,...,...,...,...,...,...,...,...,...
968,24012023CUGB02,99,0.000000,0.000000,0.000000,0.000000,fish,0.133333,0.100000,0.0
970,07022023CUGB04,99,0.000000,0.000000,0.000000,0.000000,shrimp_net,0.200000,0.266667,0.0
971,07022023CUGB04,99,0.000000,0.000000,0.000000,0.000000,fish,0.200000,0.266667,0.0
977,08022023MADB01,70,0.000000,0.000000,0.000000,0.000000,shrimp_net,0.000000,0.000000,1.0


In [106]:
# Rename the social table's resource key to the name used by the merge keys.
cc2_df = cc2_df.rename(columns=dict(resource_id='focal_resource'))
cc2_df.shape  # not displayed: only the cell's last expression is rendered
cc2_df.columns

Index(['Unnamed: 0', 'focal_resource', 'sg_dep_median', 'sg_dep_cat', 'social',
       'ParentGlobalID', 'gitonga_s', 'gitonga_p', 'portuguese', 'scientific',
       'family', 'phylum', 'category', 'focal', 'id', 'ccp', 'gender',
       'fish_pract', 'pract_cat'],
      dtype='object')

In [107]:
# Outer-join the social-union scores and zero-fill whatever is missing afterwards.
social_part = cc2_df[['id', 'focal_resource', 'social', 'pract_cat']]
df = df.merge(social_part, how='outer', on=['id', 'focal_resource', 'pract_cat']).fillna(0)
df.shape

(1016, 11)

In [108]:
# Rename the identity table's resource key to the name used by the merge keys.
cc3_df = cc3_df.rename(columns=dict(resource_id='focal_resource'))
cc3_df.shape

(353, 19)

In [109]:
# Outer-join the identity scores and zero-fill whatever is missing afterwards.
identity_part = cc3_df[['id', 'focal_resource', 'identity', 'pract_cat']]
df = df.merge(identity_part, how='outer', on=['id', 'focal_resource', 'pract_cat']).fillna(0)
df.shape

(1023, 12)

In [110]:
# Display the frame now holding the income, nutrition, tradition, social and identity columns.
df

Unnamed: 0,id,focal_resource,prop_income_hot,prop_income_cold,est_prop_income_hot,est_prop_income_cold,pract_cat,nutrition_prop_hot,nutrition_prop_cold,tradition,social,identity
0,O1O22023MOR06,1,0.015201,0.015767,0.029990,0.055115,fish,0.166667,0.133333,0.533333,0.200000,0.200000
1,24012023CUGB01,53,0.020040,0.000000,0.129936,0.278330,fish,0.000000,0.000000,0.000000,0.166667,0.066667
2,24012023CUGB01,1,0.008946,0.229358,0.006961,0.029821,fish,0.333333,0.300000,0.266667,0.133333,0.233333
3,01022023MORB07,56,0.381727,0.000000,0.380788,0.000000,fish,0.200000,0.200000,0.000000,0.400000,1.000000
4,01022023MORB07,54,0.034545,0.022054,0.096920,0.190540,fish,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
1018,07022023CUGBO8,50,0.000000,0.000000,0.000000,0.000000,glean,0.000000,0.000000,0.000000,0.000000,0.200000
1019,08022023MADB08,53,0.000000,0.000000,0.000000,0.000000,glean,0.000000,0.000000,0.000000,0.000000,0.333333
1020,07022023CUGBO8,55,0.000000,0.000000,0.000000,0.000000,glean,0.000000,0.000000,0.000000,0.000000,0.200000
1021,07022023CUGBO8,56,0.000000,0.000000,0.000000,0.000000,glean,0.000000,0.000000,0.000000,0.000000,0.200000


In [111]:

# Build a zero-valued row for every (resource, respondent-practice) pair that has no
# record in df, so that df_filled holds the full resource x respondent-practice grid.
col_list = ['prop_income_hot','prop_income_cold', 'nutrition_prop_hot', 'nutrition_prop_cold', 'tradition', 'social', 'identity']
# NOTE(review): est_prop_income_hot/cold are not in col_list, so they are NaN (not 0)
# on the generated rows — confirm this is intended.
df['id_prac'] = df['id'] + "_" + df['pract_cat']
ids = list(df['id_prac'].unique())
resources = list(df['focal_resource'].unique())

levels = [resources,ids]
new_index = pd.MultiIndex.from_product(levels, names=['focal_resource','id_prac'])
# Construct the frame already filled with 0 instead of calling .fillna(0) on an
# all-NaN object frame, which depends on implicit object-dtype downcasting.
df_0 = pd.DataFrame(0, index=new_index, columns=col_list)

# Calculate size of df of 0s
print("resources =" + str(resources) + " length =" + str(len(resources)))
print("ids =" + str(ids) + " length =" + str(len(ids)) )
print("df of 0s length " + str(len(ids)*len(resources)))
print("actual size = " + str(len(df_0)))

# drop any pairs that are already in the dataframe
df_index = df.set_index(['focal_resource','id_prac'])
index_list = list(df_index.index)
df_0 = df_0.drop(index_list, axis=0).reset_index()
print("removing existing entries - " + str(len(df_0)))

# Recover id and pract_cat from the combined key. Split on the first "_" only,
# because pract_cat values such as "shrimp_net" contain an underscore themselves.
# `n` is passed by keyword: the positional form is deprecated in pandas.
split = df_0['id_prac'].str.split("_", n=1, expand=True)
df_0['id'] = split[0]
df_0['pract_cat'] = split[1]

# stack the two DataFrames
df_filled = pd.concat([df, df_0], ignore_index=True, axis=0)
df_filled



resources =[1, 53, 56, 54, 50, 10, 55, 2, 6, 3, 52, 78, 4, 7, 61, 70, 68, 63, 71, 69, 60, 64, 74, 37, 11, 99] length =26
ids =['O1O22023MOR06_fish', '24012023CUGB01_fish', '01022023MORB07_fish', '07022023CUGB05_fish', '07022023CUGB04_fish', '17022023NHCHB03_fish', '17022023NHCHB04_fish', '31012023MUCB06_fish', '08022023MADB05_fish', '08022023MADB06_fish', '04032023NGUB05_fish', '24012023CUGB02_fish', '19022023ILHB07_fish', '04032023NGUB06_fish', '16022023NHMUB06_fish', '15022023JOSB01_fish', '04022023CHAB08_fish', '02022023MARB04_fish', '02022023MARB07_fish', '04022023CHAB07_fish', '16022023NHMUB07_fish', '13022023NHMUB03_fish', '15022023JOSB02_fish', '15022023JOSB07_fish', '07022023CUGB06_fish', '07022023CUGB07_fish', '14022023SAHB08_fish', '13022023NHMUB01_fish', '18022023ILHB01_fish', '01022023MORBO5_fish', '08022023MADB03_fish', '02022023MARB05_fish', '08022023MADB01_fish', '31012023MUB02_fish', '31012023MUCB01_fish', '02022023MARB06_fish', '19022023ILHB05_fish', '17022023NHCHB05_f

  split =df_0['id_prac'].str.split("_",1,expand = True)


Unnamed: 0,id,focal_resource,prop_income_hot,prop_income_cold,est_prop_income_hot,est_prop_income_cold,pract_cat,nutrition_prop_hot,nutrition_prop_cold,tradition,social,identity,id_prac
0,O1O22023MOR06,1,0.015201,0.015767,0.029990,0.055115,fish,0.166667,0.133333,0.533333,0.200000,0.200000,O1O22023MOR06_fish
1,24012023CUGB01,53,0.020040,0.000000,0.129936,0.278330,fish,0.000000,0.000000,0.000000,0.166667,0.066667,24012023CUGB01_fish
2,24012023CUGB01,1,0.008946,0.229358,0.006961,0.029821,fish,0.333333,0.300000,0.266667,0.133333,0.233333,24012023CUGB01_fish
3,01022023MORB07,56,0.381727,0.000000,0.380788,0.000000,fish,0.200000,0.200000,0.000000,0.400000,1.000000,01022023MORB07_fish
4,01022023MORB07,54,0.034545,0.022054,0.096920,0.190540,fish,0.000000,0.000000,0.000000,0.000000,0.000000,01022023MORB07_fish
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3193,07022023CUGB04,11,0.000000,0.000000,,,shrimp_net,0.000000,0.000000,0.000000,0.000000,0.000000,07022023CUGB04_shrimp_net
3194,01022023MORB03,11,0.000000,0.000000,,,glean,0.000000,0.000000,0.000000,0.000000,0.000000,01022023MORB03_glean
3195,01022023MORB07,11,0.000000,0.000000,,,glean,0.000000,0.000000,0.000000,0.000000,0.000000,01022023MORB07_glean
3196,09022023NHMPB01,11,0.000000,0.000000,,,fish,0.000000,0.000000,0.000000,0.000000,0.000000,09022023NHMPB01_fish


In [112]:
# Attach the demographic columns to every row of df_filled (right join keeps all of them).
# NOTE(review): the later cells visible in this notebook keep working on df_filled, so
# the columns merged here never reach the exported summary — confirm whether df was
# meant to be used downstream.
# NOTE(review): demo_df was reduced to one row per id earlier, while this join also
# matches on pract_cat; rows for a respondent's other practice categories presumably
# get NaN demographics — verify.
df = demo_df.merge(df_filled, how='right', on=['id', 'pract_cat'])
df.shape

(3198, 16)

In [113]:
# Indicator columns that make up each sensitivity dimension.
ED_indicators = [
    'prop_income_hot',
    'prop_income_cold',
]  # economic dependence
ND_indicators = [
    'nutrition_prop_hot',
    'nutrition_prop_cold',
]  # nutritional dependence
CC_indicators = [
    'tradition',
    'social',
    'identity',
]  # cultural connection

def calculate_factor(indicators, row):
    """Return the mean of the non-missing indicator values in one row.

    Parameters
    ----------
    indicators : iterable
        Keys (column names) to read from ``row``.
    row : mapping-like
        Anything indexable by those keys, e.g. the row that
        ``DataFrame.apply(..., axis=1)`` passes in, or a plain dict.

    Returns
    -------
    number or None
        Sum of the present values divided by how many were present;
        ``None`` when every indicator is missing or ``indicators`` is empty.
    """
    val_sum = 0
    present_count = 0
    for indicator in indicators:
        indicator_val = row[indicator]
        # pd.isna recognises None and pd.NA as well as float NaN; math.isnan
        # would raise TypeError on the first two.
        if pd.isna(indicator_val):
            continue
        val_sum = val_sum + indicator_val
        present_count += 1
    if present_count == 0:
        return None
    return val_sum / present_count

# Score each dimension as the row-wise mean of its indicator columns.
for dimension_name, dimension_indicators in (
    ('Economic Dependence', ED_indicators),
    ('Nutritional Dependence', ND_indicators),
    ('Cultural Connection', CC_indicators),
):
    # Bind the current indicator list as a default so each lambda keeps its own list.
    df_filled[dimension_name] = df_filled.apply(
        lambda row, cols=dimension_indicators: calculate_factor(cols, row), axis=1
    )

In [114]:
# Overall score: unweighted sum of the three dimension scores (a NaN in any of
# them makes the sum NaN for that row).
economic_score = df_filled['Economic Dependence']
nutritional_score = df_filled['Nutritional Dependence']
cultural_score = df_filled['Cultural Connection']
df_filled['Socioeconomic Sensitivity'] = economic_score.add(nutritional_score).add(cultural_score)

In [115]:
# Mean of each dimension score and of the overall score, per practice category and resource.
# The aggregation is named by string: passing the NumPy callable (np.mean) to
# GroupBy.agg is deprecated in recent pandas and emits a FutureWarning.
summary = df_filled.groupby(['pract_cat','focal_resource']).agg(
    Economic_dependence=('Economic Dependence', 'mean'),
    Nutritional_dependence=('Nutritional Dependence', 'mean'),
    Cultural_connection=('Cultural Connection', 'mean'),
    Socioeconomic_sensitivity=('Socioeconomic Sensitivity', 'mean'))

summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Economic_dependence,Nutritional_dependence,Cultural_connection,Socioeconomic_sensitivity
pract_cat,focal_resource,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
fish,1,0.158137,0.106790,0.227734,0.492661
fish,2,0.084078,0.086728,0.070841,0.241647
fish,3,0.103929,0.071296,0.135141,0.310366
fish,4,0.010124,0.032099,0.019136,0.061359
fish,6,0.056778,0.058025,0.029835,0.144638
...,...,...,...,...,...
shrimp_net,70,0.000000,0.010294,0.022222,0.032516
shrimp_net,71,0.000519,0.004902,0.000000,0.005421
shrimp_net,74,0.000000,0.008824,0.005556,0.014379
shrimp_net,78,0.000000,0.023039,0.013072,0.036111


In [116]:
# load table with identifying species info
species = pd.read_csv("/home/rthoms/ocean_dep/sur_B/clean_up/species.csv", dtype={'resource_id': int})
# rename the resource-id column for merging 
species.rename(columns={'resource_id':'focal_resource'}, inplace=True)
summary.reset_index(inplace=True)
summary = summary.astype({"focal_resource": object})
# merge
summary = pd.merge(species[['focal_resource','scientific']],summary,on='focal_resource', how='right')
summary.set_index(['pract_cat', 'focal_resource'], inplace=True)
summary


Unnamed: 0_level_0,Unnamed: 1_level_0,scientific,Economic_dependence,Nutritional_dependence,Cultural_connection,Socioeconomic_sensitivity
pract_cat,focal_resource,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
fish,1,Sillago sihama,0.158137,0.106790,0.227734,0.492661
fish,2,Gerres oyena,0.084078,0.086728,0.070841,0.241647
fish,3,Gerres longirostris,0.103929,0.071296,0.135141,0.310366
fish,4,Terapon jarbua,0.010124,0.032099,0.019136,0.061359
fish,6,Crenidens crenidens,0.056778,0.058025,0.029835,0.144638
...,...,...,...,...,...,...
shrimp_net,70,Serratina capsoides,0.000000,0.010294,0.022222,0.032516
shrimp_net,71,Callista florida,0.000519,0.004902,0.000000,0.005421
shrimp_net,74,Potamididae,0.000000,0.008824,0.005556,0.014379
shrimp_net,78,Volema pyrum,0.000000,0.023039,0.013072,0.036111


In [117]:
# Write the summary (index levels included) to a CSV one directory above the working directory.
summary.to_csv("../socioeconomic_sensitivity_matrix_summary.csv")