In [221]:
import pandas as pd
import numpy as np

In [222]:
df = pd.read_csv("../main_data_id.csv", index_col = 'id')

In [223]:
df.head()

Unnamed: 0_level_0,record_id,Region,commodity_value,edu_level,fbo_membership,social_membership,smb_clim_infor,climate_access,clim_infor_useful,source_information,...,other_asset2,other_asset3,other_asset4,other_asset5,other_asset6,other_asset7,other_asset8,other_asset9,other_asset10,other_asset11
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1234175,Bono East,Yam,Primary education,Yes,Yes,Yes,Yes,very useful,"Telephone, Radio, Neighbor",...,Cutlass,Mobilephone,Television,Motorbike,Radio,,,,,
2,1234300,Bono East,Yam,,Yes,Yes,No,Yes,very useful,Extension Agents,...,Cutlass,Mobilephone,Motorbike,Radio,,,,,,
3,1234301,Bono East,"Maize, Yam",,No,Yes,No,Yes,somewhat useful,"Radio, Extension Agents, Other farmers",...,Cutlass,Mobilephone,Radio,,,,,,,
4,1235047,Bono East,"Maize, Yam",Primary education,Yes,Yes,Yes,Yes,very useful,"Radio, Extension Agents",...,Bike,Cutlass,Mobilephone,Television,Radio,,,,,
5,1235048,Bono East,"Maize, Yam",Secondary eduaction,Yes,Yes,Yes,Yes,very useful,Radio,...,Cutlass,Mobilephone,Radio,Motorbike,,,,,,


In [224]:
#function to extract list of values from columns
def extract_details(list_of_col_values):
    cleaned_list = []
    for val in list_of_col_values:
        res = val.split(",") #in case there are multiple values per line
        for r in res:
            cleaned_list.append(r.strip())
    return cleaned_list

In [225]:
def create_df_with_ids(unique_list):
    comm = []
    for id in range(len(unique_list)):
        new_id = id+1 #because IDs must start from 1
        comm.append({'ID':new_id, 'Name':unique_list[id]})
    df = pd.DataFrame(comm)
    return df

# Extract Commodities

In [226]:
df['commodity_value'].fillna("N/A", inplace = True)

In [227]:
commodity_names = df['commodity_value'].tolist()

In [228]:
#some of the rows contain multiple values
#so we must split them and insert them individually into the final list
cleaned_list = extract_details(commodity_names)
print(pd.unique(cleaned_list))

['Yam' 'Maize' 'N/A' 'Sweet Potato' 'Tomatoes' 'Pepper' 'Cowpea']


In [229]:
unique_commodities = list(pd.unique(cleaned_list))
unique_commodities.remove("N/A")
unique_commodities

['Yam', 'Maize', 'Sweet Potato', 'Tomatoes', 'Pepper', 'Cowpea']

In [230]:
commodities_df = create_df_with_ids(unique_commodities)
commodities_df.to_csv('../data/commodity.csv', index=False, header=True)

# Extract Source Information

In [231]:
df['source_information'].fillna("N/A", inplace = True)

In [232]:
source_names = df['source_information'].tolist()

In [233]:
cleaned_list = extract_details(source_names)
print(pd.unique(cleaned_list))

['Telephone' 'Radio' 'Neighbor' 'Extension Agents' 'Other farmers' 'N/A'
 'TV' 'Farmer-Based Organization' 'Community information centers']


In [234]:
unique_items = list(pd.unique(cleaned_list))
unique_items.remove("N/A")
unique_items

['Telephone',
 'Radio',
 'Neighbor',
 'Extension Agents',
 'Other farmers',
 'TV',
 'Farmer-Based Organization',
 'Community information centers']

In [235]:
new_df = create_df_with_ids(unique_items)
new_df.to_csv('../data/source_information.csv', index=False, header=True)

# Extract Prioritized CSA Input

In [236]:
df['prioritized_csa_input'].fillna("N/A", inplace = True)
csa_names = df['prioritized_csa_input'].tolist()

In [237]:
cleaned_list = extract_details(csa_names)
print(pd.unique(cleaned_list))

['N/A' 'Cutlass' 'Improved seed of maize' 'NPK Fertilizer' 'Mobile phone'
 'Insecticides' 'Ammonia/Urea fertilizer' 'Hoe' 'Tractor' 'Crop Residue'
 'Radio' 'Sack' 'Land' 'Community Centers' 'Organic Fertilizer'
 'Composting (farm yard manure and food waste)'
 'Pest management using pheromones' 'Improved seed of pepper'
 'Improved seed of cowpea' 'Innoculant' 'Seed Yam'
 'Improved seed of tomato']


In [238]:
unique_items = list(pd.unique(cleaned_list))
unique_items.remove("N/A")
unique_items

['Cutlass',
 'Improved seed of maize',
 'NPK Fertilizer',
 'Mobile phone',
 'Insecticides',
 'Ammonia/Urea fertilizer',
 'Hoe',
 'Tractor',
 'Crop Residue',
 'Radio',
 'Sack',
 'Land',
 'Community Centers',
 'Organic Fertilizer',
 'Composting (farm yard manure and food waste)',
 'Pest management using pheromones',
 'Improved seed of pepper',
 'Improved seed of cowpea',
 'Innoculant',
 'Seed Yam',
 'Improved seed of tomato']

In [239]:
new_df = create_df_with_ids(unique_items)
new_df.to_csv('../data/prioritized_csa_input.csv', index=False, header=True)

# Extract Crop Harvest

In [240]:
df['crop_harvest'].fillna("N/A", inplace = True)
harvest_names = df['crop_harvest'].tolist()

In [241]:
cleaned_list = extract_details(harvest_names)
print(pd.unique(cleaned_list))

['Quantity kept for planting (seed)' 'Quantity Consumed'
 'Quantity lost through Post-harvest losses'
 'Quantity used as payment for inputs' 'Other'
 'Quantity bartered or exchanged for goods and services' 'N/A']


In [242]:
unique_items = list(pd.unique(cleaned_list))
unique_items.remove("N/A")
unique_items

['Quantity kept for planting (seed)',
 'Quantity Consumed',
 'Quantity lost through Post-harvest losses',
 'Quantity used as payment for inputs',
 'Other',
 'Quantity bartered or exchanged for goods and services']

In [243]:
new_df = create_df_with_ids(unique_items)
new_df.to_csv('../data/crop_harvest.csv', index=False, header=True)

# Extract Money Borrowed

In [244]:
df['money_borrowed'].fillna("N/A", inplace = True)
borrowed_names = df['money_borrowed'].tolist()

In [245]:
cleaned_list = extract_details(borrowed_names)
print(pd.unique(cleaned_list))

['N/A' 'Informal savings and credit group' 'Relative and friends' 'Bank'
 'Money lender' 'Micro-finance institution' 'NGO/Church/Mosque']


In [246]:
unique_items = list(pd.unique(cleaned_list))
unique_items.remove("N/A")
unique_items

['Informal savings and credit group',
 'Relative and friends',
 'Bank',
 'Money lender',
 'Micro-finance institution',
 'NGO/Church/Mosque']

In [247]:
new_df = create_df_with_ids(unique_items)
new_df.to_csv('../data/money_borrowed.csv', index=False, header=True)

# Extract CSA Practice Awareness *

In [248]:
df['csa_practice_awareness'].fillna("N/A", inplace = True)
csa_prac_aware = df['csa_practice_awareness'].tolist()

In [249]:
cleaned_list = extract_details(csa_prac_aware)
print(pd.unique(cleaned_list))

['Promotion of stress (drought' 'early maturing'
 'striga and low N ) tolerant Improved maize' 'cowpea varieties'
 'Seedbed options-Ridging as an alternative to mounding for yam production'
 'Water management (mulching)'
 'Promotion of disease and pest tolerant maize and cowpea varieties'
 'Minimum tillage for maize' 'cowpea and vegetable production'
 'Enhancing access to climate information'
 'Minimum tillage  for maize and cowpea production'
 'Promotion of disease and pest tolerant maize' 'cowpea'
 'potato and tomato varieties'
 'Organic amendment for improving soil health'
 'Promotion of On-Farm Composting' 'Enhanced biopesticide use in yam'
 'maize and cowpea  systems'
 'Promotion of disease and pest tolerant potato varieties'
 'Enhanced biopesticide use in potato systems' 'N/A'
 'Pest management using sticky traps' 'pheromones'
 'Composting (farm yard manure and food waste)' 'Seasonal calenders'
 'Leguminous crops as previous crop to cereals'
 'Mucuna pruriens or cowpea /maize int

In [250]:
unique_items = list(pd.unique(cleaned_list))
unique_items.remove("N/A")
unique_items

['Promotion of stress (drought',
 'early maturing',
 'striga and low N ) tolerant Improved maize',
 'cowpea varieties',
 'Seedbed options-Ridging as an alternative to mounding for yam production',
 'Water management (mulching)',
 'Promotion of disease and pest tolerant maize and cowpea varieties',
 'Minimum tillage for maize',
 'cowpea and vegetable production',
 'Enhancing access to climate information',
 'Minimum tillage  for maize and cowpea production',
 'Promotion of disease and pest tolerant maize',
 'cowpea',
 'potato and tomato varieties',
 'Organic amendment for improving soil health',
 'Promotion of On-Farm Composting',
 'Enhanced biopesticide use in yam',
 'maize and cowpea  systems',
 'Promotion of disease and pest tolerant potato varieties',
 'Enhanced biopesticide use in potato systems',
 'Pest management using sticky traps',
 'pheromones',
 'Composting (farm yard manure and food waste)',
 'Seasonal calenders',
 'Leguminous crops as previous crop to cereals',
 'Mucuna pru

In [251]:
new_df = create_df_with_ids(unique_items)
new_df.to_csv('../data/csa_practice_awareness.csv', index=False, header=True)

# Extract CSA Practices *

In [252]:
df['csa_practices'].fillna("N/A", inplace = True)
csa_practices = df['csa_practices'].tolist()

In [253]:
cleaned_list = extract_details(csa_practices)
print(pd.unique(cleaned_list))

['None' 'Promotion of stress (drought' 'early maturing'
 'striga and low N ) tolerant Improved maize' 'cowpea varieties'
 'Minimum tillage for maize' 'cowpea and vegetable production'
 'Promotion of disease and pest tolerant maize and cowpea varieties'
 'Seedbed options-Ridging as an alternative to mounding for yam production'
 'Water management (mulching)' 'Enhancing access to climate information'
 'Promotion of disease and pest tolerant maize' 'cowpea'
 'potato and tomato varieties'
 'Organic amendment for improving soil health'
 'Promotion of On-Farming Composting'
 'Promotion of disease and pest tolerant potato varieties'
 'Enhanced biopesticide use in yam' 'maize and cowpea  systems'
 'Enhanced biopesticide use in potato system' 'N/A'
 'Composting (farm yard manure and food waste)'
 'Pest management using sticky traps' 'pheromones'
 'Leguminous crops as previous crop to cereals'
 'Mucuna pruriens or cowpea /maize intercropping to build soil C stocks'
 'Leguminous crops as previous

In [254]:
unique_items = list(pd.unique(cleaned_list))
unique_items.remove("N/A")
unique_items

['None',
 'Promotion of stress (drought',
 'early maturing',
 'striga and low N ) tolerant Improved maize',
 'cowpea varieties',
 'Minimum tillage for maize',
 'cowpea and vegetable production',
 'Promotion of disease and pest tolerant maize and cowpea varieties',
 'Seedbed options-Ridging as an alternative to mounding for yam production',
 'Water management (mulching)',
 'Enhancing access to climate information',
 'Promotion of disease and pest tolerant maize',
 'cowpea',
 'potato and tomato varieties',
 'Organic amendment for improving soil health',
 'Promotion of On-Farming Composting',
 'Promotion of disease and pest tolerant potato varieties',
 'Enhanced biopesticide use in yam',
 'maize and cowpea  systems',
 'Enhanced biopesticide use in potato system',
 'Composting (farm yard manure and food waste)',
 'Pest management using sticky traps',
 'pheromones',
 'Leguminous crops as previous crop to cereals',
 'Mucuna pruriens or cowpea /maize intercropping to build soil C stocks',
 'L

In [255]:
new_df = create_df_with_ids(unique_items)
new_df.to_csv('../data/csa_practices.csv', index=False, header=True)

# Extract Commodity Value Chain

In [256]:
df['commodity_value_chain'].fillna("N/A", inplace = True)
comm_val_chain = df['commodity_value_chain'].tolist()

In [257]:
cleaned_list = extract_details(comm_val_chain)
print(pd.unique(cleaned_list))

['Yam' 'Other' 'Maize' 'Cowpea' 'Sweet Potato' 'N/A' 'Tomatoes']


In [258]:
unique_items = list(pd.unique(cleaned_list))
unique_items.remove("N/A")
unique_items

['Yam', 'Other', 'Maize', 'Cowpea', 'Sweet Potato', 'Tomatoes']

In [259]:
new_df = create_df_with_ids(unique_items)
new_df.to_csv('../data/commodity_value_chain.csv', index=False, header=True)