# Import Dependencies

In [48]:
import numpy as np
import pandas as pd

# Convert Multiple Excel sheets to CSV

In [49]:
# Export every worksheet of the tracker workbook to its own CSV file.
excel_file = '../data/Moonshot Tracker Results - Auto.xlsx'
# sheet_name=None loads all worksheets in one pass as {sheet name: DataFrame}.
all_sheets = pd.read_excel(excel_file, sheet_name=None)
sheets = all_sheets.keys()

# Reuse the frames loaded above instead of re-reading the whole workbook
# from disk once per sheet.
for sheet_name, sheet in all_sheets.items():
    sheet.to_csv(f"../data/{sheet_name}.csv", index=False)

In [138]:
# Load the two CSV files this notebook works with.
projects_raw = pd.read_csv(filepath_or_buffer="../data/Projects.csv")
outputs = pd.read_csv(filepath_or_buffer="../data/Outputs.csv")

# Data Relationship

![image info](../data/data_relationship.jpeg)

In [139]:
# Show the column names available in the projects frame.
projects_raw.columns

Index(['Project ID', 'Project Title', 'Budget', 'Country', 'Country Code',
       'Link', 'Donors', 'VF or Non-VF', 'Output Count',
       'Direct Beneficiaries', 'GHG Emissions Reduction', 'description',
       'sdgs', 'solution', 'Gender Marker'],
      dtype='object')

In [140]:
# Columns of the projects frame to keep.
project_target_column = [
    'Project ID',
    'Project Title',
    'Budget',
    'Donors',
    'VF or Non-VF',
    'Output Count',
    'Direct Beneficiaries',
    'description',
]

In [141]:
# Narrow the projects frame to the columns listed in project_target_column.
projects_raw = projects_raw.loc[:, project_target_column]

In [142]:
# Show the column names available in the outputs frame.
outputs.columns

Index(['Project ID', 'Title', 'Link', 'Budget', 'Beneficiary Category',
       'Indicator', 'Baseline', 'Target', 'Notes', 'Donors',
       'Gender (% female)', 'VF or Non-VF', 'Tag', 'SEH Taxonomy',
       'RISE Taxonomy', 'Flagship', 'Technology', 'Output ID', 'Country Code',
       'Category', 'name 2', 'm49', 'continent-region', 'sub-region',
       'sids-region', 'un-member', 'undp-sids', 'un-region', 'Country Name',
       'Region', 'Economy', 'LDC', 'SIDS', 'LLDC', 'HDI', 'Status',
       'Direct Conversion Factor', 'Direct Beneficiaries', 'Output Category',
       'Beneficiary Category Pre', 'Description'],
      dtype='object')

In [158]:
# Columns of the outputs frame to keep.
outputs_target_column = [
    'Project ID',
    'Beneficiary Category',
    'Gender (% female)',
    'continent-region',
    'Country Name',
    'Region',
    'Direct Beneficiaries',
    'Output Category',
    'Description',
]

In [159]:
# Narrow the outputs frame to the columns listed in outputs_target_column.
outputs = outputs.loc[:, outputs_target_column]

# Functions for Extracting Information
Our Goal: Generate text summaries of the UNDP portfolio for each country

Approach: 
1. Assign 'continent-region', 'Country Name', and 'Region' to the project dataframe.
2. Create a function to get each 'project' for the specified 'continent-region', 'Country Name', or 'Region'.
3. Create a function to get the 'output' rows of each 'project'.
4. Create a function to call the OpenAI API and generate a summary for the UNDP portfolio for each country using a suitable prompt.

## 1. Assign 'continent-region', 'Country Name', and 'Region' to the project dataframe

In [145]:
# Take the project key together with the three location columns of outputs.
output_country = outputs.loc[
    :, ['Project ID', 'continent-region', 'Country Name', 'Region']
]

In [146]:
# Collapse repeated (project, location) rows and renumber the index from 0.
output_country = output_country.drop_duplicates(ignore_index=True)

In [147]:
# Attach the location columns to each project. Inner join: a project whose ID
# is absent from output_country is dropped, and an ID listed with several
# location rows appears once per row.
projects = projects_raw.merge(output_country, how='inner', on='Project ID')

In [148]:
# Give the location columns short names.
projects = projects.rename(
    columns={
        'continent-region': 'continent',
        'Country Name': 'country',
        'Region': 'undp_region',
    }
)

In [149]:
# Preview the first rows of the projects frame.
projects.head()

Unnamed: 0,Project ID,Project Title,Budget,Donors,VF or Non-VF,Output Count,Direct Beneficiaries,description,continent,country,undp_region
0,117913,Résilience des communautés et des écosystèmes,3459600.0,['UNITED NATIONS DEVELOPMENT PRO'],Non-VF,3,3697.5,Il s’agit de la promotion du développement Loc...,Africa,Togo,RBA
1,134793,Accès aux énergies renouvelables en milieu rur...,1431552.0,['UNITED NATIONS DEVELOPMENT PRO'],Non-VF,3,31525.0,Accélération de l’accès aux énergies renouvela...,Africa,Togo,RBA
2,91204,Apoyo a la Modernización de La Gestión Ambiental,4202031.0,UNDP (TRAC 4000) CLIMATE PROMISE (28708) - NDC...,Non-VF,1,0.0,,Americas,Panama,RBLAC
3,133871,Beyond Recovery COVID19 Energy,473000.0,['UNITED NATIONS DEVELOPMENT PRO'],Non-VF,2,17500.0,"Acceso universal, a través de la implementació...",Americas,Panama,RBLAC
4,6613,Africa Mini-grids Program,1363947.0,,VF,3,4936.0,,Africa,Zambia,RBA


In [154]:
# (number of rows, number of columns) of the projects frame.
projects.shape

(340, 11)

In [156]:
# Number of distinct Project IDs; a value below the row count means some ID
# occurs on more than one row.
projects['Project ID'].nunique()

339

## 2. Create a function to get each 'project' for the specified 'continent-region', 'Country Name', or 'Region'

In [150]:
def get_project_per_region(region_type, region_value):
    """Return the rows of `projects` whose region column matches a value.

    Both arguments are compared case-insensitively.

    Parameters
    ----------
    region_type : str
        Column of `projects` to filter on: 'continent', 'country' or
        'undp_region'.
    region_value : str
        Value to look for in that column.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows of the module-level `projects` frame, or None when
        `region_type` is not one of the three supported names.
    """
    column = region_type.lower()
    wanted = region_value.lower()

    # Guard clause: an unsupported region type yields None without touching
    # the projects frame.
    if column not in ('continent', 'country', 'undp_region'):
        return None

    matches = projects[column].str.lower().isin([wanted])
    return projects[matches]

In [151]:
# test function: each case prints the distinct values found in the filtered column
region_cases = [
    ('continent', 'Africa'),
    ('continent', 'americas'),
    ('country', 'panama'),
    ('undp_region', 'rblac'),
]
for case_type, case_value in region_cases:
    print(get_project_per_region(case_type, case_value)[case_type].unique())

['Africa']
['Americas']
['Panama']
['RBLAC']


## 3. Create a function to get the 'output' rows of each 'project'

In [160]:
# Preview the first rows of the outputs frame.
outputs.head()

Unnamed: 0,Project ID,Beneficiary Category,Gender (% female),continent-region,Country Name,Region,Direct Beneficiaries,Output Category,Description
0,117913,Clean Electricity,0.24,Africa,Togo,RBA,3697.5,Energy Access,Provide access to solar energy for 3697 househ...
1,117913,Other,,Africa,Togo,RBA,0.0,Other,Increase the percentage of coal producers in t...
2,117913,Other,,Africa,Togo,RBA,0.0,Other,Convert 460 female charcoal producers to alter...
3,134793,Clean Electricity,0.2,Africa,Togo,RBA,6525.0,Energy Access,Provide access to solar energy for households ...
4,134793,Other,,Africa,Togo,RBA,0.0,Other,Accelerate access to renewable energy for econ...


In [164]:
def get_output_per_project(project_ids):
    """Return the rows of `outputs` that belong to the given projects.

    Parameters
    ----------
    project_ids : list-like
        Project ID values to keep. They are compared as-is with the
        'Project ID' column of the module-level `outputs` frame.

    Returns
    -------
    pandas.DataFrame
        The rows of `outputs` whose 'Project ID' is in `project_ids`; IDs
        with no matching row contribute nothing.
    """
    belongs_to_requested = outputs['Project ID'].isin(project_ids)
    return outputs[belongs_to_requested]

In [165]:
# test function: look up the outputs for a handful of project IDs
project_ids = [
    '6613',
    'missing-ZMB-1',
    '1061012',
    '0',
]
get_output_per_project(project_ids)

Unnamed: 0,Project ID,Beneficiary Category,Gender (% female),continent-region,Country Name,Region,Direct Beneficiaries,Output Category,Description
9,6613,Agriculture and Food System,51.0,Africa,Zambia,RBA,4936.0,Energy Access,"Provide energy access to 4,936 beneficiaries, ..."
10,6613,Other,,Africa,Zambia,RBA,0.0,GHG Emissions Reduction,Mitigate 13.78 ktCO2 of greenhouse gas emissio...
11,6613,Clean Electricity,,Africa,Zambia,RBA,0.0,Energy Access,Install 450 kW of solar PV capacity and 1.091 ...
12,missing-ZMB-1,Health Services,,Africa,Zambia,RBA,810000.0,Energy Access,Provide access to modern electricity service f...
13,missing-ZMB-1,Other,,Africa,Zambia,RBA,0.0,Other,Provide uninterrupted power supply to 1002 hea...
391,1061012,Other,,Asia,Viet Nam,RBAP,0.0,GHG Emissions Reduction,Reduce 45 tonnes of CO2 emissions per year thr...
392,1061012,Unknown,,Asia,Viet Nam,RBAP,73500.0,Energy Transition,Install 4.2 megawatts of renewable or low-emis...
393,1061012,Unknown,,Asia,Viet Nam,RBAP,570.0,Energy Transition,Provide new access to green/sustainable energy...
394,1061012,Other,,Asia,Viet Nam,RBAP,0.0,Other,Create 70 green/sustainable jobs in Viet Nam t...
395,1061012,Other,,Asia,Viet Nam,RBAP,0.0,Other,Train and educate 370 individuals through tech...
