# Tests

In this notebook we test the compatibility of different sector code lists. First we load the external IATI reference and then the corresponding worksheet of the geodata template.

## Import the IATI data

In [2]:
import json
from pprint import pprint

# Path to the IATI sector codes JSON file.
file_path = "../external/iati_sector_codes_v2_0_3.json"

# JSON text is UTF-8 by specification; passing the encoding explicitly keeps
# the load independent of the platform's default locale encoding.
with open(file_path, encoding="utf-8") as f:
    full_data = json.load(f)

# The sector records (dicts with "category", "code", "description", "name",
# "status") are stored under the top-level "data" key.
sector_codes = full_data["data"]
pprint(sector_codes)

[{'category': '111',
  'code': '11110',
  'description': 'Education sector policy, planning and programmes; aid to '
                 'education ministries, administration and management systems; '
                 'institution capacity building and advice; school management '
                 'and governance; curriculum and materials development; '
                 'unspecified education activities.',
  'name': 'Education policy and administrative management',
  'status': 'active'},
 {'category': '111',
  'code': '11120',
  'description': 'Educational buildings, equipment, materials; subsidiary '
                 'services to education (boarding facilities, staff housing); '
                 'language training; colloquia, seminars, lectures, etc.',
  'name': 'Education facilities and training',
  'status': 'active'},
 {'category': '111',
  'code': '11130',
  'description': 'Teacher education (where the level of education is '
                 'unspecified); in-service and pre-service 

In [9]:
# Collect the "code" field of every sector entry, keeping the original order.
iati_codes = [entry["code"] for entry in sector_codes]

pprint(iati_codes)

['11110',
 '11120',
 '11130',
 '11182',
 '11220',
 '11230',
 '11231',
 '11232',
 '11240',
 '11250',
 '11260',
 '11320',
 '11321',
 '11322',
 '11330',
 '11420',
 '11430',
 '12110',
 '12181',
 '12182',
 '12191',
 '12196',
 '12220',
 '12230',
 '12240',
 '12250',
 '12261',
 '12262',
 '12263',
 '12264',
 '12281',
 '12310',
 '12320',
 '12330',
 '12340',
 '12350',
 '12382',
 '13010',
 '13020',
 '13030',
 '13040',
 '13081',
 '13096',
 '14010',
 '14015',
 '14020',
 '14021',
 '14022',
 '14030',
 '14031',
 '14032',
 '14040',
 '14050',
 '14081',
 '15110',
 '15111',
 '15112',
 '15113',
 '15114',
 '15116',
 '15117',
 '15118',
 '15119',
 '15120',
 '15121',
 '15122',
 '15123',
 '15124',
 '15125',
 '15126',
 '15127',
 '15128',
 '15129',
 '15130',
 '15131',
 '15132',
 '15133',
 '15134',
 '15135',
 '15136',
 '15137',
 '15140',
 '15142',
 '15143',
 '15144',
 '15150',
 '15151',
 '15152',
 '15153',
 '15154',
 '15155',
 '15156',
 '15160',
 '15161',
 '15162',
 '15163',
 '15164',
 '15170',
 '15180',
 '15185',


Now we also import the data from the `Project_Location_Data_Template_V02.xlsx` Excel file.

In [21]:
# open the DAC Purpose Codes worksheet from the excel
import pandas as pd

# Path to the Excel file
excel_file_path = '../Project_Location_Data_Template_V02.xlsx'

# Name or index of the worksheet to import
worksheet_name = "DAC Purpose Codes"

# Import only that worksheet; the three code columns are read with dtype str.
df = pd.read_excel(
    excel_file_path,
    sheet_name=worksheet_name,
    dtype=dict.fromkeys(
        ["DAC 5 Code (CRS Code)", "DAC 3 Code", "voluntary code"],
        str,
    ),
)

# Show the first rows of the imported data
print(df.head())


  DAC 3 Code DAC 5 Code (CRS Code) voluntary code  \
0        110                   NaN            NaN   
1        111                   NaN            NaN   
2        NaN                 11110            NaN   
3        NaN                 11120            NaN   
4        NaN                 11130            NaN   

                                      DESCRIPTION  \
0                                       Education   
1                    Education, Level Unspecified   
2  Education policy and administrative management   
3               Education facilities and training   
4                                Teacher training   

       Clarifications / Additional notes on coverage  
0                                                NaN  
1  The codes in this category are to be used only...  
2  Education sector policy, planning and programm...  
3  Educational buildings, equipment, materials; s...  
4  Teacher education (where the level of educatio...  


In [22]:
# Non-null entries of the "DAC 5 Code (CRS Code)" column, in worksheet order
template_codes = df["DAC 5 Code (CRS Code)"].dropna().tolist()

# Non-null entries of the "voluntary code" column, in worksheet order
voluntary_codes = df["voluntary code"].dropna().tolist()

pprint(template_codes)
pprint(voluntary_codes)

['11110',
 '11120',
 '11130',
 '11182',
 '11220',
 '11230',
 '11231',
 '11232',
 '11240',
 '11250',
 '11260',
 '11320',
 '11330',
 '11420',
 '11430',
 '12110',
 '12181',
 '12182',
 '12191',
 '12220',
 '12230',
 '12240',
 '12250',
 '12261',
 '12262',
 '12263',
 '12264',
 '12281',
 '12310',
 '12320',
 '12330',
 '12340',
 '12350',
 '12382',
 '13010',
 '13020',
 '13030',
 '13040',
 '13081',
 '14010',
 '14015',
 '14020',
 '14021',
 '14022',
 '14030',
 '14031',
 '14032',
 '14040',
 '14050',
 '14081',
 '15110',
 '15111',
 '15112',
 '15113',
 '15114',
 '15125',
 '15130',
 '15142',
 '15150',
 '15151',
 '15152',
 '15153',
 '15160',
 '15170',
 '15180',
 '15190',
 '15210',
 '15220',
 '15230',
 '15240',
 '15250',
 '15261',
 '16010',
 '16020',
 '16030',
 '16040',
 '16050',
 '16061',
 '16062',
 '16063',
 '16064',
 '16070',
 '16080',
 '21010',
 '21020',
 '21030',
 '21040',
 '21050',
 '21061',
 '21081',
 '22010',
 '22020',
 '22030',
 '22040',
 '23110',
 '23181',
 '23182',
 '23183',
 '23210',
 '23220',


In [24]:
# print the number of entries in each of the three code lists for comparison
for code_list in (iati_codes, template_codes, voluntary_codes):
    pprint(len(code_list))

325
234
61


It would seem that the lists are not of the same length and that the list of IATI codes is longer than the code lists from the template. So let us see which IATI codes are not in the template.

In [29]:
# All codes the template knows about: the regular DAC 5 codes plus the
# voluntary codes. A set gives constant-time membership checks instead of
# scanning both lists once per IATI code.
known_template_codes = set(template_codes) | set(voluntary_codes)

# Report and count every IATI code that does not appear in the template.
missing_codes = 0
for code in iati_codes:
    if code not in known_template_codes:
        print(f"the code {code} is not present in the template")
        missing_codes += 1

print(f"there are {missing_codes} missing codes in the template")

the code 11321 is not present in the template
the code 11322 is not present in the template
the code 15120 is not present in the template
the code 15140 is not present in the template
the code 15161 is not present in the template
the code 15162 is not present in the template
the code 15163 is not present in the template
the code 15164 is not present in the template
the code 23010 is not present in the template
the code 23020 is not present in the template
the code 23030 is not present in the template
the code 23040 is not present in the template
the code 23050 is not present in the template
the code 23061 is not present in the template
the code 23062 is not present in the template
the code 23063 is not present in the template
the code 23064 is not present in the template
the code 23065 is not present in the template
the code 23066 is not present in the template
the code 23067 is not present in the template
the code 23068 is not present in the template
the code 23069 is not present in t

In [30]:
# Show the number of missing codes again (same summary line as printed at the end of the previous cell).
print(f"there are {missing_codes} missing codes in the template")

there are 30 missing codes in the template
