In [1]:
import csv
import math
import os

from owlready2 import *
import numpy as np
import pandas as pd
from unidecode import unidecode



# Import Data

In [2]:
# Every CSV keeps its row labels in an 'Index' column.
# Only the countries file is read with an explicit UTF-16 encoding.
countries_df_final = pd.read_csv(
    './data/countries_df_final.csv',
    encoding="utf-16",
    index_col='Index',
)
regions_df_final = pd.read_csv(
    './data/regions_df_final.csv',
    index_col='Index',
)
continents_df_final = pd.read_csv(
    './data/continents_df_final.csv',
    index_col='Index',
)

# PreProcess

In [3]:
def edit_prospect_value(text):
    """Convert a population-prospect cell into a single integer.

    Accepted inputs:
      * a digit string with optional thousands separators, e.g. "1,234";
      * several such figures separated by "/" and optionally tagged with
        "(N)" or "(S)", e.g. "1,000(N)/2,500(S)" -- the figures are summed;
      * a value that is already an integer, which is returned unchanged so
        that re-running the preprocessing cell on an already converted
        column does not fail.

    Returns:
        int: the (summed) figure.

    Raises:
        ValueError: if a fragment is not a valid integer after clean-up.
    """
    # Already numeric (e.g. the cell was executed before): nothing to parse.
    if isinstance(text, (int, np.integer)):
        return int(text)

    without_commas = text.replace(",", "")
    if without_commas.isnumeric():
        return int(without_commas)

    # Drop the "(N)"/"(S)" tags and add up the "/"-separated figures.
    fragments = without_commas.replace("(N)", "").replace("(S)", "").split('/')
    return sum(map(int, fragments))


# Collapse the 2050 prospect cells into plain integers.
continents_df_final['Population prospect by 2050'] = (
    continents_df_final['Population prospect by 2050'].apply(edit_prospect_value)
)

# Stringify, then transliterate to ASCII; a missing cell ends up as the text 'nan'.
countries_df_final['Capital city'] = countries_df_final['Capital city'].apply(
    lambda city: unidecode(str(city))
)
countries_df_final['UTC time offset'] = countries_df_final['UTC time offset'].apply(
    lambda offset: unidecode(str(offset))
)

# Stringify the codes and remove the space from every "1 " occurrence.
countries_df_final['Code'] = countries_df_final['Code'].apply(
    lambda code: str(code).replace("1 ", "1")
)

# Define Namespaces

In [4]:
# Main ontology: the class and property declarations in the cells below are
# made inside `with geo_ontology:` blocks.
geo_ontology = get_ontology("http://ce.sharif.edu/onto")

# One ontology handle per entity kind. Each is assigned as the `namespace` of
# the matching class and passed as the second argument when individuals of
# that kind are created.
ns_country = get_ontology("http://ce.sharif.edu/onto/country")
ns_region = get_ontology("http://ce.sharif.edu/onto/region")
ns_continent = get_ontology("http://ce.sharif.edu/onto/continent")
ns_lang = get_ontology("http://ce.sharif.edu/onto/language")

# Define Classes

In [5]:
with geo_ontology:
    # Common parent of the three geographic classes; the tr_* data properties
    # use it as their domain.
    class Territory(Thing): 
        pass

    # Each concrete class sets `namespace` to its own ontology handle.
    class Country(Territory): 
        namespace = ns_country
        
    class Region(Territory): 
        namespace = ns_region

    class Continent(Territory): 
        namespace = ns_continent
        
    # Languages are not territories, so they derive directly from Thing.
    class Language(Thing):
        namespace = ns_lang
        
    # Declare the four concrete classes mutually disjoint (Territory itself is
    # not part of the list).
    AllDisjoint([Continent, Country, Region, Language])

# Object Properties


In [6]:
# Object properties (individual -> individual links).
#
# An earlier draft of this cell declared the same eight properties with extra
# characteristics: country_has_region and region_has_continent were
# FunctionalProperty; r_hascountry, region_has_subregion and
# subregion_has_region were InverseFunctionalProperty. The declarations below
# carry none of those characteristics.

with geo_ontology:
    
    # Country
    # Country -> Country adjacency; filled from the 'Neighbors' column.
    class neighbour_of(Country >> Country): 
        pass
    
    # Country -> Region; declared as the inverse side of r_hascountry below.
    class country_has_region(Country >> Region): 
        pass
    
    # Country -> Language; filled from the 'Language' column.
    class has_language(Country >> Language): 
        pass

    # Region
    # Region -> Country; filled from the regions table's 'Countries' column.
    class r_hascountry(Region >> Country): 
        inverse_property = country_has_region
        
    class region_has_subregion(Region >> Region):
        pass

    # Asserted on a region row with its super region as the value
    # (see the regions insert cell).
    class subregion_has_region(Region >> Region):
        inverse_property = region_has_subregion
    
    class region_has_continent(Region >> Continent): 
        pass
    
    
    # Continent
    # Continent -> Region; filled from the continents table's 'Regions' column.
    class continent_has_region(Continent >> Region): 
        inverse_property = region_has_continent


# Data Type Properties


In [7]:
# Data properties (individual -> literal). `Domain >> type` gives the domain
# class and the literal type; FunctionalProperty marks a single-valued
# property, which the insert cells set by plain assignment. Properties without
# it (c_code, c_toffset) are filled with .append().
with geo_ontology:
    
    # Territory (shared by Country, Region and Continent)
    class tr_name(Territory >> str, FunctionalProperty): pass
    class tr_population(Territory >> int, FunctionalProperty): pass
    class tr_area(Territory >> int, FunctionalProperty): pass
    # NOTE: the name is misspelled ("pupulation"); the continents insert cell
    # assigns to this exact name, so the two must be renamed together.
    class tr_pupulation_prospect(Continent >> int, FunctionalProperty): pass
    
    
    # Country
    class c_code(Country >> int): pass
    class c_capital(Country >> str, FunctionalProperty): pass
    class c_currency(Country >> str, FunctionalProperty): pass
    class c_toffset(Country >> str): pass
    class c_domain(Country >> str, FunctionalProperty): pass
    class c_population_density(Country >> float, FunctionalProperty): pass
    class c_capital_population(Country >> float, FunctionalProperty): pass
    class c_sex_ratio(Country >> float, FunctionalProperty): pass
    class c_gdp(Country >> int,FunctionalProperty): pass
    class c_gdp_rate(Country >> float , FunctionalProperty): pass
    
    # Auto_Gen: names derived from the statistics column headers by the
    # generator loop in the last code cell.
    # NOTE(review): the two-letter suffixes below (_f/_m, _t/_o, _m/_p)
    # presumably mirror the two halves of the "a/b" column headers -- confirm.
    class population_growth_rate(Country >> float , FunctionalProperty): pass
    class urban_population(Country >> float , FunctionalProperty): pass
    class urban_population_growth_rate(Country >> float , FunctionalProperty): pass
    class fertility_rate_total(Country >> float , FunctionalProperty): pass
    
    class life_expectancy_at_birth_f(Country >> float , FunctionalProperty): pass
    class life_expectancy_at_birth_m(Country >> float , FunctionalProperty): pass
    
    class population_age_distribution_t(Country >> float , FunctionalProperty): pass
    class population_age_distribution_o(Country >> float , FunctionalProperty): pass
    
    class international_migrant_stock_m(Country >> float , FunctionalProperty): pass
    class international_migrant_stock_p(Country >> float , FunctionalProperty): pass
    
    class refugees_and_others_of_concern_to_unhcr(Country >> float , FunctionalProperty): pass
    class infant_mortality_rate(Country >> float , FunctionalProperty): pass
    class health_current_expenditure(Country >> float , FunctionalProperty): pass
    class health_physicians(Country >> float , FunctionalProperty): pass
    class education_government_expenditure(Country >> float , FunctionalProperty): pass
    
    class education_primary_gross_enrol_ratio_f(Country >> float , FunctionalProperty): pass
    class education_primary_gross_enrol_ratio_m(Country >> float , FunctionalProperty): pass
    
    class education_secondary_gross_enrol_ratio_f(Country >> float , FunctionalProperty): pass
    class education_secondary_gross_enrol_ratio_m(Country >> float , FunctionalProperty): pass
    
    class education_tertiary_gross_enrol_ratio_f(Country >> float , FunctionalProperty): pass
    class education_tertiary_gross_enrol_ratio_m(Country >> float , FunctionalProperty): pass
    
    #--------------------------------------------------------------------
    # The properties from here on are the ones the countries insert cell
    # fills in (together with population_growth_rate above).
    
    class intentional_homicide_rate(Country >> float , FunctionalProperty): pass
    class seats_held_by_women_in_national_parliaments(Country >> float , FunctionalProperty): pass
    class individuals_using_the_internet(Country >> float , FunctionalProperty): pass
    
    class threatened_species(Country >> int , FunctionalProperty): pass
    
    class forested_area(Country >> float , FunctionalProperty): pass
    
    class energy_production_primary(Country >> int , FunctionalProperty): pass
    class energy_supply_per_capita(Country >> int , FunctionalProperty): pass
    
    class important_sites_for_terrestrial_biodiversity_protected(Country >> float , FunctionalProperty): pass
    class net_official_development_assist_received(Country >> float , FunctionalProperty): pass
    class research_development_expenditure(Country >> float , FunctionalProperty): pass
    
    # _m / _c: first and second part of the
    # "CO2 emission estimates(million tons/tons per capita)" column.
    class co2_emission_estimates_m(Country >> float , FunctionalProperty): pass
    class co2_emission_estimates_c(Country >> float , FunctionalProperty): pass

    class touristvisitor_arrivals_at_national_borders(Country >> int , FunctionalProperty): pass
    
    # _u / _r: first and second part of the "(urban/rural ...)" columns.
    class pop_using_safely_managed_sanitation_u(Country >> float , FunctionalProperty): pass
    class pop_using_safely_managed_sanitation_r(Country >> float , FunctionalProperty): pass

    class pop_using_safely_managed_drinking_water_u(Country >> float , FunctionalProperty): pass
    class pop_using_safely_managed_drinking_water_r(Country >> float , FunctionalProperty): pass

    
    class net_official_development_assist_disbursed(Country >> float , FunctionalProperty): pass

# Insert Data

In [8]:
def convertURI(text):
    """Build the individual name used for an entity from its display name.

    The text is transliterated to ASCII with ``unidecode`` and then:
      * "&" becomes "and";
      * spaces and hyphens become "_";
      * double AND single quotes are removed.

    Single quotes are removed because names taken from stringified lists
    (neighbours, region members, languages) have already lost their
    apostrophes by the time they reach this function. Without that step a
    name containing an apostrophe would get one identifier from its own row
    and a different one from such a list.

    Args:
        text (str): display name.

    Returns:
        str: the cleaned-up name.
    """
    cleaned = unidecode(text)
    # Applied in order; the last pair is the apostrophe fix described above.
    for old, new in (("&", "and"), (" ", "_"), ("-", "_"), ('"', ""), ("'", "")):
        cleaned = cleaned.replace(old, new)
    return cleaned

def normalize(text):
    """Return the ASCII transliteration of ``text`` with "&" spelled out as "and"."""
    ascii_text = unidecode(text)
    return ascii_text.replace("&", "and")

In [9]:
# Create one Continent individual per row and link it to its regions.
with geo_ontology:
    for index, row in continents_df_final.iterrows():
        the_continent = Continent(convertURI(row['Index.1']), ns_continent, tr_name=normalize(row['Index.1']))

        # NaN is truthy, so a bare `if value:` would let a missing cell reach
        # int() and raise. Test for missing values explicitly; zero/empty
        # values are still skipped, as before.
        for prop_name, column in (
            ('tr_area', 'Surface area(km2)'),
            ('tr_population', 'Population(000, 2020)'),
            ('tr_pupulation_prospect', 'Population prospect by 2050'),
        ):
            cell = row[column]
            if pd.notna(cell) and cell:
                setattr(the_continent, prop_name, int(cell))

        regions_cell = row['Regions']
        if pd.notna(regions_cell) and regions_cell:
            # The cell holds a stringified list such as "['A', 'B']".
            for region_name in regions_cell.replace("[", "").replace("]", "").replace("'", "").split(", "):
                if not region_name:
                    continue  # "[]" parses to one empty name; do not create a region for it
                the_region = Region(convertURI(region_name), ns_region)
                the_continent.continent_has_region.append(the_region)

In [10]:
# Create one Region individual per row, attach it to its super region (when
# it has one) and link it to its member countries.
with geo_ontology:
    for index, row in regions_df_final.iterrows():
        the_region = Region(convertURI(row['Index.1']), ns_region, tr_name=normalize(row['Index.1']))

        # NaN is truthy, so a bare `if value:` would let a missing cell reach
        # int() and raise. Test for missing values explicitly; zero values are
        # still skipped, as before.
        area = row['Surface area(km2)']
        if pd.notna(area) and area:
            the_region.tr_area = int(area)

        population = row['Population(000, 2020)']
        if pd.notna(population) and population:
            the_region.tr_population = int(population)

        # Only these two values of the 'Region' column are treated as super regions.
        if row['Region'] in ['Sub-Saharan Africa', 'Latin America & Caribbean']:
            the_super_region = Region(convertURI(row['Region']), ns_region)
            the_region.subregion_has_region.append(the_super_region)

        if str(row['Countries']) != 'nan':
            # The cell holds a stringified list such as "['A', 'B']".
            for country_name in str(row['Countries']).replace("[", "").replace("]", "").replace("'", "").split(", "):
                if not country_name:
                    continue  # "[]" parses to one empty name; do not create a country for it
                the_country = Country(convertURI(country_name), ns_country)
                the_region.r_hascountry.append(the_country)

In [11]:
# Entries of the 'Language' lists that do not name a language.
_NON_LANGUAGE_ENTRIES = ('No offical language in India', '', '37 other languages')

# Single-valued numeric properties: (property, source column, converter, lenient).
# lenient=True  -> a value that cannot be converted (placeholders such as
#                  '~0.0' or '...') is skipped;
# lenient=False -> a conversion error is raised, as this cell always did for
#                  these columns.
_COUNTRY_NUMBER_FIELDS = (
    ('tr_area', 'Surface area(km2)', int, True),
    # Continents and regions record their population; do the same for countries.
    ('tr_population', 'Population(000, 2020)', int, True),
    ('c_population_density', 'Pop. density(per km2, 2020)', float, False),
    ('c_capital_population', 'Capital city pop.(000, 2020)', float, True),
    ('c_sex_ratio', 'Sex ratio(m per 100 f)', float, False),
    ('c_gdp', 'GDP: Gross domestic product(million current US$)', int, False),
    ('c_gdp_rate', 'GDP growth rate(annual %, const. 2015 prices)', float, True),
    ('population_growth_rate', 'Population growth rate(average annual %)', float, True),
    ('intentional_homicide_rate', 'Intentional homicide rate(per 100 000 pop.)', float, True),
    ('seats_held_by_women_in_national_parliaments', 'Seats held by women in national parliaments(%)', float, True),
    ('individuals_using_the_internet', 'Individuals using the Internet(per 100 inhabitants)', float, False),
    ('threatened_species', 'Threatened species(number)', int, False),
    ('forested_area', 'Forested area(% of land area)', float, True),
    ('energy_production_primary', 'Energy production, primary(Petajoules)', int, False),
    ('energy_supply_per_capita', 'Energy supply per capita(Gigajoules)', int, False),
    ('important_sites_for_terrestrial_biodiversity_protected', 'Important sites for terrestrial biodiversity protected(%)', float, True),
    ('net_official_development_assist_received', 'Net Official Development Assist. received(% of GNI)', float, False),
    ('research_development_expenditure', 'Research & Development expenditure(% of GDP)', float, True),
    ('touristvisitor_arrivals_at_national_borders', 'Tourist/visitor arrivals at national borders(000)', int, True),
    ('net_official_development_assist_disbursed', 'Net Official Development Assist. disbursed(% of GNI)', float, False),
)

# Columns holding two "/"-separated floats: (first property, second property, column).
_COUNTRY_PAIR_FIELDS = (
    ('co2_emission_estimates_m', 'co2_emission_estimates_c',
     'CO2 emission estimates(million tons/tons per capita)'),
    ('pop_using_safely_managed_sanitation_u', 'pop_using_safely_managed_sanitation_r',
     'Pop. using safely managed sanitation(urban/rural %)'),
    ('pop_using_safely_managed_drinking_water_u', 'pop_using_safely_managed_drinking_water_r',
     'Pop. using safely managed drinking water(urban/rural, %)'),
)


def _is_missing(cell):
    """Return True when a cell holds no data, i.e. its string form is 'nan'."""
    return str(cell) == 'nan'


def _list_items(cell):
    """Split a stringified list such as "['a', 'b']" into its raw items."""
    return str(cell).replace("[", "").replace("]", "").replace("'", "").split(", ")


def _assign_number(individual, prop_name, raw, convert, lenient):
    """Set ``prop_name`` on ``individual`` to ``convert(raw)``.

    Only conversion failures (ValueError/TypeError) are tolerated, and only
    when ``lenient`` is true; any other exception propagates.
    """
    try:
        value = convert(raw)
    except (ValueError, TypeError):
        if lenient:
            return
        raise
    setattr(individual, prop_name, value)


def _populate_country(the_country, row):
    """Copy every available attribute and link of one table row onto ``the_country``."""
    # --- single-valued numeric properties ---
    for prop_name, column, convert, lenient in _COUNTRY_NUMBER_FIELDS:
        if not _is_missing(row[column]):
            _assign_number(the_country, prop_name, row[column], convert, lenient)

    # --- "first/second" float pairs; a missing or unparsable half is skipped ---
    for first_prop, second_prop, column in _COUNTRY_PAIR_FIELDS:
        if _is_missing(row[column]):
            continue
        halves = str(row[column]).split('/')
        for prop_name, half in zip((first_prop, second_prop), halves):
            _assign_number(the_country, prop_name, half, float, True)

    # --- text properties ---
    if not _is_missing(row['Code']):
        # A country may list several space-separated codes.
        for code in str(row['Code']).split(" "):
            the_country.c_code.append(int(code))

    if not _is_missing(row['Capital city']):
        the_country.c_capital = normalize(row['Capital city'])

    if not _is_missing(row['Currency+ISO-4217']):
        the_country.c_currency = str(row['Currency+ISO-4217'])

    if not _is_missing(row['UTC time offset']):
        for offset in _list_items(row['UTC time offset']):
            the_country.c_toffset.append(str(offset))

    if not _is_missing(row['Internet Domain']):
        # Stored without dots (".af" -> "af").
        the_country.c_domain = str(row['Internet Domain']).replace(".", "")

    # --- links to other individuals ---
    if not _is_missing(row['Language']):
        for language_name in _list_items(row['Language']):
            if language_name not in _NON_LANGUAGE_ENTRIES:
                the_lang = Language(convertURI(language_name), ns_lang)
                the_country.has_language.append(the_lang)

    if not _is_missing(row['Neighbors']) and str(row['Neighbors']) != '[]':
        for neighbor_name in _list_items(row['Neighbors']):
            the_neighbor = Country(convertURI(neighbor_name), ns_country)
            the_country.neighbour_of.append(the_neighbor)


# Create one Country individual per row and fill in its data.
with geo_ontology:
    for index, row in countries_df_final.iterrows():
        the_country = Country(convertURI(row['Index.1']), ns_country, tr_name=normalize(row['Index.1']))
        _populate_country(the_country, row)


In [12]:
# Create the target folder first: saving fails when ./output does not exist,
# e.g. on a fresh checkout.
os.makedirs("./output", exist_ok=True)
geo_ontology.save(file="./output/ontology.owl")

In [13]:
#TODO 
a = "Population growth rate(average annual %)	Urban population(% of total population)	Urban population growth rate(average annual %)	Fertility rate, total(live births per woman)	Life expectancy at birth(females/males, years)	Population age distribution(0-14/60+ years old, %)	International migrant stock(000/% of total pop.)	Refugees and others of concern to UNHCR(000)	Infant mortality rate(per 1 000 live births)	Health: Current expenditure(% of GDP)	Health: Physicians(per 1 000 pop.)	Education: Government expenditure(% of GDP)	Education: Primary gross enrol. ratio(f/m per 100 pop.)	Education: Secondary gross enrol. ratio(f/m per 100 pop.)	Education: Tertiary gross enrol. ratio(f/m per 100 pop.)	Intentional homicide rate(per 100 000 pop.)	Seats held by women in national parliaments(%)	Individuals using the Internet(per 100 inhabitants)	Threatened species(number)	Forested area(% of land area)	Energy production, primary(Petajoules)	Energy supply per capita(Gigajoules)	Important sites for terrestrial biodiversity protected(%)	Net Official Development Assist. received(% of GNI)	Research & Development expenditure(% of GDP)	CO2 emission estimates(million tons/tons per capita)	Tourist/visitor arrivals at national borders(000)	Pop. using safely managed sanitation(urban/rural %)	Pop. using safely managed drinking water(urban/rural, %)	Net Official Development Assist. disbursed(% of GNI)"
# Print one owlready2 data-property stub per column header found in `a`.
for i in a.split(")"):
    # The header name is the text before "("; strip the separator whitespace
    # left over from the previous header.
    label = i.split('(')[0].strip()
    if not label:
        continue  # nothing follows the final ")"
    k = label.replace(".", "").replace(" ", "_").replace(":", "").replace("/", "").replace("&", "_").replace(",", "").lower()
    # "Research & Development" would otherwise produce a run of underscores.
    k = "_".join(piece for piece in k.split("_") if piece)
    # The space after "class" was missing, which glued keyword and name together.
    print("class " + k + "(Country >> float , FunctionalProperty): pass")

classpopulation_growth_rate(Country >> float , FunctionalProperty): pass
class	urban_population(Country >> float , FunctionalProperty): pass
class	urban_population_growth_rate(Country >> float , FunctionalProperty): pass
class	fertility_rate_total(Country >> float , FunctionalProperty): pass
class	life_expectancy_at_birth(Country >> float , FunctionalProperty): pass
class	population_age_distribution(Country >> float , FunctionalProperty): pass
class	international_migrant_stock(Country >> float , FunctionalProperty): pass
class	refugees_and_others_of_concern_to_unhcr(Country >> float , FunctionalProperty): pass
class	infant_mortality_rate(Country >> float , FunctionalProperty): pass
class	health_current_expenditure(Country >> float , FunctionalProperty): pass
class	health_physicians(Country >> float , FunctionalProperty): pass
class	education_government_expenditure(Country >> float , FunctionalProperty): pass
class	education_primary_gross_enrol_ratio(Country >> float , FunctionalPropert

In [14]:
# Show the country table (after the preprocessing above) for a visual check.
countries_df_final

Unnamed: 0_level_0,Region,"Population(000, 2020)","Pop. density(per km2, 2020)",Capital city,"Capital city pop.(000, 2020)",UN membership date,Surface area(km2),Sex ratio(m per 100 f),National currency,Exchange rate(per US$),...,Pop. using safely managed sanitation(urban/rural %),"Pop. using safely managed drinking water(urban/rural, %)",Net Official Development Assist. disbursed(% of GNI),Index.1,Neighbors,Language,Currency+ISO-4217,Internet Domain,UTC time offset,Code
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,Southern Asia,38928.0,59.6,Kabul,4114.0,19-Nov-46,652864,105.4,Afghani (AFN),78.4,...,,,,Afghanistan,"['Iran', 'Pakistan', 'Tajikistan', 'Turkmenist...","['Pashto', 'Dari']",AFN,.af,+04:30,93
Albania,Southern Europe,2878.0,105.0,Tirana,484.6,14-Dec-55,28748,103.7,Lek (ALL),108.6,...,40.2/39.4,,,Albania,"['Greece', 'North Macedonia', 'Montenegro']",['Albanian'],ALL,.al,+01:00,355
Algeria,Northern Africa,43851.0,18.4,Algiers,2729.3,08-Oct-62,2381741,102.1,Algerian Dinar (DZD),119.2,...,16.5/20.8,,,Algeria,"['Libya', 'Mali', 'Mauritania', 'Morocco', 'Ni...","['Arabic', 'Tamazight']",DZD,.dz,+01:00,213
American Samoa,Polynesia,56.0,279.0,Pago Pago,48.5,,199,103.6,US Dollar (USD),,...,,,,American Samoa,,,USD,.as,-11:00,1684
Andorra,Southern Europe,77.0,164.2,Andorra la Vella,22.6,28-Jul-93,468,102.3,Euro (EUR),0.9,...,100.0/100.0,,,Andorra,"['France', 'Spain']",['Catalan'],EUR,.ad,+01:00,376
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Northern Cyprus,,,,,,,,,,,...,,,,Northern Cyprus,,['Turkish'],,,,
Somaliland,,,,,,,,,,,...,,,,Somaliland,,"['Arabic', 'English', 'Somali']",,,,
South Ossetia,,,,,,,,,,,...,,,,South Ossetia,,"['Ossetian', 'Russian']",,,,
Taiwan,,,,,,,,,,,...,,,,Taiwan,,['Mandarin'],,.tw,+08:00,886
