In [19]:
import pandas as pd
# Relative location of the schools CSV. Forward slashes are used so the same
# notebook runs on Windows and POSIX kernels; the file name is kept verbatim.
path = '../../data/school_geolocations_with-connnectivity.csv'
from utils import get_offset_in_degrees, haversine_vectorized

In [51]:
class SchoolsExtractor:
    """Load a schools CSV and expose views filtered by connectivity status.

    The CSV is read once at construction time and kept in ``self.data``.
    The getters below never modify ``self.data``; each call returns a new
    DataFrame restricted to ``_OUTPUT_COLUMNS`` with a fresh 0..n-1 index.
    """

    # Columns returned by the public getters, in this order.
    _OUTPUT_COLUMNS = ['country', 'education_level', 'school_name', 'latitude', 'longitude']

    def __init__(self, path):
        """Remember ``path`` and eagerly load the CSV it points to."""
        self.path = path
        self.data = self._load_data(self.path)

    def _load_data(self, path):
        """Read the CSV at ``path`` into a DataFrame."""
        return pd.read_csv(path)

    def _select_by_connectivity(self, connectivity, country=None):
        """Return the rows whose ``connectivity`` column equals ``connectivity``.

        Args:
            connectivity: Value to match in the ``connectivity`` column
                (the public getters pass ``'Yes'`` or ``'No'``).
            country: Optional exact value to match in the ``country`` column.
                ``None`` keeps every country.

        Returns:
            A new DataFrame with ``_OUTPUT_COLUMNS`` only and a reset index.
        """
        mask = self.data['connectivity'] == connectivity
        if country is not None:
            mask = mask & (self.data['country'] == country)
        # Rows and columns are selected in one .loc call, and reset_index then
        # builds a new frame, so callers get an object of their own.
        return self.data.loc[mask, self._OUTPUT_COLUMNS].reset_index(drop=True)

    def get_unconnected_schools(self, country=None):
        """Return schools whose ``connectivity`` is ``'No'``, optionally for one country."""
        return self._select_by_connectivity('No', country)

    def get_connected_schools(self, country=None):
        """Return schools whose ``connectivity`` is ``'Yes'``, optionally for one country."""
        return self._select_by_connectivity('Yes', country)
        




In [47]:
def get_distance(unconnected_schools, connected_schools, tolerance: float = 10.0):
    """Compute distances from each unconnected school to nearby connected schools.

    For every row of ``unconnected_schools`` a latitude/longitude bounding box
    is built from ``get_offset_in_degrees(latitude, tolerance)``, the connected
    schools inside that box are selected, and ``haversine_vectorized`` is
    evaluated between those candidates and the unconnected school.

    Args:
        unconnected_schools: DataFrame with ``latitude`` and ``longitude`` columns.
        connected_schools: DataFrame with ``latitude`` and ``longitude`` columns.
        tolerance: Distance passed to ``get_offset_in_degrees`` to size the box
            (presumably kilometres; confirm against ``utils``).

    Returns:
        A list with one entry per unconnected school, in row order. Each entry
        is whatever ``haversine_vectorized`` returns for the candidates inside
        that school's bounding box. No radius filter is applied here, so
        candidates near the box corners may lie farther away than ``tolerance``.
    """
    distances_per_school = []

    for _, unconnected_school in unconnected_schools.iterrows():
        latitude = unconnected_school['latitude']
        longitude = unconnected_school['longitude']
        lat_offset, lon_offset = get_offset_in_degrees(latitude, tolerance)

        # Define the latitude and longitude limits from the offset.
        lat_min = latitude - lat_offset
        lat_max = latitude + lat_offset
        lon_min = longitude - lon_offset
        lon_max = longitude + lon_offset

        # Find the connected schools that fall inside those limits.
        in_box = (
            (connected_schools['latitude'] >= lat_min)
            & (connected_schools['latitude'] <= lat_max)
            & (connected_schools['longitude'] >= lon_min)
            & (connected_schools['longitude'] <= lon_max)
        )
        candidates = connected_schools[in_box]

        # Compute the distance only for the pre-filtered candidates.
        distances = haversine_vectorized(
            candidates['latitude'],
            candidates['longitude'],
            latitude,
            longitude,
        )
        distances_per_school.append(distances)

    return distances_per_school

In [76]:
def compute_nearby_connected_schools(unconnected_schools: pd.DataFrame, connected_schools: pd.DataFrame, tolerance: float = 10.0) -> pd.DataFrame:
    """
    Count and measure the connected schools within ``tolerance`` of each unconnected school.

    For every unconnected school the connected schools are first narrowed with a
    bounding box derived from ``get_offset_in_degrees(latitude, tolerance)``;
    ``haversine_vectorized`` distances are then computed for those candidates and
    only the ones with ``distance <= tolerance`` are kept.

    Args:
        unconnected_schools: DataFrame with ``latitude`` and ``longitude`` columns.
            Two columns are added to this frame in place (see Returns).
        connected_schools: DataFrame with ``latitude`` and ``longitude`` columns.
        tolerance: Search radius, in the unit returned by ``haversine_vectorized``
            (presumably kilometres; confirm against ``utils``).

    Returns:
        The same ``unconnected_schools`` object, with two new columns:
        ``isolation_index`` (number of connected schools within the radius) and
        ``connected_schools_distances`` (list of their distances).
    """
    # One entry per unconnected school, in row order.
    count_connected_schools = []
    distances_list = []

    for _, unconnected_school in unconnected_schools.iterrows():
        latitude = unconnected_school['latitude']
        longitude = unconnected_school['longitude']
        lat_offset, lon_offset = get_offset_in_degrees(latitude, tolerance)

        # Bounding box used as a coarse pre-filter before the distance computation.
        lat_min = latitude - lat_offset
        lat_max = latitude + lat_offset
        lon_min = longitude - lon_offset
        lon_max = longitude + lon_offset

        in_box = (
            (connected_schools['latitude'] >= lat_min)
            & (connected_schools['latitude'] <= lat_max)
            & (connected_schools['longitude'] >= lon_min)
            & (connected_schools['longitude'] <= lon_max)
        )
        candidates = connected_schools[in_box]

        distances = haversine_vectorized(
            candidates['latitude'],
            candidates['longitude'],
            latitude,
            longitude,
        )

        # The box is larger than the circle, so apply the radius check as well.
        within_radius = distances <= tolerance
        nearby_distances = distances[within_radius].tolist()

        count_connected_schools.append(len(nearby_distances))
        distances_list.append(nearby_distances)

    # Every school has been processed, so both lists match the frame's length.
    unconnected_schools['isolation_index'] = count_connected_schools
    unconnected_schools['connected_schools_distances'] = distances_list

    return unconnected_schools


In [53]:
# Build the extractor from the CSV at `path`, then split Rwanda's schools by
# their `connectivity` flag.
schools = SchoolsExtractor(path)
unconnected_schools = schools.get_unconnected_schools('Rwanda')  # rows with connectivity == 'No'
connected_schools = schools.get_connected_schools('Rwanda')  # rows with connectivity == 'Yes'
display(unconnected_schools)

Unnamed: 0,country,education_level,school_name,latitude,longitude
0,Rwanda,Primary,EP MUSHUBI,-1.631847,29.799809
1,Rwanda,Primary,EP KARIYERI,-1.621525,29.426821
2,Rwanda,Primary,EP MATABA SUD,-2.147800,29.939600
3,Rwanda,Primary,MWEYA,-2.297140,29.244221
4,Rwanda,Pre-Primary,ECD KIBIRIZI,-2.442858,29.561970
...,...,...,...,...,...
709,Rwanda,Primary And Secondary,GS NYABIKENKE B,-2.149312,29.490788
710,Rwanda,Primary And Secondary,GS REBERO,-1.404840,30.247391
711,Rwanda,Primary,CPEC ST BABETH,-1.574772,30.059046
712,Rwanda,Primary And Secondary,GS KAGARAMA ADVENTIST,-1.978022,29.923775


In [75]:
# 10.0 equals the function's default tolerance; presumably kilometres — TODO confirm against utils.
# NOTE(review): the saved output below shows a `connected_schools_count` column, while the
# function as written adds `isolation_index`; re-run this cell to refresh the output.
compute_nearby_connected_schools(unconnected_schools, connected_schools, 10.0)

Unnamed: 0,country,education_level,school_name,latitude,longitude,connected_schools_count,connected_schools_distances
0,Rwanda,Primary,EP MUSHUBI,-1.631847,29.799809,4,"[6.876082670437627, 6.31800696614466, 9.891909..."
1,Rwanda,Primary,EP KARIYERI,-1.621525,29.426821,3,"[9.389126929963147, 2.401191318606063, 9.31067..."
2,Rwanda,Primary,EP MATABA SUD,-2.147800,29.939600,4,"[4.825019209949741, 9.151084406815269, 9.69278..."
3,Rwanda,Primary,MWEYA,-2.297140,29.244221,4,"[4.83117006139934, 7.932508306459663, 6.120998..."
4,Rwanda,Pre-Primary,ECD KIBIRIZI,-2.442858,29.561970,0,[]
...,...,...,...,...,...,...,...
709,Rwanda,Primary And Secondary,GS NYABIKENKE B,-2.149312,29.490788,3,"[4.0747878039856325, 9.275073973715626, 9.6913..."
710,Rwanda,Primary And Secondary,GS REBERO,-1.404840,30.247391,1,[8.025874668842635]
711,Rwanda,Primary,CPEC ST BABETH,-1.574772,30.059046,5,"[1.234682023616063, 6.747405009714709, 9.25937..."
712,Rwanda,Primary And Secondary,GS KAGARAMA ADVENTIST,-1.978022,29.923775,5,"[2.2276448050462316, 6.794763803983291, 7.9572..."


In [61]:
# Ad-hoc look at the raw rows for unconnected schools in Kenya.
# NOTE(review): `data` is not defined in any cell visible here; presumably it is the raw
# frame also held in `schools.data` — confirm, otherwise this cell fails on a fresh kernel.
data[(data['country'] == 'Kenya') & (data['connectivity'] == 'No')]

Unnamed: 0,country,iso2_code,iso3_code,school_id_giga,school_name,admin1_id_giga,admin2_id_giga,education_level,connectivity,latitude,longitude,school_data_source
12297,Kenya,ke,ken,28bcee0b-fe95-3c36-881a-5b0d6bfc604d,SIAYA TOWNSHIP SECONDARY SCHOOL,KEN038,KEN038002,Secondary,No,0.066600,34.284000,
12298,Kenya,ke,ken,28c04a96-dbb8-3c1d-9b77-5afae16ca68c,TETU SUB COUNTY PRIVATE CENTRE,KEN036,KEN036002,Secondary,No,-0.476000,36.929001,
12299,Kenya,ke,ken,28cfdd40-8fb5-3ced-940a-26402778aa74,MWANGA,KEN042,KEN042004,Primary,No,1.082841,35.182041,
12300,Kenya,ke,ken,28e1600b-b36f-32c5-91b8-f2840423d757,KISANGULA SECONDARY SCHOOL,KEN045,KEN045001,Secondary,No,0.079700,34.766998,
12301,Kenya,ke,ken,28f61196-d3b4-3603-8662-822aa1c4ea12,KERUNDUT MIXED DAY SECONDARY SCHOOL,KEN002,KEN002001,Secondary,No,-0.778990,35.340939,
...,...,...,...,...,...,...,...,...,...,...,...,...
65178,Kenya,ke,ken,ff9e58d2-a481-3e18-9fa6-9ea13de337ed,ST .NIMROD ACADEMY - KOMAHILL,KEN022,KEN022004,Secondary,No,-1.286344,37.120338,
65179,Kenya,ke,ken,ffcc313b-d477-352d-838c-fc015c1ebbee,KATHONZWENI GIRLS SECONDARY SCHOOL,KEN023,KEN023003,Secondary,No,-1.923272,37.678413,
65180,Kenya,ke,ken,ffd70a0e-29aa-35c5-a706-c72c656ead71,KUNYAK SECONDARY SCHOOL,KEN032,KEN032003,Secondary,No,0.144890,35.291382,
65181,Kenya,ke,ken,ffdb97f4-d033-3f1e-b34b-352787a72a20,GREMON EDUCATION CENTRE,KEN028,KEN028006,Secondary,No,-4.006350,39.704498,
