In [24]:
import pandas as pd
import numpy as np
import os
import rasterio

In [37]:
# Load the cluster-level table from CSV; the path is relative to the notebook's working directory.
data = pd.read_csv('raw_data/ethiopia_10_by_10_ready.csv')

In [38]:
# Sanity check: (rows, columns) of the loaded table.
data.shape

(523, 5)

In [48]:
# Frequency of each distinct 'nightlights' value, most frequent first.
data["nightlights"].value_counts()


0.000000    197
0.020875      2
0.107431      2
0.008249      2
0.070480      2
           ... 
0.032912      1
0.002156      1
0.001243      1
0.031277      1
0.000624      1
Name: nightlights, Length: 322, dtype: int64

In [39]:
def drop_0s(df, keep_frac=0.1, random_state=42):
    """
    Randomly downsample the rows whose 'nightlights' value is exactly 0.

    With the default arguments 90% of the zero rows are discarded and 10% are
    kept. Every other row is kept unchanged (this includes rows with a missing
    'nightlights' value, since those do not compare equal to 0). The combined
    rows are then shuffled and re-indexed from 0.

    Args:
    df (pandas.DataFrame): Input frame containing a 'nightlights' column.
        It is not modified.
    keep_frac (float): Fraction of the zero rows to keep, between 0 and 1.
        The resulting row count is truncated towards zero.
    random_state (int or None): Seed used for both the selection of zero rows
        and the final shuffle, so repeated calls give the same frame.
        Pass None for a non-reproducible result.

    Returns:
    pandas.DataFrame: New frame with the non-zero rows plus the sampled zero
        rows, in shuffled order, with a fresh 0..n-1 index.

    Raises:
    ValueError: If keep_frac is outside the interval [0, 1].
    """
    if not 0 <= keep_frac <= 1:
        raise ValueError(f"keep_frac must be between 0 and 1, got {keep_frac!r}")

    # Split once into zero / non-zero rows
    is_zero = df['nightlights'] == 0
    zero_rows = df[is_zero]
    non_zero_rows = df[~is_zero]

    # Number of zero rows to keep (truncated, so small groups may keep none)
    n_keep = int(keep_frac * len(zero_rows))
    kept_zero_rows = zero_rows.sample(n=n_keep, random_state=random_state)

    combined = pd.concat([non_zero_rows, kept_zero_rows])

    # Shuffle with the same seed: without it the row order changed on every run,
    # which made downstream results non-reproducible.
    return combined.sample(frac=1, random_state=random_state).reset_index(drop=True)


In [41]:
# Downsample the zero-'nightlights' rows of the loaded table and display the result.
new_data = drop_0s(data)
new_data

Unnamed: 0,country,cluster_lat,cluster_lon,cons_pc,nightlights
0,eth,9.061072,38.791779,12.539863,5.043269
1,eth,5.901101,36.578564,4.349776,0.003150
2,eth,10.124927,39.671309,10.105894,0.026964
3,eth,6.501567,37.470998,4.182236,0.007650
4,eth,11.110676,39.627900,12.601003,0.619916
...,...,...,...,...,...
340,eth,8.167912,37.746413,3.705339,0.031277
341,eth,9.847087,36.342298,12.599424,0.003002
342,eth,12.623133,39.651682,11.201098,0.011183
343,eth,9.096073,43.204145,12.017693,0.070480


In [42]:
def extract_subimage(src, lat, lon, km_per_pixel=0.418877, size_km=10):
    """
    Extract a square sub-image of band 1 centred on a geographic point.

    Args:
    src (rasterio.io.DatasetReader): Open Rasterio dataset to read from.
    lat (float): Latitude of the centre of the sub-image.
    lon (float): Longitude of the centre of the sub-image.
    km_per_pixel (float): How many kilometres one pixel represents.
    size_km (float): Side length of the square sub-image in kilometres.

    Returns:
    np.ndarray: The array returned by src.read for band 1 and the computed
        window. The requested window is 2 * int(size_km / km_per_pixel / 2)
        pixels on each side.

    Raises:
    ValueError: If km_per_pixel or size_km is not positive.
    """
    if km_per_pixel <= 0:
        raise ValueError(f"km_per_pixel must be positive, got {km_per_pixel!r}")
    if size_km <= 0:
        raise ValueError(f"size_km must be positive, got {size_km!r}")

    # Inverse affine transform maps (x=lon, y=lat) to fractional (column, row)
    px, py = ~src.transform * (lon, lat)
    px, py = int(px), int(py)

    # Half the side length in whole pixels (truncated)
    pixel_range = int(size_km / km_per_pixel / 2)

    # Window arguments are (col_off, row_off, width, height)
    # NOTE(review): points near the raster edge produce a window that extends
    # past the image; confirm how src.read treats that for this dataset.
    window = rasterio.windows.Window(px - pixel_range, py - pixel_range, 2 * pixel_range, 2 * pixel_range)
    sub_image = src.read(1, window=window)

    return sub_image



In [47]:
# Display the downsampled table again before extracting the image patches.
new_data

Unnamed: 0,country,cluster_lat,cluster_lon,cons_pc,nightlights
0,eth,9.061072,38.791779,12.539863,5.043269
1,eth,5.901101,36.578564,4.349776,0.003150
2,eth,10.124927,39.671309,10.105894,0.026964
3,eth,6.501567,37.470998,4.182236,0.007650
4,eth,11.110676,39.627900,12.601003,0.619916
...,...,...,...,...,...
340,eth,8.167912,37.746413,3.705339,0.031277
341,eth,9.847087,36.342298,12.599424,0.003002
342,eth,12.623133,39.651682,11.201098,0.011183
343,eth,9.096073,43.204145,12.017693,0.070480


In [54]:

import matplotlib.pyplot as plt
import rasterio
# Collect one image patch per row of new_data, in row order.
X = []
# Open the raster in a context manager so the file handle is released once all
# patches have been read (previously the dataset was never closed).
# `source` stays bound afterwards but refers to a closed dataset.
with rasterio.open("raw_data/picture.tif") as source:
    for idx, row in new_data.iterrows():
        sub_image = extract_subimage(source, row['cluster_lat'], row['cluster_lon'])
        X.append(sub_image)

In [49]:
# Disabled preview: plots `sub_image` as a grayscale heatmap, with the colour
# range clipped to the 2nd-98th percentile of its values. Uncomment to use.
# import matplotlib.pyplot as plt
# import seaborn as sns
# vmin, vmax = np.percentile(sub_image, [2,98]) 
# plt.figure(figsize=(10, 10))  # Figure size, adjust as needed
# sns.heatmap(sub_image, cmap='gray', vmin=vmin, vmax=vmax)