In [5]:
import pandas as pd
import numpy as np
import os
import rasterio

In [6]:
data = pd.read_csv('raw_data/ethiopia_10_by_10_ready.csv')

In [7]:
data.shape

(523, 5)

In [8]:
data["nightlights"].value_counts()


nightlights
0.000000    197
0.020875      2
0.107431      2
0.008249      2
0.070480      2
           ... 
0.032912      1
0.002156      1
0.001243      1
0.031277      1
0.000624      1
Name: count, Length: 322, dtype: int64

In [9]:
def drop_0s(df):
    """
    Elimina el 90% de las filas donde la columna 'nightlights' es igual a 0 de forma aleatoria.

    Args:
    df (pandas.DataFrame): DataFrame de entrada que contiene una columna 'nightlights'.

    Returns:
    pandas.DataFrame: DataFrame con el 90% de las filas con 'nightlights' igual a 0 eliminadas.
    """
    # Filtrar filas donde 'nightlights' es igual a 0
    zero_nightlights = df[df['nightlights'] == 0]
    
    # Calcular el número de filas a mantener (10% de las filas con 'nightlights' igual a 0)
    n_keep = int(0.1 * len(zero_nightlights))
    
    # Seleccionar aleatoriamente el 10% de las filas para mantener
    rows_to_keep = zero_nightlights.sample(n=n_keep, random_state=42)
    
    # Filtrar filas donde 'nightlights' no es igual a 0
    non_zero_nightlights = df[df['nightlights'] != 0]
    
    # Concatenar las filas no cero con las 10% de filas cero seleccionadas para mantener
    new_df = pd.concat([non_zero_nightlights, rows_to_keep])
    
    # Opcional: Reordenar el DataFrame final
    new_df = new_df.sample(frac=1).reset_index(drop=True)
    
    return new_df


In [10]:
new_data = drop_0s(data)
new_data

Unnamed: 0,country,cluster_lat,cluster_lon,cons_pc,nightlights
0,eth,11.612932,37.321052,5.394292,0.352716
1,eth,6.943022,37.295017,6.041677,0.000913
2,eth,8.970891,38.624556,12.048049,0.660489
3,eth,7.055916,38.485920,68.553522,3.386573
4,eth,13.670993,37.406426,9.611530,0.002650
...,...,...,...,...,...
340,eth,12.616914,37.468402,6.849624,1.038381
341,eth,11.531110,41.414190,8.992349,0.148784
342,eth,8.075790,38.439597,4.719057,0.031817
343,eth,7.276738,38.115738,6.227012,0.166164


In [11]:
def extract_subimage(src, lat, lon):
    """
    Extrae una sub-imagen de tamaño especificado alrededor de un punto central dado.
    
    Args:
    src (rasterio.io.DatasetReader): El objeto fuente abierto de Rasterio.
    lat (float): Latitud del centro de la sub-imagen.
    lon (float): Longitud del centro de la sub-imagen.
    km_per_pixel (float): Cuántos kilómetros representa un píxel.
    size_km (int): Tamaño de un lado de la sub-imagen cuadrada en kilómetros.
    
    Returns:
    np.ndarray: La sub-imagen extraída como una matriz de NumPy.
    """
    # Convertir coordenadas geográficas a coordenadas de píxel
    px, py = ~src.transform * (lon, lat)
    px, py = int(px), int(py)
    
    # Calcular el rango en píxeles para la sub-imagen
    km_per_pixel = 0.418877
    pixel_range = int(10 / km_per_pixel / 2)
    
    # Extraer la sub-imagen
    window = rasterio.windows.Window(px - pixel_range, py - pixel_range, 2 * pixel_range, 2 * pixel_range)
    sub_image = src.read(1, window=window)
    
    return sub_image



In [12]:
new_data

Unnamed: 0,country,cluster_lat,cluster_lon,cons_pc,nightlights
0,eth,11.612932,37.321052,5.394292,0.352716
1,eth,6.943022,37.295017,6.041677,0.000913
2,eth,8.970891,38.624556,12.048049,0.660489
3,eth,7.055916,38.485920,68.553522,3.386573
4,eth,13.670993,37.406426,9.611530,0.002650
...,...,...,...,...,...
340,eth,12.616914,37.468402,6.849624,1.038381
341,eth,11.531110,41.414190,8.992349,0.148784
342,eth,8.075790,38.439597,4.719057,0.031817
343,eth,7.276738,38.115738,6.227012,0.166164


In [13]:
import matplotlib.pyplot as plt
import rasterio
source = rasterio.open("raw_data/picture.tif")
X = []
for idx, row in new_data.iterrows():
    sub_image = extract_subimage(source, row['cluster_lat'], row['cluster_lon'])
    X.append(sub_image)

In [14]:
np.savez('ethiopia_10_by_10.npz', *X)

In [27]:
        # to load and see arrays:
loaded = np.load('ethiopia_10_by_10.npz')
print(loaded)

NpzFile 'ethiopia_10_by_10.npz' with keys: arr_0, arr_1, arr_2, arr_3, arr_4...


In [15]:
# import matplotlib.pyplot as plt
# import seaborn as sns
# vmin, vmax = np.percentile(sub_image, [2,98]) 
# plt.figure(figsize=(10, 10))  # Tamaño de la figura, ajustable según necesidad
# sns.heatmap(sub_image, cmap='gray', vmin=vmin, vmax=vmax)

In [43]:
type(X[0])

numpy.ndarray

In [37]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import preprocess_input

def preprocess_images_for_vgg16(X):

    processed_images = []

    for im_as_arr in X:
        im_as_arr = tf.expand_dims(im_as_arr, axis=2)
        im_as_arr = tf.image.resize(im_as_arr, [224, 224])

        # im_as_arr = im_as_arr.numpy().astype(np.float32)
        im_as_ten = tf.convert_to_tensor(im_as_arr, dtype=tf.float32)
        im_as_ten = tf.image.grayscale_to_rgb(im_as_ten)
        im_as_arr = preprocess_input(im_as_ten)

        processed_images.append(im_as_ten)

    return processed_images

processed_images = preprocess_images_for_vgg16(X)

In [38]:
len(processed_images)

345

In [41]:
print(processed_images)

[<tf.Tensor: shape=(224, 224, 3), dtype=float32, numpy=
array([[[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        ...,
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       [[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        ...,
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       [[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        ...,
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       ...,

       [[0.        , 0.        , 0.        ],
        [0. 