## Extracting Night-light images

In [1]:
import ipystata  
import pandas as pd
import savReaderWriter as s
import numpy as np
import time
import os
import os.path
from osgeo import gdal, ogr, osr
from scipy import ndimage
from scipy import misc
from io import StringIO
#import cStringIO
gdal.UseExceptions()
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline
import urllib

# Helper function to read a raster file
def read_raster(raster_file):
    """
    Function
    --------
    read_raster

    Given a raster file, get the pixel size, pixel location, and pixel value

    Parameters
    ----------
    raster_file : string
        Path to the raster file

    Returns
    -------
    x_size : float
        Pixel size
    top_left_x_coords : numpy.ndarray  shape: (number of columns,)
        Longitude of the top-left point in each pixel
    top_left_y_coords : numpy.ndarray  shape: (number of rows,)
        Latitude of the top-left point in each pixel
    centroid_x_coords : numpy.ndarray  shape: (number of columns,)
        Longitude of the centroid in each pixel
    centroid_y_coords : numpy.ndarray  shape: (number of rows,)
        Latitude of the centroid in each pixel
    bands_data : numpy.ndarray  shape: (number of rows, number of columns, 1)
        Pixel value
    """
    raster_dataset = gdal.Open(raster_file, gdal.GA_ReadOnly)
    # get project coordination
    proj = raster_dataset.GetProjectionRef()
    bands_data = []
    # Loop through all raster bands
    for b in range(1, raster_dataset.RasterCount + 1):
        band = raster_dataset.GetRasterBand(b)
        bands_data.append(band.ReadAsArray())
        no_data_value = band.GetNoDataValue()
    bands_data = np.dstack(bands_data)
    rows, cols, n_bands = bands_data.shape

    # Get the metadata of the raster
    geo_transform = raster_dataset.GetGeoTransform()
    (upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = geo_transform
    
    # Get location of each pixel
    x_size = 1.0 / int(round(1 / float(x_size)))
    y_size = - x_size
    y_index = np.arange(bands_data.shape[0])
    x_index = np.arange(bands_data.shape[1])
    top_left_x_coords = upper_left_x + x_index * x_size
    top_left_y_coords = upper_left_y + y_index * y_size
    # Add half of the cell size to get the centroid of the cell
    centroid_x_coords = top_left_x_coords + (x_size / 2)
    centroid_y_coords = top_left_y_coords + (y_size / 2)

    return (x_size, top_left_x_coords, top_left_y_coords, centroid_x_coords, centroid_y_coords, bands_data)


# Helper function to get the pixel index of the point
def get_cell_idx(lon, lat, top_left_x_coords, top_left_y_coords):
    """
    Function
    --------
    get_cell_idx

    Given a point location and all the pixel locations of the raster file,
    get the column and row index of the point in the raster

    Parameters
    ----------
    lon : float
        Longitude of the point
    lat : float
        Latitude of the point
    top_left_x_coords : numpy.ndarray  shape: (number of columns,)
        Longitude of the top-left point in each pixel
    top_left_y_coords : numpy.ndarray  shape: (number of rows,)
        Latitude of the top-left point in each pixel
    
    Returns
    -------
    lon_idx : int
        Column index
    lat_idx : int
        Row index
    """
    lon_idx = np.where(top_left_x_coords < lon)[0][-1]
    lat_idx = np.where(top_left_y_coords > lat)[0][-1]
    return lon_idx, lat_idx

In [3]:
# this illustrates how you can read the nightlight image
raster_file = 'D:/Intensive Data/Final Project/Nightlight Image/F162008.v4b_web.stable_lights.avg_vis.tif'
x_size, top_left_x_coords, top_left_y_coords, centroid_x_coords, centroid_y_coords, bands_data = read_raster(raster_file)
# save the result in compressed format - see https://docs.scipy.org/doc/numpy/reference/generated/numpy.savez.html
np.savez('nightlight2.npz', top_left_x_coords=top_left_x_coords, top_left_y_coords=top_left_y_coords, bands_data=bands_data)

In [4]:
#
# Your code here
#
def get_pixel (x_idx, y_idx):
    num_list = []
    for i in range(0,10):
        for j in range(0,10):
            num_list.append(bands_data[(y_idx-5+i),(x_idx-5+j)])
    val_max = np.max(num_list)
    val_mean = np.mean(num_list)
    val_median = np.median(num_list)
    val_min = np.min(num_list)
    val_std = np.std(num_list)
    return val_max, val_mean, val_median, val_min, val_std

def night_light_data(raster_file, cluster_centers_file):
    cluster = pd.read_csv(cluster_centers_file)
    cluster_centers = cluster[['DHSCLUST', 'xcoord', 'ycoord']]
    x_size, top_left_x_coords, top_left_y_coords, centroid_x_coords, centroid_y_coords, bands_data = read_raster(raster_file)
    lon = cluster_centers['xcoord']
    lat = cluster_centers['ycoord']
    lon_idx = lon
    lat_idx = lat

    # get index
    for i in range (0,len(cluster_centers['xcoord'])):
        lon_idx[i], lat_idx[i] = get_cell_idx(lon[i], lat[i], top_left_x_coords, top_left_y_coords)

    max_ = []
    mean_ = []
    median_ = []
    min_ = []
    std_ = []

    for center in range (0, len(cluster_centers['DHSCLUST'])):
        val_max, val_mean, val_median, val_min, val_std = get_pixel(int(lon_idx[center]), int(lat_idx[center]))
        max_.append(val_max)
        mean_.append(val_mean)
        median_.append(val_median)
        min_.append(val_min)
        std_.append(val_std)

    cluster_centers['max_'] = max_
    cluster_centers['mean_'] = mean_
    cluster_centers['median_'] = median_
    cluster_centers['min_'] = min_
    cluster_centers['std_'] = std_

    return cluster_centers

In [5]:
# input: raster file path and cluster center file path
# output: nightlife table.csv and plot night light

raster_file = 'D:/Intensive Data/Final Project/Nightlight Image/F162008.v4b_web.stable_lights.avg_vis.tif'
cluster_centers_file = 'D:/Intensive Data/Final Project/Cluster Coordinate/Nigeria2008.csv'

cluster_centers = night_light_data(raster_file, cluster_centers_file)
cluster_centers.to_csv('D:/Intensive Data/Final Project/DHS Nightlight/DHS_nightlights_Nigeria2008.csv', index = False)

# X = cluster_centers['mean_']
# Y = cluster_centers['wealth']
# # make the scatter plot
# def plot_nightlife(xd, yd, order=1, c='r', alpha=1, Rval=False):
#     #Calculate best fit
#     coeffs = np.polyfit(xd, yd, order)

#     intercept = coeffs[-1]
#     slope = coeffs[-2]
#     power = coeffs[0] if order == 2 else 0

#     minxd = np.min(xd)
#     maxxd = np.max(xd)

#     xl = np.array([minxd, maxxd])
#     yl = power * xl ** 2 + slope * xl + intercept

#     #Calculate R Squared
#     p = np.poly1d(coeffs)

#     ybar = np.sum(yd) / len(yd)
#     ssreg = np.sum((p(xd) - ybar) ** 2)
#     sstot = np.sum((yd - ybar) ** 2)
#     Rsqr = ssreg / sstot
#     plt.scatter(X, Y)
#     plt.plot(xl, yl)
#     plt.text(0.8 * maxxd + 0.2 * minxd, 0.8 * np.max(yd) + 0.2 * np.min(yd),
#                  '$R^2 = %0.3f$' % Rsqr)
#     return print('R2 value is %.6f' % (Rsqr))

#     # plot points and fit line
# plot_nightlife(X, Y, order=1, c='r', alpha=1, Rval=False)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

In [12]:
cluster_centers

NameError: name 'cluster_centers' is not defined