# Preparing RHoMIS Dataset for Spatial Analysis

In [1]:
# Setup
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from farmsize import data_prep, db_scan, mapping
from shapely.geometry import Point
import seaborn as sns
import warnings
# Silences every warning category for the rest of the kernel session. This keeps cell
# output tidy, but it also hides DeprecationWarning/FutureWarning messages raised by the
# libraries used below; comment this line out when upgrading dependencies or debugging.
warnings.simplefilter('ignore')

First, we read in the RHoMIS data alongside
the data from IPUMS Terra.

In [5]:
# --- Data sources ---------------------------------------------------------------
# Lookup table of the study countries, keyed by their ISO-2 code.
country_mapping = data_prep.load_json("./data/country_mappings.json")

# IPUMS Terra areas: keep only the study countries, then attach the mapping columns.
ipums_terra = mapping.read_geo_csv("./data/ipums/processed/ipums_terra_merged.csv")
ipums_terra = ipums_terra.loc[ipums_terra["iso_2"].isin(country_mapping["iso_2"])]
ipums_terra = ipums_terra.merge(country_mapping, on="iso_2", how="left")

# RHoMIS indicators, plus the processed survey table that carries the GPS coordinates.
indicator_data = pd.read_csv(
    "./data/rhomis-data/indicator_data/indicator_data.csv",
    encoding="latin",
    low_memory=False,
)
rhomis_data = pd.read_csv(
    "./data/rhomis-data/processed_data/processed_data.csv",
    encoding="latin",
    low_memory=False,
)

# Attach the coordinates to each indicator row through the shared "id_unique" key.
gps_coords = rhomis_data.loc[:, ["id_unique", "gps_lat", "gps_lon"]]
indicator_data = indicator_data.merge(gps_coords, how="left", on="id_unique")

# Restrict the indicators to the study countries.
# NOTE(review): complete_gps=True presumably drops rows without coordinates -
# confirm against data_prep.subset_data.
countries_iso_2 = country_mapping["iso_2"].tolist()
indicator_data = data_prep.subset_data(
    indicator_data,
    complete_gps=True,
    countries=countries_iso_2,
)

# World country polygons (the Natural Earth low-resolution sample shipped with GeoPandas).
# NOTE(review): gpd.datasets was deprecated in GeoPandas 0.14 and removed in 1.0, so this
# line needs geopandas < 1.0 (or a locally downloaded copy of the shapefile).
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

Next, we spatially join the RHoMIS household locations with the
world shapefile and the IPUMS Terra data.

In [6]:
# Spatially join the household survey with the world shapefile and the IPUMS Terra data.

# One point per survey row; shapely expects (x, y) = (longitude, latitude).
indicator_data["geometry"] = [
    Point(xy) for xy in zip(indicator_data["gps_lon"], indicator_data["gps_lat"])
]

# GPS readings are WGS84 longitude/latitude, so declare EPSG:4326 explicitly. The Natural
# Earth world layer uses the same CRS; without this the points carry no CRS at all.
# NOTE(review): assumes ipums_terra is also in EPSG:4326 (or has no CRS set) - confirm.
geo_indicator = gpd.GeoDataFrame(indicator_data, geometry="geometry", crs="EPSG:4326")

# `predicate=` replaces the `op=` keyword, which was deprecated when the GeoDataFrame.sjoin
# method was introduced (GeoPandas 0.10) and removed in GeoPandas 1.0.
# Each join adds an "index_right" column; it is renamed straight away so the second join
# does not collide with the first and both right-hand indices stay identifiable.
geo_indicator = geo_indicator.sjoin(world, how="left", predicate="within")
geo_indicator = geo_indicator.rename(columns={"index_right": "index_world_shapefile"})
geo_indicator = geo_indicator.sjoin(ipums_terra, how="left", predicate="within")
geo_indicator = geo_indicator.rename(columns={"index_right": "index_ipums_terra"})

Then we read in each of the LSMS datasets, link them to the IPUMS Terra and world data, and save the combined table.

In [7]:
# Link each country's LSMS land-size survey to the IPUMS Terra areas and the world shapefile,
# then stack the results into one table and save it.
#
# The source files disagree on the capitalisation of their coordinate columns
# ("latitude"/"longitude" vs "Latitude"/"Longitude"), so the capitalised ones are renamed to
# lower case before stacking. DataFrame.rename maps *index labels* unless `columns=` is
# given, so the mapping must be passed by keyword; a positional dict silently renames
# nothing and leaves two half-empty pairs of coordinate columns after the concat.
# NOTE(review): assumes link_points_to_ipums_and_world keeps the input coordinate column
# names on the frame it returns - confirm in data_prep.
lsms_coordinate_names = {"Latitude": "latitude", "Longitude": "longitude"}

burkina_lsms = data_prep.link_points_to_ipums_and_world(
    "BFA", "latitude", "longitude", "./data/lsms/LSMS_Burkina_landsizes.csv", ipums_terra, world
)
ethiopia_lsms = data_prep.link_points_to_ipums_and_world(
    "ETH", "latitude", "longitude", "./data/lsms/LSMS_Ethiopia_landsizes.csv", ipums_terra, world
)
malawi_lsms = data_prep.link_points_to_ipums_and_world(
    "MWI", "Latitude", "Longitude", "./data/lsms/LSMS_Malawi_2011_landsizes.csv", ipums_terra, world
)
malawi_lsms = malawi_lsms.rename(columns=lsms_coordinate_names)
niger_lsms = data_prep.link_points_to_ipums_and_world(
    "NER", "latitude", "longitude", "./data/lsms/LSMS_Niger_landsizes.csv", ipums_terra, world
)
nigeria_lsms = data_prep.link_points_to_ipums_and_world(
    "NGA", "Latitude", "Longitude", "./data/lsms/LSMS_Nigeria_landsizes.csv", ipums_terra, world
)
nigeria_lsms = nigeria_lsms.rename(columns=lsms_coordinate_names)
tanzania_lsms = data_prep.link_points_to_ipums_and_world(
    "TZA", "latitude", "longitude", "./data/lsms/LSMS_Tanzania_landsizes.csv", ipums_terra, world
)
uganda_lsms = data_prep.link_points_to_ipums_and_world(
    "UGA", "Latitude", "Longitude", "./data/lsms/LSMS_Uganda_2012_landsizes.csv", ipums_terra, world
)
uganda_lsms = uganda_lsms.rename(columns=lsms_coordinate_names)

# Stack all countries into one table and persist it for later steps.
lsms_data = pd.concat(
    [
        burkina_lsms,
        ethiopia_lsms,
        malawi_lsms,
        niger_lsms,
        nigeria_lsms,
        tanzania_lsms,
        uganda_lsms,
    ]
)
lsms_data.to_csv("./data/lsms/lsms_all.csv", index=False)