In [1]:
from IPython.display import display, HTML

# Inject CSS that widens the notebook, menubar and toolbar containers.
display(HTML(data="""
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

In [2]:
# %pip install unidecode

In [2]:
import unidecode
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt 
import chardet
import geopandas as gpd
from geopandas import GeoSeries
from shapely.geometry import Point, LineString
import folium  as fm
from folium import Marker, GeoJson
from folium.plugins import MarkerCluster, HeatMap, StripePattern
import branca as br 

### Import data and shape file

In [3]:
# Detect the character encoding of the mining CSV from its raw bytes.
# The context manager closes the file handle once the bytes are read.
with open(r'../data/MINING_PERU.csv', 'rb') as raw_file:
    base = raw_file.read()
det = chardet.detect(base)
charenc = det['encoding']

Import the mines data

In [4]:
# The mines CSV must be decoded with the encoding detected from the mining
# file itself; the poverty file may be encoded differently.
with open(r'../data/MINING_PERU.csv', 'rb') as raw_file:
    mines_encoding = chardet.detect(raw_file.read())['encoding']
mines = pd.read_csv(r'../data/MINING_PERU.csv', encoding=mines_encoding)

# Detect the poverty CSV encoding; `charenc` is what the later read of
# POVERTY_PERU.csv uses.
with open(r'../data/POVERTY_PERU.csv', 'rb') as raw_file:
    base = raw_file.read()
det = chardet.detect(base)
charenc = det['encoding']

In [5]:
# Store UBIGEO as a zero-padded 6-character string, the same format used for
# poverty['UBIGEO'], so leading zeros lost by numeric parsing are restored.
mines['UBIGEO'] = mines['UBIGEO'].astype(str).str.zfill(6)

In [6]:
# Use the same department column name as the poverty data
mines = mines.rename(columns={'REGION': 'DEPARTAMENTO'})

In [7]:
# Load the poverty data; at this point `charenc` holds the encoding that was
# detected from POVERTY_PERU.csv in the previous loading cell.
poverty = pd.read_csv(r'../data/POVERTY_PERU.csv', encoding = charenc)

In [8]:
# Function to remove tildes
def quitar_tildes(cadena):
    """Return `cadena` transliterated to plain ASCII (accents/tildes removed).

    Non-string values (for example NaN in a missing cell, or None) are
    returned unchanged, so the function can be applied to a column that
    contains missing data without raising.
    """
    if not isinstance(cadena, str):
        return cadena
    return unidecode.unidecode(cadena)

# Strip accents from the department and district names.
# NOTE(review): PROVINCIA is not normalised here although it is later used as
# a merge key together with these two columns; confirm it needs no cleaning.
for name_col in ('DEPARTAMENTO', 'DISTRITO'):
    poverty[name_col] = poverty[name_col].apply(quitar_tildes)

In [9]:
# Give the district code column the same name used in the shapefile
poverty = poverty.rename(columns={'UBIGEO1': 'UBIGEO'})

In [10]:
# Cast UBIGEO to string so it can be zero-padded and used as a merge key
poverty['UBIGEO'] = poverty['UBIGEO'].astype(str)

In [11]:
# Left-pad UBIGEO with zeros to a width of 6 characters
poverty['UBIGEO'] = poverty['UBIGEO'].str.zfill(6)

In [12]:
# Remove leading/trailing whitespace from the department names
poverty['DEPARTAMENTO'] = poverty['DEPARTAMENTO'].str.strip()

We import the shape file for the districts

In [13]:
# Read the district boundary shapefile.
# NOTE(review): the path points outside this project's folder tree; confirm it exists on the machine running the notebook.
dist_shape = gpd.read_file( r'../../Diplomado_PUCP/_data/LIMITE_DISTRITAL_2020_INEI/INEI_LIMITE_DISTRITAL.shp')

In [14]:
# Only the district code and the geometry are needed downstream
dist_shape = dist_shape.loc[:, ['UBIGEO', 'geometry']]

In [15]:
# Right merge: keep every district polygon, attaching poverty data where the UBIGEO matches
dataset_map = poverty.merge(dist_shape, on="UBIGEO", how="right")

In [16]:
# Count how many mine records fall in each (department, province, district)
district_keys = ['DEPARTAMENTO', 'PROVINCIA', 'DISTRITO']
mines_count = (
    mines
    .groupby(district_keys)
    .size()
    .reset_index(name='NUM_MINAS_DIST')
)

In [17]:
# Left merge: keep every poverty row and attach the mine count where one exists
combined_df = poverty.merge(
    mines_count,
    how='left',
    on=['DEPARTAMENTO', 'PROVINCIA', 'DISTRITO'],
)

Not all districts have mines, so we fill the missing values with 0

In [18]:
# Rows with no match in mines_count got NaN from the left merge; count them as 0 mines
combined_df['NUM_MINAS_DIST'] = combined_df['NUM_MINAS_DIST'].fillna(0)

We create a new column to indicate if the district has a mine 

In [19]:
# Boolean flag: True when the district has at least one mine
combined_df['TIENE_MINA'] = combined_df['NUM_MINAS_DIST'].gt(0)

We create a new dataset that only includes departments that have at least one district with a mine

In [20]:
# Total number of mines per department, with DEPARTAMENTO kept as a regular column
minas_por_departamento = (
    combined_df
    .groupby('DEPARTAMENTO', as_index=False)['NUM_MINAS_DIST']
    .sum()
)

We identify the departments with 0 mines and filter them out of the new dataframe

In [21]:
# Departments whose districts sum to zero mines
zero_mines_mask = minas_por_departamento['NUM_MINAS_DIST'].eq(0)
departamentos_sin_minas = minas_por_departamento.loc[zero_mines_mask, 'DEPARTAMENTO']

# Keep only the rows that belong to a department with at least one mine
in_mining_department = ~combined_df['DEPARTAMENTO'].isin(departamentos_sin_minas)
dataset2 = combined_df[in_mining_department]

## POVERTY AND MINES MAP

In [None]:
# Index.str.strip() returns a new Index, so the stripped names have to be
# assigned back for the cleanup to take effect.
column_names = mines.columns.str.strip().tolist()
# Rename the column at position 1 to 'METODO' by rebuilding the column list;
# writing into Index.values is not supported because an Index is immutable.
column_names[1] = 'METODO'
mines.columns = column_names
mines.head(1)

In [None]:
# Function that builds, for one mine, the HTML table shown in its map popup.
def visual_html(i, data=None):
    """Build the HTML document describing a single mine.

    Parameters
    ----------
    i : int
        Positional (``iloc``) index of the mine row.
    data : pandas.DataFrame, optional
        Frame to read the mine from. Defaults to the notebook-level ``mines``
        frame. It must contain the columns METODO, TITULAR, UNIDAD, PRODUCTO,
        DEPARTAMENTO, PROVINCIA and DISTRITO.

    Returns
    -------
    str
        An HTML document with a heading and a two-column table (label on the
        left, value on the right), one row per field.
    """
    # Local import keeps the notebook's import cell untouched.
    from html import escape

    if data is None:
        data = mines

    # Color for each column of the table
    left_col_colour = "#31bfeb"
    right_col_colour = "#BDC3C7"

    # (label shown in the popup, column it is read from)
    fields = (
        ("Method of exploitation", "METODO"),
        ("Owner", "TITULAR"),
        ("Unit", "UNIDAD"),
        ("Product", "PRODUCTO"),
        ("Department", "DEPARTAMENTO"),
        ("Province", "PROVINCIA"),
        ("District", "DISTRITO"),
    )

    row_template = (
        '<tr>\n'
        '<td style="background-color: {left};"><span style="color: #ffffff;">{label}</span></td>\n'
        '<td style="width: 150px;background-color: {right};">{value}</td>\n'
        '</tr>'
    )

    # Values come straight from the CSV, so escape them before embedding them
    # in markup; otherwise characters such as '<' or '&' break the table.
    table_rows = "\n".join(
        row_template.format(
            left=left_col_colour,
            right=right_col_colour,
            label=label,
            value=escape(str(data[column].iloc[i])),
        )
        for label, column in fields
    )

    return (
        '<!DOCTYPE html>\n'
        '<html>\n'
        '<head>\n'
        '</head>\n'
        '<body>\n'
        '<p> Mine information </p>\n'
        '<table style="height: 126px; width: 350px;">\n'
        '<tbody>\n'
        + table_rows +
        '\n</tbody>\n'
        '</table>\n'
        '</body>\n'
        '</html>\n'
    )

In [None]:
# Step 2: Calculate the average poverty rate for districts with and without mines within each department
average_poverty_rates = dataset2.groupby(['DEPARTAMENTO', 'TIENE_MINA'])['POVERTY_RATE'].mean().reset_index()

# Step 3: Pivot the table for easier comparison
# One row per department, with separate columns for the average poverty rate of districts without and with mines.
pivot_table = average_poverty_rates.pivot(index='DEPARTAMENTO', columns='TIENE_MINA', values='POVERTY_RATE')
# Force the column order [False, True] (adding an all-NaN column if one group
# is absent) so the positional relabeling below cannot swap or mislabel them.
pivot_table = pivot_table.reindex(columns=[False, True])
pivot_table.columns = ['No_Mines', 'With_Mines']

# Step 4: Calculate the difference in poverty rates within departments
# Negative values mean districts with mines have a lower average poverty rate.
pivot_table['Poverty_Rate_Difference'] = pivot_table['With_Mines'] - pivot_table['No_Mines']

In [None]:
# Turn the DEPARTAMENTO index of the pivot back into a regular column
pov_dif = pivot_table.reset_index()

In [None]:
# Read the department boundary shapefile.
# NOTE(review): the path points outside this project's folder tree; confirm it exists on the machine running the notebook.
dpt_shape = gpd.read_file( r'../../Diplomado_PUCP/_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp')

In [None]:
# Only the department name and the geometry are needed downstream
dpt_shape = dpt_shape.loc[:, ['NOMBDEP', 'geometry']]

In [None]:
# Match the department column name used in the tabular data
dpt_shape = dpt_shape.rename(columns={'NOMBDEP': 'DEPARTAMENTO'})

In [None]:
# Keep the department name together with the PBI_PC and IDH columns
pbi_idh = poverty[['DEPARTAMENTO','PBI_PC','IDH']]

In [None]:
# Keep one row per department (drop_duplicates keeps the first occurrence).
# NOTE(review): this assumes PBI_PC and IDH are constant within a department; confirm against the data.
pbi_idh = pbi_idh.drop_duplicates(subset=['DEPARTAMENTO']).reset_index(drop=True)

In [None]:
# Attach PBI_PC and IDH to the per-department poverty comparison
dpt_data = pov_dif.merge(pbi_idh, on="DEPARTAMENTO", how='left')

In [None]:
# Right merge keeps every department present in dpt_shape, including those with no row in dpt_data.
# NOTE: dpt_data is rebuilt from itself, so re-running this cell without
# re-running the previous one merges the shapes in a second time.
dpt_data = pd.merge(dpt_data, dpt_shape, how = 'right', on =  "DEPARTAMENTO" )

In [None]:
# Initial map centre and zoom level
lat_palacio = -12.0757538
long_palacio = -76.9863174
zoom_start = 6

mine2 = fm.Map(location=[lat_palacio, long_palacio], tiles='cartodbpositron', zoom_start=zoom_start)

# Department polygons coloured by the poverty-rate difference
fm.Choropleth(
    geo_data=dpt_shape,
    data=dpt_data,
    columns=['DEPARTAMENTO', 'Poverty_Rate_Difference'],
    key_on="feature.properties.DEPARTAMENTO",
    fill_color="RdYlGn_r",
    fill_opacity=0.8,
    line_opacity=0.2,
    legend_name="Poverty Rate Difference (%)",
    smooth_factor=0,
    highlight=True,  # keyword is lowercase; a capitalised `Highlight` is not the highlight option
    line_color="#000000",
    overlay=True,
    nan_fill_color="White",  # fill white missing values
).add_to(mine2)

# One marker per mine, with the HTML table from visual_html() as its popup
for i in range(len(mines)):
    popup_html = visual_html(i)

    iframe = br.element.IFrame(html=popup_html, width=350, height=300)
    popup = fm.Popup(iframe, parse_html=True)

    fm.Marker(
        [mines['LATITUD'].iloc[i], mines['LONGITUD'].iloc[i]],
        popup=popup,
        icon=fm.Icon(color='blue', icon='gem', prefix="fa"),
    ).add_to(mine2)

title_html = '''
             <h3 align="center" style="font-size:20px"><b>Average difference in poverty rate between districts with mines and without mines by department</b></h3>
             '''
# folium is imported under the alias `fm`; the bare name `folium` is not bound in this notebook.
mine2.get_root().html.add_child(fm.Element(title_html))

# Add the layer control last so it is built after everything else is on the map.
fm.LayerControl().add_to(mine2)

mine2

In [None]:
# Most negative difference first (ascending is the sort_values default)
pov_dif.sort_values('Poverty_Rate_Difference')

In 16 of the 21 departments that have a mine, the average poverty rate of districts with mines is lower than the average poverty rate of districts without mines. The biggest differences in poverty can be found in Pasco (-21 pp), where there is an average 21-point difference in the poverty rate, and Amazonas (-15 pp).

In [None]:
# Grouped bar chart: average poverty rate of districts without / with mines, per department.
# matplotlib is already imported as `plt` in the notebook's import cell.
fig, ax = plt.subplots(figsize=(14, 8))

# Bar positions: the "with mines" bar sits one bar-width to the right of the
# "no mines" bar. Positions are taken from pov_dif, the same frame that
# supplies the bar heights and tick labels.
bar_width = 0.25
x_no_mines = list(range(len(pov_dif)))
x_with_mines = [x + bar_width for x in x_no_mines]

# Plot bars
ax.bar(x_no_mines, pov_dif['No_Mines'], color='blue', width=bar_width, label='No Mines')
ax.bar(x_with_mines, pov_dif['With_Mines'], color='red', width=bar_width, label='With Mines')

# Add labels to the plot; ticks are centred between the two bars of each department
ax.set_xlabel('Department', fontsize=12)
ax.set_ylabel('Average Poverty Rate', fontsize=12)
ax.set_xticks([x + bar_width / 2 for x in x_no_mines])
ax.set_xticklabels(pov_dif['DEPARTAMENTO'], rotation=90)
ax.set_title('Comparison of Average Poverty Rates in Districts With and Without Mines by Department')

# Create legend & show plot
ax.legend()
fig.tight_layout()  # Adjust the layout to make room for the rotated x-axis labels
plt.show()
