In [1]:
import pandas as pd
import numpy as np
import random
import io

### Loading Dataset

In [2]:
econ_df = pd.read_csv(filepath_or_buffer="./econ.csv", sep=',', header=0)
econ_df.head(3)

Unnamed: 0,id,geo_point_2d,geo_shape,clave_cat,delegacion,perimetro,tipo,nom_id
0,0,"19.424781053,-99.1327537959","{""type"": ""Polygon"", ""coordinates"": [[[-99.1332...",307_130_11,Cuauhtémoc,B,Mercado,Pino Suárez
1,1,"19.4346139576,-99.1413808393","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",002_008_01,Cuautémoc,A,Museo,Museo Nacional de Arquitectura Palacio de Bell...
2,2,"19.4340695945,-99.1306348409","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",006_002_12,Cuautémoc,A,Museo,Santa Teresa


### Sampling functions
#### 1. Simple random sampling

In [3]:
aleat_8 = econ_df.sample(n=8)
aleat_8

Unnamed: 0,id,geo_point_2d,geo_shape,clave_cat,delegacion,perimetro,tipo,nom_id
163,163,"19.4265454033,-99.1224859032","{""type"": ""Polygon"", ""coordinates"": [[[-99.1231...",323_063_05,Venustiano Carranza,B,Mercado,
60,60,"19.4274816595,-99.1285626779","{""type"": ""MultiPoint"", ""coordinates"": [[-99.12...",006_086_10,Cuautémoc,A,Hotel,Oviedo
76,76,"19.4367072434,-99.1424407474","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",003_095_02,Cuautémoc,A,Museo,Franz Mayer
132,132,"19.4248733452,-99.1202942813","{""type"": ""MultiPoint"", ""coordinates"": [[-99.12...",323_102_02,Venustiano Carranza,B,Hotel,De Casa
38,38,"19.4439555857,-99.1345196425","{""type"": ""Polygon"", ""coordinates"": [[[-99.1351...",004_106_01,Cuauhtémoc,B,Mercado,La Lagunilla Comestibles
147,147,"19.4383757081,-99.1416066037","{""type"": ""Polygon"", ""coordinates"": [[[-99.1416...",003_098_01,Cuauhtémoc,A,Mercado,2 de Abril
131,131,"19.4389531509,-99.1479144659","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",003_080_06,Cuautémoc,A,Museo,Panteón de San Fernando
69,69,"19.43558625,-99.12965746","{""type"": ""MultiPoint"", ""coordinates"": [[-99.12...",005_129_08,Cuautémoc,A,Hotel,Templo Mayor


#### 2. Systematic sampling

In [4]:
def systematic_sampling(econ_df, step):
    systematic_sample = econ_df.iloc[:len(econ_df):step]
    return systematic_sample

In [5]:
systematic_sample = systematic_sampling(econ_df, 10)
systematic_sample.head() # In this case, each 3 values.

Unnamed: 0,id,geo_point_2d,geo_shape,clave_cat,delegacion,perimetro,tipo,nom_id
0,0,"19.424781053,-99.1327537959","{""type"": ""Polygon"", ""coordinates"": [[[-99.1332...",307_130_11,Cuauhtémoc,B,Mercado,Pino Suárez
10,10,"19.4441424478,-99.14600807","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",003_048_10,Cuautémoc,B,Hotel,Moctezuma
20,20,"19.4357307042,-99.1326583218","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",004_098_26,Cuautémoc,A,Museo,La Caricatura
30,30,"19.427530818,-99.1479200065","{""type"": ""MultiPoint"", ""coordinates"": [[-99.14...",002_068_13,Cuautémoc,B,Hotel,Villa Pal
40,40,"19.4371002964,-99.1504966035","{""type"": ""MultiPoint"", ""coordinates"": [[-99.15...",011_291_35,Cuautémoc,B,Hotel,"Servicios Hoteleros de tlaxcala, S.A. DE C.V."


#### 4. Stratified random sampling.

In [6]:
econ_df['stratified'] = econ_df['delegacion'] + ", " + econ_df['tipo']
(econ_df['stratified'].value_counts()/len(econ_df)).sort_values(ascending=False)

Cuautémoc, Hotel                0.643478
Cuautémoc, Museo                0.156522
Venustiano Carranza, Hotel      0.078261
Cuauhtémoc, Mercado             0.073913
Venustiano Carranza, Mercado    0.047826
Name: stratified, dtype: float64

In [7]:
def data_stratified(econ_df, strat_column_values, strat_values, strat_prop, random_state=None):
    df_strat = pd.DataFrame(columns = econ_df.columns)
    pos = -1
    for i in range(len(strat_values)):
        pos += 1
        if pos == len(strat_values) - 1:
            ratio_len = len(econ_df) - len(df_strat)
        else:
            ratio_len = int(len(econ_df) * strat_prop[i])
            
        df_filtered = econ_df[econ_df[strat_column_values] == strat_values[i]]
        df_temp = df_filtered.sample(replace=True, n=ratio_len, random_state=random_state)
        
        df_strat = pd.concat([df_strat, df_temp])
    
    return df_temp

In [8]:
strat_values = ['Cuautémoc, Hotel', 'Cuautémoc, Museo', 'Venustiano Carranza, Hotel', 'Cuauhtémoc, Mercado', 'Venustiano Carranza, Mercado']
strat_props = [0.5, 0.2, 0.1, 0.1, 0.1]

In [9]:
df_strat = data_stratified(econ_df, 'stratified', strat_values, strat_props, random_state=42)
df_strat

Unnamed: 0,id,geo_point_2d,geo_shape,clave_cat,delegacion,perimetro,tipo,nom_id,stratified
157,157,"19.4272935771,-99.1227492994","{""type"": ""Polygon"", ""coordinates"": [[[-99.1230...",323_063_08,Venustiano Carranza,B,Mercado,Merced Comidas,"Venustiano Carranza, Mercado"
77,77,"19.4254536359,-99.1226996335","{""type"": ""Polygon"", ""coordinates"": [[[-99.1233...",323_101_01,Venustiano Carranza,B,Mercado,La Merced Nave Menor,"Venustiano Carranza, Mercado"
216,216,"19.4247697438,-99.1249707246","{""type"": ""Polygon"", ""coordinates"": [[[-99.1250...",323_118_37,Venustiano Carranza,B,Mercado,Lamininas de La Merced,"Venustiano Carranza, Mercado"
163,163,"19.4265454033,-99.1224859032","{""type"": ""Polygon"", ""coordinates"": [[[-99.1231...",323_063_05,Venustiano Carranza,B,Mercado,,"Venustiano Carranza, Mercado"
128,128,"19.4270781084,-99.1210175514","{""type"": ""Polygon"", ""coordinates"": [[[-99.1214...",323_061_04(123),Venustiano Carranza,B,Mercado,San Ciprian,"Venustiano Carranza, Mercado"
157,157,"19.4272935771,-99.1227492994","{""type"": ""Polygon"", ""coordinates"": [[[-99.1230...",323_063_08,Venustiano Carranza,B,Mercado,Merced Comidas,"Venustiano Carranza, Mercado"
204,204,"19.4260286762,-99.1249971994","{""type"": ""Polygon"", ""coordinates"": [[[-99.1253...",323_138_01,Venustiano Carranza,B,Mercado,Florería,"Venustiano Carranza, Mercado"
49,49,"19.4264953358,-99.1248854383","{""type"": ""Polygon"", ""coordinates"": [[[-99.1252...",323_139_01,Venustiano Carranza,B,Mercado,Dulceria Don Goloso,"Venustiano Carranza, Mercado"
157,157,"19.4272935771,-99.1227492994","{""type"": ""Polygon"", ""coordinates"": [[[-99.1230...",323_063_08,Venustiano Carranza,B,Mercado,Merced Comidas,"Venustiano Carranza, Mercado"
216,216,"19.4247697438,-99.1249707246","{""type"": ""Polygon"", ""coordinates"": [[[-99.1250...",323_118_37,Venustiano Carranza,B,Mercado,Lamininas de La Merced,"Venustiano Carranza, Mercado"


In [14]:
econ_df['stratified'] = econ_df['delegacion'] + " - " + econ_df['tipo']

In [20]:
def stratified_sample( category, size, data=econ_df):
  strat_values = data[category].unique()
  strat_props = (data[category].value_counts() / len(data))

  strat_sample = pd.DataFrame(columns = data.columns)

  for value in strat_values:
    strat_sample = pd.concat([strat_sample, data[(data[category] == value)].sample(n = round(strat_props[value]*size)) ])
  
  return strat_sample

strat_sample = stratified_sample('stratified', 40)
strat_sample

Unnamed: 0,id,geo_point_2d,geo_shape,clave_cat,delegacion,perimetro,tipo,nom_id,stratified
226,226,"19.4416748524,-99.1365878489","{""type"": ""Polygon"", ""coordinates"": [[[-99.1370...",004_052_01,Cuauhtémoc,B,Mercado,De Muebles,Cuauhtémoc - Mercado
0,0,"19.424781053,-99.1327537959","{""type"": ""Polygon"", ""coordinates"": [[[-99.1332...",307_130_11,Cuauhtémoc,B,Mercado,Pino Suárez,Cuauhtémoc - Mercado
162,162,"19.4452741596,-99.1443205075","{""type"": ""Polygon"", ""coordinates"": [[[-99.1448...",003_044_01,Cuauhtémoc,B,Mercado,Martínez de la Torre,Cuauhtémoc - Mercado
207,207,"19.4344738803,-99.1305689118","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",006_002_13,Cuautémoc,A,Museo,Autonomía Universitaria,Cuautémoc - Museo
175,175,"19.4291934671,-99.1323328561","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",006_073_11,Cuautémoc,A,Museo,La Ciudad de México,Cuautémoc - Museo
110,110,"19.4356877258,-99.1387888649","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",001_003_11,Cuautémoc,A,Museo,Museo interactivo de economía Mide,Cuautémoc - Museo
19,19,"19.4317119617,-99.1269115285","{""type"": ""MultiPoint"", ""coordinates"": [[-99.12...",006_026_28,Cuautémoc,A,Museo,Alondiga La Merced,Cuautémoc - Museo
2,2,"19.4340695945,-99.1306348409","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",006_002_12,Cuautémoc,A,Museo,Santa Teresa,Cuautémoc - Museo
18,18,"19.4331161255,-99.1309438719","{""type"": ""MultiPoint"", ""coordinates"": [[-99.13...",006_021_01,Cuautémoc,A,Museo,Museo Nacional de las Culturas,Cuautémoc - Museo
95,95,"19.4370305998,-99.1214093113","{""type"": ""MultiPoint"", ""coordinates"": [[-99.12...",318_114_01,Venustiano Carranza,B,Hotel,Seoul,Venustiano Carranza - Hotel
