### Importamos las librerías

In [2]:
# importacion general de librerias y de visualizacion (matplotlib y seaborn)
import pandas as pd
import numpy as np
import random as rd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

plt.style.use('default') # haciendo los graficos un poco mas bonitos en matplotlib
plt.rcParams['figure.figsize'] = (20, 10)

sns.set(style="whitegrid") # seteando tipo de grid en seaborn

pd.options.display.float_format = '{:20,.2f}'.format # suprimimos la notacion cientifica en los outputs

import warnings
warnings.filterwarnings('ignore')

### Importamos los datasets

In [6]:
# Read the training features and the training labels from CSV
df_train_values = pd.read_csv('../datasets/train_values.csv')
df_train_labels = pd.read_csv('../datasets/train_labels.csv')

# Cast the feature columns in one pass: the first list becomes 'category',
# the second list becomes 'bool'
df_train_values = df_train_values.astype({
    **dict.fromkeys(
        [
            'land_surface_condition',
            'foundation_type',
            'roof_type',
            'ground_floor_type',
            'other_floor_type',
            'position',
            'plan_configuration',
            'legal_ownership_status',
        ],
        'category',
    ),
    **dict.fromkeys(
        [
            'has_superstructure_adobe_mud',
            'has_superstructure_mud_mortar_stone',
            'has_superstructure_stone_flag',
            'has_superstructure_cement_mortar_stone',
            'has_superstructure_mud_mortar_brick',
            'has_superstructure_cement_mortar_brick',
            'has_superstructure_timber',
            'has_superstructure_bamboo',
            'has_superstructure_rc_non_engineered',
            'has_superstructure_rc_engineered',
            'has_superstructure_other',
            'has_secondary_use',
            'has_secondary_use_agriculture',
            'has_secondary_use_hotel',
            'has_secondary_use_rental',
            'has_secondary_use_institution',
            'has_secondary_use_school',
            'has_secondary_use_industry',
            'has_secondary_use_health_post',
            'has_secondary_use_gov_office',
            'has_secondary_use_use_police',
            'has_secondary_use_other',
        ],
        'bool',
    ),
})

# The label column is cast to 'category' as well
df_train_labels = df_train_labels.astype({'damage_grade': 'category'})

### Hacemos el análisis

In [7]:
# Names of the has_superstructure_* columns to analyse
superstructure_columns = [
    'has_superstructure_adobe_mud', 'has_superstructure_mud_mortar_stone',
    'has_superstructure_stone_flag', 'has_superstructure_cement_mortar_stone',
    'has_superstructure_mud_mortar_brick', 'has_superstructure_cement_mortar_brick',
    'has_superstructure_timber', 'has_superstructure_bamboo',
    'has_superstructure_rc_non_engineered', 'has_superstructure_rc_engineered',
    'has_superstructure_other',
]

# Keep only those columns of the training features
df_has_superstructure = df_train_values[superstructure_columns]

In [8]:
# Preview the first five rows of the superstructure subset
df_has_superstructure.head(n=5)

Unnamed: 0,has_superstructure_adobe_mud,has_superstructure_mud_mortar_stone,has_superstructure_stone_flag,has_superstructure_cement_mortar_stone,has_superstructure_mud_mortar_brick,has_superstructure_cement_mortar_brick,has_superstructure_timber,has_superstructure_bamboo,has_superstructure_rc_non_engineered,has_superstructure_rc_engineered,has_superstructure_other
0,True,True,False,False,False,False,False,False,False,False,False
1,False,True,False,False,False,False,False,False,False,False,False
2,False,True,False,False,False,False,False,False,False,False,False
3,False,True,False,False,False,False,True,True,False,False,False
4,True,False,False,False,False,False,False,False,False,False,False


In [9]:
# Print the index range, column names, non-null counts and dtypes of the subset
df_has_superstructure.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260601 entries, 0 to 260600
Data columns (total 11 columns):
 #   Column                                  Non-Null Count   Dtype
---  ------                                  --------------   -----
 0   has_superstructure_adobe_mud            260601 non-null  bool 
 1   has_superstructure_mud_mortar_stone     260601 non-null  bool 
 2   has_superstructure_stone_flag           260601 non-null  bool 
 3   has_superstructure_cement_mortar_stone  260601 non-null  bool 
 4   has_superstructure_mud_mortar_brick     260601 non-null  bool 
 5   has_superstructure_cement_mortar_brick  260601 non-null  bool 
 6   has_superstructure_timber               260601 non-null  bool 
 7   has_superstructure_bamboo               260601 non-null  bool 
 8   has_superstructure_rc_non_engineered    260601 non-null  bool 
 9   has_superstructure_rc_engineered        260601 non-null  bool 
 10  has_superstructure_other                260601 non-null  bool 
dtype

In [10]:
# Count how many superstructure flags are set for each building.
# Only the has_superstructure_* columns are summed, so re-running this cell does
# not fold a previously added 'Suma_structure' column into the total.
# .assign() returns a new frame instead of writing a column into a frame that was
# sliced out of df_train_values (the pattern behind SettingWithCopyWarning).
df_has_superstructure = df_has_superstructure.assign(
    Suma_structure=df_has_superstructure.filter(regex='^has_superstructure_').sum(axis=1)
)

In [11]:
df_has_superstructure['Suma_structure'].value_counts()

1    176016
2     57838
3     20210
4      4925
5      1259
6       314
7        35
8         4
Name: Suma_structure, dtype: int64

In [12]:
# Select the buildings with the highest number of superstructure types.
# The threshold is taken from the data (it is 8 in the value_counts output of the
# previous cell) rather than hard-coded, so the cell keeps selecting the top group
# if the dataset changes.
max_superstructure_count = df_has_superstructure['Suma_structure'].max()
df_has_mas_superstruct = df_train_values[
    df_has_superstructure['Suma_structure'] == max_superstructure_count
]

In [13]:
df_has_mas_superstruct

Unnamed: 0,building_id,geo_level_1_id,geo_level_2_id,geo_level_3_id,count_floors_pre_eq,age,area_percentage,height_percentage,land_surface_condition,foundation_type,...,has_secondary_use_agriculture,has_secondary_use_hotel,has_secondary_use_rental,has_secondary_use_institution,has_secondary_use_school,has_secondary_use_industry,has_secondary_use_health_post,has_secondary_use_gov_office,has_secondary_use_use_police,has_secondary_use_other
5577,747820,11,131,8614,2,0,11,7,t,r,...,False,False,False,False,False,False,False,False,False,False
53043,841313,23,774,9172,2,45,6,5,t,u,...,False,False,False,False,False,False,False,False,False,False
54684,166615,27,181,7810,5,70,20,10,t,r,...,False,True,False,False,False,False,False,False,False,False
251212,383507,7,901,11244,2,0,3,9,o,i,...,False,False,False,False,False,False,False,False,False,False


In [14]:
# Show only the has_superstructure_* flags of the selected buildings.
# Columns are picked by name instead of by position (previously 15:26), so the
# view does not silently shift if columns are added or reordered upstream.
df_has_mas_superstruct.filter(regex='^has_superstructure_')

Unnamed: 0,has_superstructure_adobe_mud,has_superstructure_mud_mortar_stone,has_superstructure_stone_flag,has_superstructure_cement_mortar_stone,has_superstructure_mud_mortar_brick,has_superstructure_cement_mortar_brick,has_superstructure_timber,has_superstructure_bamboo,has_superstructure_rc_non_engineered,has_superstructure_rc_engineered,has_superstructure_other
5577,True,False,False,True,True,True,True,True,True,False,True
53043,True,True,True,True,True,False,True,True,False,False,True
54684,True,True,True,True,True,True,True,False,True,False,False
251212,True,False,True,True,True,False,True,True,True,True,False


84585 edificios tienen más de un tipo de superestructura.