### Exploratory Data Analysis (EDA)


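- Exploratory data analysis of the cleaned Mexico real-estate listings: where the properties are located and which states appear most often.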

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px


#### Import data

In [2]:
"""
Load the cleaned Mexico real-estate listings into df, then show its
shape and first rows.
"""
df = pd.read_csv("data/mexico-real-estate-clean.csv")
# Only the last expression of a cell is rendered, so a bare `df.shape`
# placed above `df.head()` would be silently discarded; print it instead.
print(f"df shape: {df.shape}")
# NOTE(review): the rendered frame has an "Unnamed: 0" column, which looks
# like a row index that was written into the CSV. Consider index_col=0 if
# nothing downstream relies on that column. TODO confirm.
df.head()

Unnamed: 0,property_type,state,lat,lon,area_m2,price_usd
0,house,Estado de México,19.560181,-99.233528,150.0,67965.56
1,house,Nuevo León,25.688436,-100.198807,186.0,63223.78
2,apartment,Guerrero,16.767704,-99.764383,82.0,84298.37
3,apartment,Guerrero,16.829782,-99.911012,150.0,94308.8
4,house,Yucatán,21.052583,-89.538639,205.0,105191.37


#### Location data: Scatter Mapbox Plot

In [4]:
""" 
Using plotly to create the figure from df data.
Map will be centered on Mexico City.
The map displays price when hovering mouse over house
"""
# Build the map and apply the tile style in one chained expression;
# update_layout returns the figure, so `fig` is the same object as before.
# NOTE(review): recent Plotly releases deprecate scatter_mapbox in favour of
# scatter_map. Check the installed version before migrating.
fig = px.scatter_mapbox(
    data_frame=df,
    lat="lat",
    lon="lon",
    hover_data=["price_usd"],  # adds price_usd to each marker's hover box
    center=dict(lat=19.43, lon=-99.13),
    width=600,
    height=600,
).update_layout(mapbox_style="open-street-map")

fig.show()


#### Categorical Data: Most common states

In [15]:
"""Number of unique states""" 
# dropna=True is the default, spelled out here: missing states are not counted.
df["state"].nunique(dropna=True)

30

In [16]:
"""Display the 30 unique states""" 
# Function form of Series.unique(): the distinct values of the state column.
pd.unique(df["state"])

array(['Estado de México', 'Nuevo León', 'Guerrero', 'Yucatán',
       'Querétaro', 'Morelos', 'Chiapas', 'Tabasco', 'Distrito Federal',
       'Nayarit', 'Puebla', 'Veracruz de Ignacio de la Llave', 'Sinaloa',
       'Tamaulipas', 'Jalisco', 'San Luis Potosí', 'Baja California',
       'Hidalgo', 'Quintana Roo', 'Sonora', 'Chihuahua',
       'Baja California Sur', 'Zacatecas', 'Aguascalientes', 'Guanajuato',
       'Durango', 'Tlaxcala', 'Colima', 'Oaxaca', 'Campeche'],
      dtype=object)

In [17]:
"""Count the rows of df for each state, most frequent state first"""
df["state"].value_counts(sort=True, ascending=False)

state
Distrito Federal                   303
Estado de México                   179
Yucatán                            171
Morelos                            160
Querétaro                          128
Veracruz de Ignacio de la Llave    117
Puebla                              95
Nuevo León                          83
Jalisco                             60
San Luis Potosí                     55
Chiapas                             55
Guerrero                            49
Tamaulipas                          48
Quintana Roo                        38
Baja California                     29
Sinaloa                             26
Chihuahua                           20
Tabasco                             20
Hidalgo                             17
Baja California Sur                 15
Sonora                              12
Guanajuato                          12
Aguascalientes                      10
Nayarit                              9
Durango                              7
Tlaxcala           

In [19]:
""" The 10 states with the most rows in df """
# Slice the first ten entries of the descending counts.
df["state"].value_counts().iloc[:10]

state
Distrito Federal                   303
Estado de México                   179
Yucatán                            171
Morelos                            160
Querétaro                          128
Veracruz de Ignacio de la Llave    117
Puebla                              95
Nuevo León                          83
Jalisco                             60
San Luis Potosí                     55
Name: count, dtype: int64