# 1. Análisis exploratorio de sismos

**Objetivo:** Exploración del dataset

Esta base de datos es una colección de más de 23,000 sismos en USA. Contiene datos desde 1638 a 1985. La base de datos incluye información correspondiente a las coordenadas del epicentro, magnitudes, profundidad focal, nombres y coordenadas de ciudades reportadas, intensidades reportadas y la distancia de la ciudad al epicentro.


https://www.kaggle.com/srijya/us-earthquake-intensity-database

**Información de las características**
* 0 Year Mo Da Hr Mn Sec
* 1 UTC Conv
* 2 U/G Unpublished or grouped intensity
* 3 EQ Lat 
* 4 EQ Long
* 5 Magnitude
* 6 Depth (km)
* 7 Epi Dis Distancia epicentral
* 8 City Lat
* 9 City Long
* 10 MMI
* 11 State Code
* 12 City Name
* 13 Data Source

**Número de instancias:** 157015

**Número de atributos:** 20



# 2. Acceso a Drive

In [None]:
# Mount Google Drive inside the Colab runtime so the data file can be read from it.
from google.colab import drive
# force_remount=True mounts again even when /content/drive is already mounted.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# 3. Importando librerías

In [None]:
import _____ as pd
import os
import _____ as plt
import seaborn as ___
import ____ as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px
import plotly.io as pio
from sklearn.neighbors import LocalOutlierFactor

# 4. Lectura del archivo de datos

In [None]:
# Folder on the mounted Drive and file name of the workbook to load.
path = r'/content/drive/Shareddrives/Data Science para Geociencias/2. Preparación de los datos'
name = 'eqint_tsqp.xlsx'

In [None]:
# Build the full file path, load the file into `sismosdf` with pandas, and
# preview the first rows.
# The `____` placeholders are exercise blanks to be completed by the reader.
sis_path = os.path.join(____, ____)
sismosdf = pd.____(sis_path)
_____.head()

In [None]:
# Show the (rows, columns) dimensions; `_____` is an exercise blank for the
# DataFrame name.
_____.shape

(157015, 20)

In [None]:
# Missing-value map of the raw table: each cell is drawn as null / not-null so
# the gaps in every column stand out.
fig, ax = plt.subplots(figsize=(25, 8))
sns.heatmap(data=sismosdf.isna(), cmap="magma", ax=ax)
plt.grid()
plt.show()

# 5. Limpieza de datos

In [None]:
# Remove five columns from `sismosdf` in place (axis=1 selects columns).
# The blanks are exercise placeholders for the column names.
sismosdf.drop([_____, ______, ______, ______, ______], axis=1, inplace=True)

In [None]:
# Fill the gaps in the time and epicentral-distance columns.
# Each result is assigned back to its column instead of calling
# `fillna(..., inplace=True)` on `sismosdf[col]`: that chained in-place form
# operates on an intermediate Series, raises a FutureWarning in pandas 2.2+
# and leaves `sismosdf` unchanged when Copy-on-Write is enabled.
# The `____` placeholders are exercise blanks (the fill value of each column).
sismosdf['SECOND'] = sismosdf['SECOND'].fillna(0)
sismosdf['MINUTE'] = sismosdf['MINUTE'].fillna(sismosdf['MINUTE'].____)
sismosdf['HOUR'] = sismosdf['HOUR'].fillna(sismosdf['HOUR'].___)
sismosdf['EPIDIST'] = sismosdf['EPIDIST'].fillna(sismosdf['EPIDIST'].____)

In [None]:
# Keep only the rows that have a value in each of seven columns; a row with a
# null in any of them is discarded.
# Each `___` is an exercise blank for one column name.
sismosdf = sismosdf[sismosdf.___.notnull()]
sismosdf = sismosdf[sismosdf.___.notnull()]
sismosdf = sismosdf[sismosdf.___.notnull()]
sismosdf = sismosdf[sismosdf.___.notnull()]
sismosdf = sismosdf[sismosdf.___.notnull()]
sismosdf = sismosdf[sismosdf.___.notnull()]
sismosdf = sismosdf[sismosdf.___.notnull()]

In [None]:
# Redraw the missing-value map to check what is still null after the
# cleaning steps.
fig, ax = plt.subplots(figsize=(25, 8))
sns.heatmap(data=sismosdf.isna(), cmap="magma", ax=ax)
plt.grid()
plt.show()

In [None]:
# Show the (rows, columns) dimensions after cleaning; `____` is an exercise
# blank for the DataFrame name.
____.shape

# 6. Exploración de los datos

In [None]:
# Bar chart with the number of records per MMI value.
# `____` is an exercise blank for the DataFrame name.
fig, ax = plt.subplots(figsize=(25,5))
sns.countplot(ax=ax, x=____['MMI'], color='darksalmon')
plt.show()

In [None]:
# Bar chart with the number of records per SOURCE value.
# `_____` is an exercise blank for the DataFrame name.
fig, ax = plt.subplots(figsize=(25,5))
sns.countplot(ax=ax, x=_____['SOURCE'], color='blue')
plt.show()

In [None]:
# Bar chart with the number of records per MAGNITUDE value.
# `____` is an exercise blank for the DataFrame name.
fig, ax = plt.subplots(figsize=(25,5))
sns.countplot(ax=ax, x=____['MAGNITUDE'], color='turquoise')
plt.show()

In [None]:
# Bar chart of how many records each STATE value has (value_counts orders the
# bars from most to least frequent). `_____` is an exercise blank.
_____['STATE'].value_counts().plot(kind='bar', figsize=(15,5), grid=False, color='darkorange')

In [None]:
# Bar chart with the number of records per YEAR; the figure is extra wide
# (40 in) to fit one bar per year. `_____` is an exercise blank.
fig, ax = plt.subplots(figsize=(40,5))
sns.countplot(ax=ax, x=_____['YEAR'], color='mediumorchid')
plt.show()

In [None]:
# Map of the reported city locations over the USA, colored by MMI.
# The `____` placeholders are exercise blanks for the DataFrame name.
fig = go.Figure(data=go.Scattergeo(
    lon=_____['CITY_LON'],
    lat=_____['CITY_LAT'],
    text=____['MMI'],
    marker=dict(
        color=_____['MMI'],
        colorscale='Rainbow',
        reversescale=True,
        opacity=0.7,
        size=5,
        colorbar=dict(
            # `title.side` replaces the legacy `titleside` attribute, which
            # newer Plotly releases no longer accept.
            title=dict(side="right"),
            outlinecolor="rgba(68, 68, 68, 0)",
            ticks="outside",
            showticksuffix="last",
            dtick=1,
        ),
    ),
))
# NOTE(review): the selector only matches traces whose `mode` is explicitly
# 'markers'. The trace above never sets `mode`, so this call may leave the
# marker size at 5 — confirm which size (5 or 15) is intended.
fig.update_traces(marker=dict(size=15),
                  selector=dict(mode='markers'))
fig.update_layout(
    margin=dict(l=0, r=0, b=0, t=25),
    title='Intensidad de sismos',
    geo_scope='usa',
)
fig.show()

In [None]:
# Animated map of intensities: one frame per year plus a slider to move
# between years.

# Index by (YEAR, MONTH) so the rows of one year can be pulled out with
# `.xs(year)`; the first index level lists the years available.
sismosdf = sismosdf.set_index(['YEAR', 'MONTH'])
index_list = sismosdf.index.levels[0].tolist()


def _year_frame(year):
    """Return the Plotly frame dict with the scattermapbox trace for `year`."""
    # Select the year's rows once instead of once per attribute.
    year_df = sismosdf.xs(year)
    mmi = year_df['MMI']
    return {
        'name': 'frame_{}'.format(year),
        'data': [{
            'type': 'scattermapbox',
            'lat': year_df['CITY_LAT'],
            'lon': year_df['CITY_LON'],
            'marker': go.scattermapbox.Marker(
                # Size is a quadratic function of the offset from that
                # year's mean MMI, plus the MMI itself.
                size=(mmi - mmi.mean() + 3.5)**2 + mmi,
                color=mmi,
                showscale=True,
                colorscale='Jet',
                # `title.side` replaces the legacy `titleside` attribute,
                # which newer Plotly releases no longer accept.
                colorbar={
                    'title': {'text': 'Intensidad', 'side': 'top'},
                    'thickness': 4,
                    'ticksuffix': ' MMI',
                },
            ),
            # Columns: MMI, MAGNITUDE and the remaining index values of the
            # year's rows; the hover template reads the first two.
            'customdata': np.stack(
                (mmi, year_df['MAGNITUDE'], pd.Series(year_df.index)),
                axis=-1,
            ),
            'hovertemplate': "<extra></extra><em> Intensidad  %{customdata[0]}<br>Magnitud  %{customdata[1]}",
        }],
    }


frames = [_year_frame(i) for i in index_list]

# One slider step per year; each step jumps to the frame named after it.
sliders = [{
    'transition': {'duration': 0},
    'x': 0.08,
    'len': 0.88,
    'currentvalue': {'font': {'size': 15}, 'prefix': 'Año ', 'visible': True, 'xanchor': 'center'},
    'steps': [
        {
            'label': i,
            'method': 'animate',
            'args': [
                ['frame_{}'.format(i)],
                {'mode': 'immediate', 'frame': {'duration': 1000, 'redraw': True}, 'transition': {'duration': 1000, }}
            ],
        } for i in index_list]
}]


# First frame shown when the figure loads
data = frames[0]['data']

# Attach the sliders to the layout
layout = go.Layout(
    sliders=sliders,
    margin=dict(l=0, r=0, b=0, t=25),
    title='Intensidad de sismos por año',
    mapbox={
        # NOTE(review): the Mapbox access token is hard-coded in the notebook;
        # consider loading it from an environment variable or secret store.
        'accesstoken': 'pk.eyJ1IjoiY2xhdWNvdCIsImEiOiJja2h4MTIxd2UwMzNzMnlvNzVycXN2dW14In0.J_KSzOY3YGDzozRdHvMJgQ',
        # Center on the mean city location, shifted 10 degrees in longitude.
        'center': {"lat": sismosdf['CITY_LAT'].mean(), "lon": sismosdf['CITY_LON'].mean()+10},
        'zoom': 3,
        'style': 'light',
    }
)
fig = go.Figure(data=data, layout=layout, frames=frames)
fig.show()

# 7. Escalamiento y codificación de etiquetas

In [None]:
# Keep only the records whose STATE is 'CA', then drop the listed columns.
# `.copy()` makes the filtered subset an independent DataFrame, so the
# in-place drop below and later column assignments on it do not raise
# SettingWithCopyWarning.
# The `_____` placeholders are exercise blanks for the DataFrame name.
sismosdf = _____[_____.STATE == 'CA'].copy()
______.drop(['CITY','SOURCE','COUNTRY', 'DAY', 'HOUR','MINUTE','SECOND','STATE'], axis=1, inplace=True)

In [None]:
# Preview the first rows after filtering; `_____` is an exercise blank for the
# DataFrame name.
_____.head()

In [None]:
# Min-max scaling: fit the scaler and map every feature to the [0, 1] range.
# `____` is an exercise blank for the data to scale.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_sismos = scaler.fit_transform(____)

In [None]:
# Remove the city coordinate columns from `sismosdf` in place.
sismosdf.drop(['CITY_LAT','CITY_LON'], axis=1, inplace=True)

In [None]:
# Preview the first rows of the remaining columns; `____` is an exercise blank.
____.head()

# 8. Detección de outliers

In [None]:
# 3-D scatter of MAGNITUDE vs EPIDIST vs MMI to inspect the data visually.
# `_____` is an exercise blank for the DataFrame to plot.
# NOTE(review): no `color` column is passed here, so `color_continuous_scale`
# presumably has no visible effect in this figure — confirm.
fig = px.scatter_3d(data_frame=_____,
                    x='MAGNITUDE',
                    y='EPIDIST',
                    z='MMI',
                    color_continuous_scale=px.colors.sequential.Plasma,
                    template='seaborn',
                    title='Visualización de los datos',
                    width=1300,
                    height=500)
fig.update_layout(margin=dict(l=0, r=0, b=0, t=25))
# Shrink the markers of the scatter trace to size 1.
fig.update_traces(marker=dict(size=1),
                  selector=dict(mode='markers'))
pio.show(fig)

In [None]:
# Local Outlier Factor with 20 neighbors, flagging 10% of the samples as
# outliers (contamination=0.1). fit_predict returns 1 for inliers and -1 for
# outliers. `____` is an exercise blank for the data to fit.
clf = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
y_pred = clf.fit_predict(____)

In [None]:
# Display the array of labels returned by fit_predict.
y_pred

In [None]:
# Store a value per row in a new OUTLIER column and preview the result.
# The `_____` placeholders are exercise blanks (the value to assign and the
# DataFrame name).
sismosdf['OUTLIER'] = _____
_____.head()

In [None]:
# Same 3-D scatter as before, now colored by the OUTLIER column.
# `_____` is an exercise blank for the DataFrame to plot.
fig = px.scatter_3d(data_frame=_____,
                    x='MAGNITUDE',
                    y='EPIDIST',
                    z='MMI',
                    color='OUTLIER',
                    color_continuous_scale=px.colors.sequential.Bluered,
                    template='seaborn',
                    title='Visualización de Outliers',
                    width=1300,
                    height=500)
fig.update_layout(margin=dict(l=0, r=0, b=0, t=25))
# Shrink the markers of the scatter trace to size 1.
fig.update_traces(marker=dict(size=1),
                  selector=dict(mode='markers'))
pio.show(fig)