# Sesimic activity notebook

This notebook will clean and export a csv that can be used in the main model. The idea is to check whether there has been any type of earthquake in that region in between inspections

In [9]:
import pandas as pd

### Load Data

In [10]:
df_seism = pd.read_csv('seismic_activity_data.csv',sep=';')

### Data preparation

Changing the location of the earthquake to the equivalent province

In [11]:
provincias = {
    "A": "Alicante",
    "AB": "Albacete",
    "AL": "Almeria",
    'AL (MACRO)': 'Almeria',
    "AV": "Avila",
    "B": "Barcelona",
    "BA": "Badajoz",
    "BI": "Vizcaya",
    "BU": "Burgos",
    "C": "La Coruña",
    "CA": "Cádiz",
    "CC": "Caceres",
    "CE": "Ceuta",
    "CO": "Córdoba",
    "CR": "Ciudad Real",
    "CS": "Castellón",
    "CU": "Cuenca",
    "GC": "Las Palmas",
    "GI": "Girona",
    "GR": "Granada",
    "GU": "Guadalajara",
    "H": "Huelva",
    "HU": "Huesca",
    'IMA': 'Mallorca',
    'IBZ':'Ibiza',
    "J": "Jaén",
    "L": "Lleida",
    "LE": "León",
    "LO": "La Rioja",
    "LU": "Lugo",
    "M": "Madrid",
    "MA": "Málaga",
    "ML": "Melilla",
    "MU": "Murcia",
    "NA": "Navarra",
    'N': 'Navarra',
    "O": "Asturias",
    "OU": "Orense",
    "P": "Palencia",
    "PM": "Baleares",
    "PO": "Pontevedra",
    "S": "Cantabria",
    "SA": "Salamanca",
    "SE": "Sevilla",
    "SG": "Segovia",
    "SO": "Soria",
    "SS": "Guipúzcoa",
    "T": "Tarragona",
    "TE": "Teruel",
    "TF": "Santa Cruz de Tenerife",
    "TO": "Toledo",
    "V": "Valencia",
    "VA": "Valladolid",
    "VI": "Vitoria",
    "Z": "Zaragoza",
    "ZA": "Zamora",
    'FRA': "Francia",
    'POR':'Portugal',
    'ARG':'Argelia',
    'AND':'Andorra',
    'MAC': 'Marruecos',
    'F':'Ni idea',
    '':'Empty'
    
}


In [12]:
lst = []

for idx in df_seism.index:
    loc = df_seism['Localización'][idx]
    if '.'  in loc:
        strings = loc.split('.')
        lst.append(provincias[strings[-1]])
    else:
        df_seism = df_seism.drop(idx,axis=0)


df_seism['Province'] = lst


In [13]:
df_seism = df_seism.rename(columns={'       Fecha':'Fecha'})
df_seism

Unnamed: 0,Evento,Fecha,Hora,Latitud,Longitud,Prof. (Km),Inten.,Mag.,Tipo Mag.,Localización,Province
0,965079,01/01/2010,05:03:09,42.3508,-8.5852,15.7,,1.7,4,NW O RIAL.PO,Pontevedra
1,965085,01/01/2010,05:51:27,37.2451,-3.7651,0.0,,1.4,4,SW PINOS PUENTE.GR,Granada
2,965093,01/01/2010,08:54:48,42.1167,-7.2670,14.7,,2.1,4,SW VILARIÑO DE CONSO.OU,Orense
3,965094,01/01/2010,11:02:45,42.8225,-7.2387,10.6,,2.1,4,N TRIACASTELA.LU,Lugo
4,965103,01/01/2010,13:45:47,36.4763,-5.1733,19.9,I-II,1.6,4,NW ESTEPONA.MA,Málaga
...,...,...,...,...,...,...,...,...,...,...,...
58205,es2022zqgzn,31/12/2022,14:13:22,37.0958,-1.9327,2.0,,1.6,4,SW TURRE.AL,Almeria
58206,es2022zqjun,31/12/2022,15:38:27,37.3163,-2.1896,10.0,,1.4,4,NW ALBANCHEZ.AL,Almeria
58207,es2022zqlkf,31/12/2022,16:27:03,43.2316,-7.1264,1.0,,2.4,4,NE RIBEIRA DE PIQUÍN.LU,Lugo
58208,es2022zqoka,31/12/2022,17:57:46,37.4248,-2.4505,7.0,,1.2,4,NW LÚCAR.AL,Almeria


# Feature Engineering

In [14]:
df_seism['Fecha'] = pd.to_datetime(df_seism['Fecha'])
df_seism['Seism'] = 1
df_seism['Year'] = df_seism['Fecha'].dt.year
df_seism = df_seism.drop(['      Evento', '        Hora', '     Latitud',
                            '    Longitud', '  Prof. (Km)', '      Inten.','   Tipo Mag.', 'Localización','        Mag.'],axis=1)

df_seism

Unnamed: 0,Fecha,Province,Seism,Year
0,2010-01-01,Pontevedra,1,2010
1,2010-01-01,Granada,1,2010
2,2010-01-01,Orense,1,2010
3,2010-01-01,Lugo,1,2010
4,2010-01-01,Málaga,1,2010
...,...,...,...,...
58205,2022-12-31,Almeria,1,2022
58206,2022-12-31,Almeria,1,2022
58207,2022-12-31,Lugo,1,2022
58208,2022-12-31,Almeria,1,2022


In [15]:
df_seism = df_seism.sort_values(by='Fecha')
df_seism['AcumSeism'] = df_seism.groupby(['Province'])['Seism'].cumsum()


In [16]:
df_seism[df_seism.Province == 'Segovia']

Unnamed: 0,Fecha,Province,Seism,Year,AcumSeism
1536,2010-06-27,Segovia,1,2010,1
1541,2010-06-28,Segovia,1,2010,2
1550,2010-06-29,Segovia,1,2010,3
16244,2013-01-10,Segovia,1,2013,4
14158,2013-04-30,Segovia,1,2013,5
29668,2017-03-25,Segovia,1,2017,6
34578,2018-06-26,Segovia,1,2018,7


### Export

In [17]:
df_seism.to_csv('Sismic_activity_dataframe.csv',index=False)