In [1]:
import pandas as pd
# Load the earthquake catalogue; the first CSV column becomes the index and
# 'Olus tarihi' is parsed as a datetime so the .dt accessor works below.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/turkey-earthquake-data-1914-2023/veriler.csv',
    parse_dates=['Olus tarihi'],
    index_col=[0],
)
# Calendar year of each event, used as the time axis in the charts below.
df['year'] = df['Olus tarihi'].dt.year
# xM rounded to the nearest whole number (still a float column). The vectorized
# Series.round replaces a per-element apply(round, args=(0,)) and yields the
# same half-to-even results.
df['xm_int'] = df['xM'].round(0)
df.head()

Unnamed: 0_level_0,Deprem Kodu,Olus tarihi,Olus zamani,Enlem,Boylam,Der(km),xM,MD,ML,Mw,Ms,Mb,Tip,Yer,year,xm_int
No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,20230731215338,2023-07-31,21:53:38.29,37.4153,37.1703,4.0,3.5,0.0,3.5,3.4,0.0,0.0,Ke,DOGANLI-PAZARCIK (KAHRAMANMARAS) [East 1.3 km],2023,4.0
2,20230731152316,2023-07-31,15:23:16.85,38.1413,38.434,5.2,3.5,0.0,3.5,3.4,0.0,0.0,Ke,AKSU-SINCIK (ADIYAMAN) [West 6.4 km],2023,4.0
3,20230731025049,2023-07-31,02:50:49.84,37.8352,26.8353,6.2,3.0,0.0,3.0,0.0,0.0,0.0,Ke,SISAM ADASI (EGE DENIZI),2023,3.0
4,20230730201940,2023-07-30,20:19:40.49,40.8085,31.0183,5.3,3.1,0.0,3.1,3.1,0.0,0.0,Ke,ICMELER-GOLYAKA (DUZCE) [South East 0.7 km],2023,3.0
5,20230730161352,2023-07-30,16:13:52.62,38.129,38.1863,2.4,3.0,0.0,3.0,0.0,0.0,0.0,Ke,CAMPINAR-CELIKHAN (ADIYAMAN) [North West 4.3 km],2023,3.0


In [2]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(50000, 16)

In [3]:
# Per-column dtype and non-null count, to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 50000 entries, 1 to 50000
Data columns (total 16 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Deprem Kodu  50000 non-null  int64         
 1   Olus tarihi  50000 non-null  datetime64[ns]
 2   Olus zamani  50000 non-null  object        
 3   Enlem        50000 non-null  float64       
 4   Boylam       50000 non-null  float64       
 5   Der(km)      50000 non-null  float64       
 6   xM           50000 non-null  float64       
 7   MD           50000 non-null  float64       
 8   ML           50000 non-null  float64       
 9   Mw           12922 non-null  float64       
 10  Ms           50000 non-null  float64       
 11  Mb           50000 non-null  float64       
 12  Tip          50000 non-null  object        
 13  Yer          50000 non-null  object        
 14  year         50000 non-null  int32         
 15  xm_int       50000 non-null  float64       
dtypes: dateti

In [4]:
# Number of distinct values in each column.
df.nunique()

Deprem Kodu    50000
Olus tarihi     9639
Olus zamani    49746
Enlem          20622
Boylam         28384
Der(km)          921
xM                42
MD                37
ML                45
Mw                46
Ms                12
Mb                24
Tip                2
Yer            33489
year              30
xm_int             6
dtype: int64

In [5]:
# Frequency of each value in the 'Tip' column.
df['Tip'].value_counts()

Tip
Ke    49886
Sm      114
Name: count, dtype: int64

In [6]:
from plotly.express import scatter_geo
# Apparently plotly does not consider Turkey to be part of Africa or Europe,
# which is awkward, so the scope has to be 'world'; fitbounds then zooms the
# map to the plotted locations.
scatter_geo(
    data_frame=df.loc[df['xM'] >= 3.1],
    lat='Enlem',
    lon='Boylam',
    color='xM',
    scope='world',
    opacity=0.9,
    color_continuous_scale='Reds',
    hover_name='Yer',
    fitbounds='locations',
    basemap_visible=True,
).update_layout(autosize=True)

From the data card we know that this dataset contains earthquakes of magnitude 3 or greater, so xM must be the magnitude. I've only plotted events with xM of at least 3.1 here for performance reasons.

In [7]:
# What percentage of the rows pass the map's xM >= 3.1 filter?
# Counting True values in the mask equals the length of the filtered frame.
round(100 * int((df['xM'] >= 3.1).sum()) / len(df), 2)

78.27

In [8]:
from plotly.express import histogram
# Event counts binned by 'Boylam' (the column used as longitude in the map
# above), colored by rounded magnitude.
histogram(data_frame=df, x='Boylam', color='xm_int')

In [9]:
# Event counts binned by 'Enlem' (the column used as latitude in the map
# above), placed on the y axis and colored by rounded magnitude.
histogram(data_frame=df, y='Enlem', color='xm_int')

These two charts show the density of events along the longitude and latitude directions, respectively, and remind us that magnitude is a logarithmic scale.

In [10]:
from plotly.express import pie
# Share of events for each rounded-magnitude value.
pie(data_frame=df[['xm_int']], names='xm_int' )

Let's look at the distributions of our other columns and see if they tell us anything.

In [11]:
# Events per year, colored by the values in the MD column.
histogram(data_frame=df, x='year', color='MD')

In [12]:
# Events per year, colored by the values in the ML column.
histogram(data_frame=df, x='year', color='ML')

It looks like MD and ML are both magnitudes, but recorded over two different periods of time.

In [13]:
# Events per year, colored by the values in the Mw column.
histogram(data_frame=df, x='year', color='Mw')

In [14]:
# Events per year, colored by the values in the Ms column.
histogram(data_frame=df, x='year', color='Ms')

In [15]:
# Events per year, colored by the values in the Mb column.
histogram(data_frame=df, x='year', color='Mb')

I think this tells us that we have data from five sources (MD, ML, Mw, Ms and Mb) rolled up into our xM column.

In [16]:
# Distribution of xM values, colored by year.
histogram(data_frame=df, x='xM', color='year')

In [17]:
# Distribution of the 'Deprem Kodu' values, colored by year.
# NOTE(review): in the sample rows the code looks like the event timestamp
# written as YYYYMMDDhhmmss, which would make this a time histogram - confirm.
histogram(data_frame=df, x='Deprem Kodu', color='year')

In [18]:
from plotly.express import scatter
# xM against year, colored by rounded magnitude, with an ordinary least
# squares trend line requested via trendline='ols'.
scatter(data_frame=df, x='year', y='xM', color='xm_int', trendline='ols')

Yes, it does look like earthquakes are getting more severe, at least judging by the OLS trend line of xM against year.