## Table of Contents
* [Data Preparation](#1)
* [Basic Explorations](#2)
* [Geospatial Visualization](#3)

In [None]:
# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# missing values visualization
import missingno as msno

# statistics
from statsmodels.distributions.empirical_distribution import ECDF

# maps
import folium

In [None]:
# load the raw meteorite landings table; it is kept untouched in df_orig
# so later cells can compare against the original values
df_orig = pd.read_csv(
    '../input/landings-of-meteorite/'
    'LANDINGS OF METEORITE - Meteorite-Landings.csv'
)
# print column names, dtypes and non-null counts
df_orig.info()

<a id='1'></a>
# Data Preparation

In [None]:
# work on a copy so df_orig stays unchanged, and leave out the two columns
# this analysis treats as redundant
df = df_orig.copy().drop(columns=['nametype', 'GeoLocation'])
df.head()

In [None]:
# show structure of missings
# (missingno matrix plot of the prepared frame, rendered via matplotlib)
msno.matrix(df)
plt.show()

In [None]:
# extract year (date and time are just dummies): characters 6-9 of the raw
# string hold the four-digit year
year_text = df['year'].astype(str).str[6:10]
# rows without a usable year come out of to_numeric as NaN; encode them as -1
# so the column can be stored as plain integers.
# NOTE: assign through df['year'] with fillna instead of
# `df.year[mask] = -1` -- that chained assignment writes to a temporary
# object (SettingWithCopyWarning) and has no effect under pandas Copy-on-Write.
df['year'] = pd.to_numeric(year_text).fillna(-1).astype(int)
df.head()

In [None]:
# list rows whose year lies outside the range 1000..2021
# (this also catches the -1 placeholder used for missing years)
df.loc[(df['year'] < 1000) | (df['year'] > 2021)]

In [None]:
# look up the two records with a year below 1000 in the untouched original
# data to judge whether those years are plausible
for row_position in (678, 703):
    print(df_orig.iloc[row_position])

<a id='2'></a>
# Basic Explorations

In [None]:
# basic summary stats
# include='all' summarizes every column, not only the numeric ones
df.describe(include='all')

In [None]:
# number of meteorites per class (recclass), most frequent first
df['recclass'].value_counts()

In [None]:
# bar chart of the 20 most frequent classes
# (value_counts sorts descending, so the first 20 entries are the top 20)
df['recclass'].value_counts().head(20).plot.bar(figsize=(8, 4))
plt.title('Top 20 Classes')
plt.grid()
plt.show()

In [None]:
# bar chart of how often each value of the "fall" column occurs
df['fall'].value_counts().plot.bar()
plt.title('Fell or Found')
plt.grid()
plt.show()

In [None]:
# histogram of the raw mass values (in grams) using 100 bins
df['mass (g)'].plot.hist(bins=100, figsize=(8, 4))
plt.title('mass[g]')
plt.grid()
plt.show()

#### Let's try a log plot:

In [None]:
# logarithmic plot of mass
# log10 is only defined for positive values: a mass of 0 maps to -inf, which
# the histogram binning cannot handle (non-finite range), and missing masses
# carry no information -- so restrict the plot to strictly positive masses
# (the comparison `> 0` is False for NaN, so missings are dropped as well)
np.log10(df.loc[df['mass (g)'] > 0, 'mass (g)']).plot(kind='hist', bins=100, figsize=(8,4))
plt.title('log10(mass[g])')
plt.grid()
plt.show()

In [None]:
# empirical CDF of log10(mass)
# ECDF counts every value it is given as an observation, including NaN and
# -inf; missing masses would therefore keep the plotted curve from reaching 1
# and zero masses would sit at -inf where they cannot be drawn.
# Use only strictly positive masses (`> 0` is False for NaN, so missings are
# excluded too).
ecdf = ECDF(np.log10(df.loc[df['mass (g)'] > 0, 'mass (g)']))
plt.plot(ecdf.x, ecdf.y)
plt.title('Empirical CDF')
plt.xlabel('log10(mass[g])')
plt.ylabel('Empirical CDF')
plt.grid()
plt.show()

In [None]:
# the n_top heaviest meteorites: sort by mass in descending order and keep
# the first n_top rows as an independent copy
n_top = 10
df_top = df.sort_values(by='mass (g)', ascending=False).head(n_top).copy()
df_top

In [None]:
# histogram of the year column, leaving out the -1 placeholder that marks
# missing years
df.loc[df['year'] > -1, 'year'].plot.hist(bins=100)
plt.title('Year - Missings excluded')
plt.grid()
plt.show()

In [None]:
# histogram of the year column restricted to 1800 and later
df.loc[df['year'] >= 1800, 'year'].plot.hist(bins=100)
plt.title('Year >= 1800')
plt.grid()
plt.show()

#### The apparent increase in recent years probably does not reflect a real rise in impacts; it seems to be just an effect of much more research being done...

<a id='3'></a>
# Geospatial Visualization

In [None]:
# rows usable for the maps below: both coordinates must be present, and the
# point (0,0) is excluded (lat^2 + long^2 is zero only when both are zero)
df_complete = (
    df.dropna(subset=['reclat', 'reclong'])
      .loc[lambda rows: rows['reclat'] ** 2 + rows['reclong'] ** 2 > 0]
)

In [None]:
# static overview of all landing sites: longitude on x, latitude on y,
# marker area given by the square root of the mass
plt.figure(figsize=(14, 8))
plt.scatter(
    df_complete['reclong'],
    df_complete['reclat'],
    s=np.sqrt(df_complete['mass (g)']),
    color='blue',
    alpha=0.1,  # strong transparency so dense regions remain readable
)
plt.title('Landings of Meteorites')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid()
plt.show()

In [None]:
# interactive map: one circle marker per meteorite with known coordinates
zoom_factor = 2 # initial zoom level of the map

my_map_1 = folium.Map(location=[0,0], zoom_start=zoom_factor)

# Walk the needed columns in lockstep instead of calling df_complete.iloc[i]
# several times per marker (each such call builds a new row object).
for lat, lon, mass, name, year in zip(df_complete['reclat'],
                                      df_complete['reclong'],
                                      df_complete['mass (g)'],
                                      df_complete['name'],
                                      df_complete['year']):
    folium.CircleMarker(
       location=[lat, lon],
       radius=0.01*np.sqrt(mass), # marker radius scales with sqrt(mass)
       popup='Name: ' + name + ' - Year:' + str(year),
       color='blue',
       weight=1, # stroke width in pixels
       opacity=0.25, # stroke opacity
       fill=True,
       fill_opacity=0.25
    ).add_to(my_map_1)

my_map_1 # display