In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import janitor

In [4]:
meteorites = pd.read_csv("meteorite_landings.csv").clean_names()

1) Split in column GeoLocation into latitude and longitude, the new latitude and longitude columns should be numeric.

In [5]:
lat_long = meteorites["geolocation"].str.split(pat=",", expand=True)

In [6]:
latitude = lat_long[0].str.extract(r'([\d]+[.][\d]*)')

In [7]:
longtitude = lat_long[1].str.extract(r'([-]*[\d]*[.][\d]*)')

In [8]:
meteorites["longtitude"] = longtitude

In [9]:
meteorites["latitude"] = latitude

In [10]:
meteorites = meteorites.drop(columns="geolocation")

In [11]:
meteorites["latitude"] = meteorites["latitude"].astype("float64")

In [12]:
meteorites["longtitude"] = meteorites["longtitude"].astype("float64")

In [13]:
meteorites

Unnamed: 0,id,name,mass_g_,fall,year,longtitude,latitude
0,1,Aachen,21.0,Fell,1880.0,6.08333,50.77500
1,2,Aarhus,720.0,Fell,1951.0,10.23333,56.18333
2,4,Abajo,331.0,Found,1982.0,-105.41667,26.80000
3,5,Abbott,21100.0,Found,1951.0,-104.28333,36.30000
4,6,Abee,107000.0,Fell,1952.0,-113.00000,54.21667
...,...,...,...,...,...,...,...
45711,57454,Mandalay Spring,2854.0,Found,2012.0,-118.55319,40.89201
45712,57455,Antelope,754.0,Found,2012.0,-118.54465,40.90183
45713,57456,Northwest Africa 7870,42.0,Found,2004.0,0.00000,0.00000
45714,57457,Northwest Africa 7871,450.0,Found,2004.0,0.00000,0.00000


2) Replace any missing values in latitude and longitude with zeros.

In [14]:
meteorites.info() # check for NAs

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45716 entries, 0 to 45715
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          45716 non-null  int64  
 1   name        45716 non-null  object 
 2   mass_g_     45585 non-null  float64
 3   fall        45716 non-null  object 
 4   year        45417 non-null  float64
 5   longtitude  38401 non-null  float64
 6   latitude    38401 non-null  float64
dtypes: float64(4), int64(1), object(2)
memory usage: 2.4+ MB


In [15]:
meteorites["longtitude"] = meteorites["longtitude"].fillna("0")

In [16]:
meteorites["latitude"] = meteorites["latitude"].fillna("0")

In [17]:
meteorites["longtitude"].isna().sum() # check for NAs

0

In [18]:
meteorites["latitude"].isna().sum() # check for NAs

0

3) Remove meteorites less than 1000g in weight from the data.

In [19]:
meteorites = meteorites[meteorites["mass_g_"] >= 1000]

In [20]:
meteorites.describe()

Unnamed: 0,id,mass_g_,year
count,4871.0,4871.0,4814.0
mean,22885.021351,123403.5,1968.234109
std,16508.512824,1755279.0,50.412121
min,5.0,1000.0,1583.0
25%,10166.0,1686.285,1945.0
50%,18009.0,3600.0,1990.5
75%,32661.5,12000.0,2002.0
max,57454.0,60000000.0,2013.0


4) Order the data by the year of discovery.

In [21]:
meteorites = meteorites.sort_values(by="year", ascending=False)

5) Find the names and years found for the 10 largest meteorites in the data.

In [26]:
meteorites.loc[:, ["name", "mass_g_"]].nlargest(10, columns="mass_g_")

Unnamed: 0,name,mass_g_
10664,Hoba,60000000.0
4126,Cape York,58200000.0
4113,Campo del Cielo,50000000.0
4122,Canyon Diablo,30000000.0
2242,Armanty,28000000.0
9696,Gibeon,26000000.0
4216,Chupaderos,24300000.0
15555,Mundrabilla,24000000.0
22241,Sikhote-Alin,23000000.0
3801,Bacubirito,22000000.0


6) Find the average mass of meteorites that were recorded falling, vs. those which were just found.

In [27]:
meteorites

Unnamed: 0,id,name,mass_g_,fall,year,longtitude,latitude
45702,57427,Northwest Africa 7863,1000.0,Found,2013.0,0,0
45488,57165,Chelyabinsk,100000.0,Fell,2013.0,61.1167,54.8167
45711,57454,Mandalay Spring,2854.0,Found,2012.0,-118.553,40.892
44988,56646,Northwest Africa 7615,1100.0,Found,2012.0,0,0
43832,55333,Northwest Africa 7251,13000.0,Found,2012.0,0,0
...,...,...,...,...,...,...,...
44857,56491,Jiddat al Harasis 761,1251.0,Found,,56.3919,19.945
44860,56494,Jiddat al Harasis 764,1236.0,Found,,56.4817,19.8064
44882,56516,Ramlat as Sahmah 428,16247.0,Found,,56.3269,20.1036
44889,56523,Jiddat al Harasis 791,1194.0,Found,,55.7039,19.7286


In [33]:
meteorites.groupby("fall").agg({"mass_g_": "mean"}).reset_index().round()

Unnamed: 0,fall,mass_g_
0,Fell,68033.0
1,Found,133354.0


7) Find the number of meteorites in each year, for every year since 2000.

In [49]:
meteorites[meteorites["year"] >= 2000]["year"].value_counts()

2000.0    235
2003.0    209
2002.0    203
2001.0    186
2006.0    167
2005.0    146
2004.0    141
2011.0    121
2009.0    103
2008.0     96
2010.0     89
2007.0     71
2012.0     53
2013.0      2
Name: year, dtype: int64