# Near earth objects

#### https://cneos.jpl.nasa.gov/ca/

In [169]:
from matplotlib import pyplot as plt
from matplotlib import colors as colors
from datetime import date
import pandas as pd
import numpy as np

### Constants

In [170]:
zero=0
earth_distance_km = 6_371
iss_distance_km = 6_371+408
geostationary_distance_km = 35_786

### Filter

In [171]:
# Entfernung in Lunar Distances (float)
filter_entfernung = 10

# Alle NEOs bis einschließlich folgendes Jahr
filter_jahr_max = 2050

### data import, cleaning

In [172]:
df = pd.read_csv("cneos_closeapproach_data.csv", sep=",")
df = df.drop(columns=['CA Distance Minimum (LD | au)', 'V relative (km/s)', 'V infinity (km/s)', 'H (mag)', 'Unnamed: 8'])

In [173]:
df.head(2)

Unnamed: 0,Object,Close-Approach (CA) Date,CA Distance Nominal (LD | au),Diameter
0,99942 Apophis (2004 MN4),2029-Apr-13 21:46 ± < 00:01,0.10 | 0.00025,0.34±0.04 km
1,(2010 RF12),2095-Sep-06 00:06 ± 00:44,0.10 | 0.00026,5.6 m - 12 m


In [174]:
df.dtypes

Object                           object
Close-Approach (CA) Date         object
CA Distance Nominal (LD | au)    object
Diameter                         object
dtype: object

In [175]:
df["LD"] = df["CA Distance Nominal (LD | au)"].str.split(' ', expand = True)[0]

In [176]:
df.head(2)

Unnamed: 0,Object,Close-Approach (CA) Date,CA Distance Nominal (LD | au),Diameter,LD
0,99942 Apophis (2004 MN4),2029-Apr-13 21:46 ± < 00:01,0.10 | 0.00025,0.34±0.04 km,0.1
1,(2010 RF12),2095-Sep-06 00:06 ± 00:44,0.10 | 0.00026,5.6 m - 12 m,0.1


In [177]:
df["LD"] = pd.to_numeric(df["LD"], errors='coerce')

In [178]:
df.dtypes

Object                            object
Close-Approach (CA) Date          object
CA Distance Nominal (LD | au)     object
Diameter                          object
LD                               float64
dtype: object

In [179]:
# +/- ignorieren, ungefähre zeit reicht erstmal aus
df["approach_time"] = pd.to_datetime(df["Close-Approach (CA) Date"].str.split('±', expand = True)[0])
df["approach_time_month"] = pd.DatetimeIndex(df['approach_time']).month

In [180]:
df.index = np.arange(1, len(df) + 1 )
df["x_axis"] = df.index
df.head(2)

Unnamed: 0,Object,Close-Approach (CA) Date,CA Distance Nominal (LD | au),Diameter,LD,approach_time,approach_time_month,x_axis
1,99942 Apophis (2004 MN4),2029-Apr-13 21:46 ± < 00:01,0.10 | 0.00025,0.34±0.04 km,0.1,2029-04-13 21:46:00,4,1
2,(2010 RF12),2095-Sep-06 00:06 ± 00:44,0.10 | 0.00026,5.6 m - 12 m,0.1,2095-09-06 00:06:00,9,2


### calculation

In [181]:
df["distance_km"] = df["LD"]*394_399

In [182]:
df = df.loc[df["LD"] < filter_entfernung]

df = df.loc[df["approach_time"].dt.year <= filter_jahr_max]

In [183]:
df.head(2)

Unnamed: 0,Object,Close-Approach (CA) Date,CA Distance Nominal (LD | au),Diameter,LD,approach_time,approach_time_month,x_axis,distance_km
1,99942 Apophis (2004 MN4),2029-Apr-13 21:46 ± < 00:01,0.10 | 0.00025,0.34±0.04 km,0.1,2029-04-13 21:46:00,4,1,39439.9
4,(2007 UD6),2048-Oct-18 02:48 ± 2_09:26,0.22 | 0.00057,5.8 m - 13 m,0.22,2048-10-18 02:48:00,10,4,86767.78


### size

In [184]:
# km oder m auslesen
df["size_unit"] = df["Diameter"].str[-2:]


# Teile alle Durchmesser mit ±
df["d1"] = df["Diameter"].str.split('±', expand = True)[0]
df["d1"] = pd.to_numeric(df["d1"], errors='coerce')

# definiere size factor = 1000, ausser bei " m" = 1
df["size_factor"] = 1_000
df.loc[df["size_unit"] == " m", "size_factor"] = 1
df.head(3)

Unnamed: 0,Object,Close-Approach (CA) Date,CA Distance Nominal (LD | au),Diameter,LD,approach_time,approach_time_month,x_axis,distance_km,size_unit,d1,size_factor
1,99942 Apophis (2004 MN4),2029-Apr-13 21:46 ± < 00:01,0.10 | 0.00025,0.34±0.04 km,0.1,2029-04-13 21:46:00,4,1,39439.9,km,0.34,1000
4,(2007 UD6),2048-Oct-18 02:48 ± 2_09:26,0.22 | 0.00057,5.8 m - 13 m,0.22,2048-10-18 02:48:00,10,4,86767.78,m,,1
7,(2016 RD34),2047-Sep-05 10:42 ± 03:48,0.27 | 0.00069,7.7 m - 17 m,0.27,2047-09-05 10:42:00,9,7,106487.73,m,,1


In [185]:
# mittlerer Durchmesser ermitteln

df["d_von"] = df["Diameter"].str.split(' ', expand = True)[0]
df["d_von"] = pd.to_numeric(df["d_von"], errors='coerce')

df["d_bis"] = df["Diameter"].str.split('- ', expand = True)[1]
df["d_bis"] = df["d_bis"].str.split(' m', expand = True)[0]
df["d_bis"] = pd.to_numeric(df["d_bis"], errors='coerce')

df["mittel1"] = df["d1"] * df["size_factor"]
df["mittel1"] = df["mittel1"].fillna(1)

df["mittel2"] = (df["d_von"]+df["d_bis"]) *(1/2) * df["size_factor"]
df["mittel2"] = df["mittel2"].fillna(1)

df["diameter_mean"] = df["mittel1"] * df["mittel2"]

df = df.drop(columns=['d1', 
                      'd_von', 
                      'd_bis', 
                      'mittel1', 
                      'mittel2', 
                      'size_factor', 
                      'CA Distance Nominal (LD | au)', 
                      'size_unit'])

df.head(3)

Unnamed: 0,Object,Close-Approach (CA) Date,Diameter,LD,approach_time,approach_time_month,x_axis,distance_km,diameter_mean
1,99942 Apophis (2004 MN4),2029-Apr-13 21:46 ± < 00:01,0.34±0.04 km,0.1,2029-04-13 21:46:00,4,1,39439.9,340.0
4,(2007 UD6),2048-Oct-18 02:48 ± 2_09:26,5.8 m - 13 m,0.22,2048-10-18 02:48:00,10,4,86767.78,9.4
7,(2016 RD34),2047-Sep-05 10:42 ± 03:48,7.7 m - 17 m,0.27,2047-09-05 10:42:00,9,7,106487.73,12.35


In [186]:
# Liste erstellen für die Größen

In [187]:
max_größe = df["diameter_mean"].max()
max_größe

3070.0

In [188]:
df["marker_size"] = 300 * (df["diameter_mean"] / max_größe)



# definiere size factor = 1000, ausser bei " m" = 1
#df["marker_size2"] = 15
#df.loc[df["marker_size"] > 15, "marker_size2"] = 200 * (df["diameter_mean"] / max_größe)



df.head(4)

Unnamed: 0,Object,Close-Approach (CA) Date,Diameter,LD,approach_time,approach_time_month,x_axis,distance_km,diameter_mean,marker_size
1,99942 Apophis (2004 MN4),2029-Apr-13 21:46 ± < 00:01,0.34±0.04 km,0.1,2029-04-13 21:46:00,4,1,39439.9,340.0,33.224756
4,(2007 UD6),2048-Oct-18 02:48 ± 2_09:26,5.8 m - 13 m,0.22,2048-10-18 02:48:00,10,4,86767.78,9.4,0.918567
7,(2016 RD34),2047-Sep-05 10:42 ± 03:48,7.7 m - 17 m,0.27,2047-09-05 10:42:00,9,7,106487.73,12.35,1.20684
9,(2012 UE34),2041-Apr-08 02:55 ± < 00:01,58 m - 130 m,0.29,2041-04-08 02:55:00,4,9,114375.71,94.0,9.185668


### visualize

In [189]:
import matplotlib

plt.style.use('seaborn')

plt.figure(figsize=(16, 9))

grouped = df.groupby('approach_time_month')["Object"].count()


plt.bar(grouped[])

#plt.scatter(df["approach_time"], df["distance_km"], vmin=df["diameter_mean"].min(), vmax=df["diameter_mean"].max(), alpha=0.5, linewidths=1, c=df["diameter_mean"], cmap=plt.cm.get_cmap('rainbow'), marker="o", s=df["diameter_mean"] * 1, label="Near earth object")

#plt.axhline(y=geostationary_distance_km, color='red', alpha=0.7, linestyle='-', label="geostationary orbit", marker="", markersize=10)
#plt.axhline(y=iss_distance_km, color='orange', alpha=0.7, linestyle='-', label="iss orbit", marker="", markersize=10)
#plt.axhline(y=zero, color='blue', alpha=0.7, linestyle='-', label="earth", marker="", markersize=10)
#plt.axhspan(zero, earth_distance_km, facecolor='blue', alpha=0.7)

#plt.xlabel("Year of Approach", color="black", alpha=0.8, fontsize=18)
#plt.ylabel("Distance [km]", color="black", alpha=0.8, fontsize=18)

#plt.tick_params(labelsize=20, axis='y')
#plt.tick_params(labelsize=15, axis='x')

#plt.figure(1).autofmt_xdate()

legend = plt.legend(loc='center',
           bbox_to_anchor=(0.5, -0.2),
           fancybox=True,
           shadow=True,
           ncol=6,
           fontsize=10,
           prop = {
               'size': 16
           })

# fix für markersizes in legende: für alle markersizes in der legende fixe value setzen
# weil beim scatter plot + variablen markersize die markersize in der legende nicht fix ist, und daher viel zu groß wird.
for legobj in legend.legendHandles:
    legobj._sizes = [30]

plt.title(f' Near earth objects - distance to earth - diameter (Data: NASA) \n\n', fontsize=25)
plt.suptitle(f'Filter: distance < {filter_entfernung} Lunar Distances, Future only, max. diameter = {max_größe} m, max. year = {filter_jahr_max} \n https://cneos.jpl.nasa.gov/ca/', fontsize=15, y=0.95)

plt.savefig(f'NEO_{filter_entfernung} Lunar Distances.png', dpi=300, bbox_inches='tight')



TypeError: bar() missing 1 required positional argument: 'height'

<Figure size 1152x648 with 0 Axes>

In [None]:
df.head(50)

df["diameter_mean"].max()

In [None]:
df.plot.scatter(x=['x_axis'], y=['distance_km'], s=df['marker_size'], c='g')
