# Near miss asteroids for Earth in 2022

## This project examines 2022 data from NASA's Near Earth Object Web Service (NeoWs) and takes a closer look at how close, and how potentially dangerous, some asteroids were to Earth.

### More information about the project, including how to get it working on your computer, is located in the README in this project's directory.

In [None]:
import os
import requests
import json
import datetime
import pandas as pd 
import glob

# 1. Getting our asteroid data

## This section is optional and shows the method I used to download the 2022 NeoWs data from NASA using their API. The process can be long, so if you do not want to sign up for an API key on the NASA website and download the data yourself, the needed data is included in the data/original folder of this project. I have commented out key sections of this script to prevent accidentally running it, as it can take upwards of 5 minutes to download all of the files. If you want to run this script to test it out, please see the section of the README titled "1. Getting our asteroid data."

In [None]:
# Here is the script to download the json files
url = "https://api.nasa.gov/neo/rest/v1/feed"
# Remove this to input API key: api_key = "*insert API key here*"

# Specify the start and end dates for the first week and the folder path
start_date = "2022-01-01"
end_date = "2022-01-07"
folder_path = "data/downloaded"

# The last day we want data for. Dates written as year-month-day compare
# correctly as plain strings, so we can use <= and min() on them directly.
last_date = "2022-12-31"
date_format = "%Y-%m-%d"
one_week = datetime.timedelta(days=7)

# Our api only allows 7 days at a time, so let's iterate over the start and end dates for 7 days at a time (see below)
while start_date <= last_date:

    # The 53rd window starts on 2022-12-31, so without this cap it would run
    # through 2023-01-06 and pull 2023 data into our 2022 dataset
    end_date = min(end_date, last_date)

    # Building the parameters fails on purpose (NameError) until api_key is
    # defined above, which keeps the download from running by accident
    parameters = {
        "start_date": start_date,
        "end_date": end_date,
        "api_key": api_key
    }

    # Here is our actual API request. The timeout stops one stalled
    # connection from hanging the whole download forever.
    try:
        response = requests.get(url, params=parameters, timeout=60)
    except requests.RequestException as error:
        # Only report the error type: the full message can contain the
        # request URL, which includes the API key
        response = None
        failure_reason = type(error).__name__
    else:
        failure_reason = f"HTTP status {response.status_code}"

    # Here we will save the file with a custom filename for the week it contains
    if response is not None and response.status_code == 200:
        data = response.json()
        asteroids = data["near_earth_objects"]
        file_name = f"week_{start_date}.json"
        file_path = os.path.join(folder_path, file_name)

        # Make sure the download folder exists before we write into it
        os.makedirs(folder_path, exist_ok=True)

        # Save the json file and output a confirmation that the file was downloaded successfully
        with open(file_path, "w") as file:
            json.dump(asteroids, file)
            print(f"JSON data for {start_date} to {end_date} saved successfully.")
            #If this is successful, you will see 53 success responses, may take 5-10 minutes
    else:
        print(f"Failed to retrieve data for {start_date} to {end_date} from the API ({failure_reason}).")

    # Here we can use strptime, strftime from the datetime library to convert the start/end date to datetime elements
    # Then we can add 7 days to that datetime element and convert the value back to a string to be used in the request
    start_date = (datetime.datetime.strptime(start_date, date_format) + one_week).strftime(date_format)
    end_date = (datetime.datetime.strptime(end_date, date_format) + one_week).strftime(date_format)

# 2. Cleaning our NeoWs data

## Here we will parse our JSON files, keep only the specific data we want, and put it into a dataframe.

## If you are downloading the data yourself to test out the code, please read the readme for this section under "2. Cleaning our NeoWs data" for the changes you need to make, otherwise you can leave this section as-is if you are using the included "original" data.


In [27]:
# The column layout of the finished dataframe, arranged to make it easier to read
column_order = ['Date', 'Name', 'Diameter_Min_Feet', 'Diameter_Max_Feet', 'Average_Diameter_Feet', 'Relative_Velocity_MPH', 'Miss_Distance_Miles', 'Is_Potentially_Hazardous', 'ID']


def extract_approach_rows(data):
    """Flatten one loaded json file into one row per close approach.

    Args:
        data: Dictionary whose values are lists of asteroid records. Each
            record must provide 'id', 'name', 'estimated_diameter',
            'is_potentially_hazardous_asteroid' and 'close_approach_data'.

    Returns:
        A list of dictionaries, one per entry in each asteroid's
        'close_approach_data' list, keyed by our dataframe column names.
    """
    rows = []
    # The dictionary keys are not needed: every approach carries its own date
    for asteroids in data.values():
        for asteroid in asteroids:
            diameter_feet = asteroid['estimated_diameter']['feet']

            # Since we have a nested list in our json, we need a nested loop for this data
            for approach in asteroid['close_approach_data']:
                # NOTE(review): the NeoWs feed appears to deliver the velocity
                # and miss distance as strings - confirm, and convert with
                # pd.to_numeric before doing any numeric analysis on them
                rows.append({
                    'ID': asteroid['id'],
                    'Name': asteroid['name'],
                    'Diameter_Min_Feet': diameter_feet['estimated_diameter_min'],
                    'Diameter_Max_Feet': diameter_feet['estimated_diameter_max'],
                    'Relative_Velocity_MPH': approach['relative_velocity']['miles_per_hour'],
                    'Miss_Distance_Miles': approach['miss_distance']['miles'],
                    'Is_Potentially_Hazardous': asteroid['is_potentially_hazardous_asteroid'],
                    'Date': approach['close_approach_date'],
                })
    return rows


def build_asteroid_dataframe(rows):
    """Turn the flattened rows into the cleaned, date-sorted dataframe.

    Args:
        rows: List of dictionaries as returned by extract_approach_rows.
            May be empty.

    Returns:
        A dataframe with the columns of column_order, sorted
        chronologically, with 'Date' formatted as month-day-year text.
    """
    # Naming the columns up front means an empty list still gives us a
    # dataframe with the right columns instead of a KeyError further down
    raw_columns = [name for name in column_order if name != 'Average_Diameter_Feet']
    frame = pd.DataFrame(rows, columns=raw_columns)

    # Let's add a column to give us the average estimated diameter
    frame['Average_Diameter_Feet'] = (frame['Diameter_Min_Feet'] + frame['Diameter_Max_Feet']) / 2

    # Sort while 'Date' is still a real datetime. Sorting the month-day-year
    # text instead would put 01-01-2023 in between 01-01-2022 and 01-02-2022.
    frame['Date'] = pd.to_datetime(frame['Date'])
    frame = frame[column_order].sort_values('Date', kind='mergesort')

    # Next let's convert our date from year-month-day to month-day-year
    frame['Date'] = frame['Date'].dt.strftime('%m-%d-%Y')
    return frame


asteroid_data = []

# Let's use glob to iterate over the file names of our json files, and then load the json files into the data variable
# Sorting the file names keeps the row order the same from run to run
json_files = sorted(glob.glob('data/original/*.json'))
if not json_files:
    print("No JSON files were found in data/original, so the dataframe will be empty.")

for file_name in json_files:
    with open(file_name, encoding='utf-8') as file:
        data = json.load(file)
    asteroid_data.extend(extract_approach_rows(data))

df = build_asteroid_dataframe(asteroid_data)


# Save the DataFrame to an Excel file
#df.to_excel('data/asteroidoutput.xlsx', index=False)