Question 4 -

Write a program that downloads the data from the link given below, reads it, converts it into a proper structure, and saves it as a CSV file.

Link - https://data.nasa.gov/resource/y77d-th95.json

Note - Write code comments wherever needed for code understanding.


In [34]:
import pandas as pd
import requests

# Function to download data from the provided link
def download_data(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of the JSON resource to download.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` can block indefinitely on an
            unresponsive server.

    Returns:
        The value produced by ``response.json()``.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: Connection errors and
            timeouts raised by ``requests.get`` propagate unchanged.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to download data from {url}")
    return response.json()

def _to_float(value, default=0.0):
    """Return ``value`` as a float, or ``default`` when it cannot be converted."""
    try:
        return float(value)
    except (TypeError, ValueError):
        # TypeError: value is None (e.g. a JSON null).
        # ValueError: value is an empty or non-numeric string.
        return default


# Function to process and convert data to a structured format
def process_data(data):
    """Convert raw meteorite records into a list of flat row dictionaries.

    Args:
        data: Iterable of dict-like records. The keys read from each record
            are 'name', 'id', 'nametype', 'recclass', 'mass', 'year',
            'reclat' and 'reclong'; any of them may be absent.

    Returns:
        A list with one dict per input record, keyed by the output column
        names. Text fields default to '' when missing. 'Mass (g)',
        'Latitude' and 'Longitude' are floats that fall back to 0.0 when
        the source value is missing, None, or not numeric, so a single
        malformed record no longer aborts the whole conversion.
        'Year' is the result of ``pd.to_datetime(..., errors='coerce')``,
        using '1900-01-01' when the key is missing. 'Coordinates' is the
        ``[latitude, longitude]`` pair.

    NOTE(review): a fallback of 0 cannot be told apart from a genuine 0
    in the output; confirm this is acceptable for downstream consumers.
    """
    structured_data = []
    for meteorite in data:
        # Parse each coordinate once and reuse it for the 'Coordinates' pair.
        latitude = _to_float(meteorite.get('reclat', 0))
        longitude = _to_float(meteorite.get('reclong', 0))
        structured_data.append({
            'Name of Earth Meteorite': meteorite.get('name', ''),
            'ID of Earth Meteorite': meteorite.get('id', ''),
            'Meteorite Type': meteorite.get('nametype', ''),
            'Recclass': meteorite.get('recclass', ''),
            'Mass (g)': _to_float(meteorite.get('mass', 0)),
            # errors='coerce' turns unparseable dates into NaT instead of raising.
            'Year': pd.to_datetime(meteorite.get('year', '1900-01-01'), errors='coerce'),
            'Latitude': latitude,
            'Longitude': longitude,
            'Coordinates': [latitude, longitude],
        })
    return structured_data


# Function to convert structured data to CSV format
def convert_to_csv(data, output_file):
    """Write ``data`` to ``output_file`` as CSV, without the index column.

    Args:
        data: Anything ``pd.DataFrame`` accepts, e.g. a list of row dicts.
        output_file: Destination passed straight to ``DataFrame.to_csv``.
    """
    pd.DataFrame(data).to_csv(output_file, index=False)
# Main program: download the meteorite JSON, restructure it and save it as CSV
if __name__ == '__main__':
    link = "https://data.nasa.gov/resource/y77d-th95.json"
    output_file = "meteorite_data.csv"

    # Download data from the provided link
    downloaded_data = download_data(link)

    # Process the downloaded data
    processed_data = process_data(downloaded_data)

    # Write the processed data to a CSV file (convert_to_csv uses DataFrame.to_csv)
    convert_to_csv(processed_data, output_file)

    print(f"Data has been downloaded, processed, and saved to {output_file} in CSV format.")


Data has been downloaded, processed, and saved to meteorite_data.csv in CSV format.


In [36]:

# Read the generated CSV back into a DataFrame to inspect the result.
# NOTE(review): this uses an absolute '/content/' path, while the CSV is written
# to the relative path "meteorite_data.csv"; the two only match when the working
# directory is /content (presumably a Colab session - confirm).
df = pd.read_csv('/content/meteorite_data.csv')
# A bare expression as the last line of a notebook cell displays the DataFrame.
df


Unnamed: 0,Name of Earth Meteorite,ID of Earth Meteorite,Meteorite Type,Recclass,Mass (g),Year,Latitude,Longitude,Coordinates
0,Aachen,1,Valid,L5,21.0,1880-01-01,50.77500,6.08333,"[50.775, 6.08333]"
1,Aarhus,2,Valid,H6,720.0,1951-01-01,56.18333,10.23333,"[56.18333, 10.23333]"
2,Abee,6,Valid,EH4,107000.0,1952-01-01,54.21667,-113.00000,"[54.21667, -113.0]"
3,Acapulco,10,Valid,Acapulcoite,1914.0,1976-01-01,16.88333,-99.90000,"[16.88333, -99.9]"
4,Achiras,370,Valid,L6,780.0,1902-01-01,-33.16667,-64.95000,"[-33.16667, -64.95]"
...,...,...,...,...,...,...,...,...,...
995,Tirupati,24009,Valid,H6,230.0,1934-01-01,13.63333,79.41667,"[13.63333, 79.41667]"
996,Tissint,54823,Valid,Martian (shergottite),7000.0,2011-01-01,29.48195,-7.61123,"[29.48195, -7.61123]"
997,Tjabe,24011,Valid,H6,20000.0,1869-01-01,-7.08333,111.53333,"[-7.08333, 111.53333]"
998,Tjerebon,24012,Valid,L5,16500.0,1922-01-01,-6.66667,106.58333,"[-6.66667, 106.58333]"
