---
---
---

# NASA Meteorite Landings Prediction

---
---
---

<br>

## Dependencies

### Packages

In [1]:
from kagglehub import KaggleDatasetAdapter
import kagglehub

### Utilities

In [2]:
from IPython.display import display
from ydata_profiling import ProfileReport
import os
import webbrowser

# -------------------------

def profile_data(df,
                 file_name='data_profiling',
                 report_title='YData Profiling Report',
                 show=False,
                 open_browser=True):
    """
    Profiles the given DataFrame and generates an HTML report.

    The report is written to '../res/data-profiling/<file_name>.html', where the
    relative directory is resolved against the current working directory and is
    created when missing. After writing, the report is optionally rendered in
    the notebook and optionally opened in the default web browser.

    Parameters:
        - df (pd.DataFrame): The DataFrame to profile.
        - file_name (str): The name of the output HTML file (without extension). Default is 'data_profiling'.
        - report_title (str): The title of the report. Default is 'YData Profiling Report'.
        - show (bool): Whether to display the report in the notebook. Default is False.
        - open_browser (bool): Whether to open the written HTML file in the default web browser. Default is True.

    Returns:
        None
    """
    # Local import: only needed here to turn the output path into a file URI
    from pathlib import Path

    # Ensuring the output directory exists (exist_ok avoids a check-then-create race)
    WORK_DIR = os.path.abspath('../res/data-profiling')
    os.makedirs(WORK_DIR, exist_ok=True)

    # Creating the report
    OUT_FILE = os.path.join(WORK_DIR, f"{file_name}.html")
    profile = ProfileReport(df, explorative=True, title=report_title)
    profile.to_file(OUT_FILE)

    # Display in the notebook if required
    if show:
        display(profile)

    # -------------------------

    # Opening the report in the default web browser.
    # as_uri() percent-encodes special characters (e.g. spaces) and emits a
    # well-formed 'file:///' URI on every platform, unlike manual concatenation.
    if open_browser:
        webbrowser.open(Path(OUT_FILE).as_uri())

### Dataset

In [3]:
# Loading 'meteorite-landings.csv' from the 'nasa/meteorite-landings' Kaggle
# dataset through the pandas adapter
df = kagglehub.dataset_load(
    adapter=KaggleDatasetAdapter.PANDAS,
    handle='nasa/meteorite-landings',
    path='meteorite-landings.csv',
)

# -------------------------

# Previewing the first rows as the cell output
df.head()

Unnamed: 0,name,id,nametype,recclass,mass,fall,year,reclat,reclong,GeoLocation
0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.775,6.08333,"(50.775000, 6.083330)"
1,Aarhus,2,Valid,H6,720.0,Fell,1951.0,56.18333,10.23333,"(56.183330, 10.233330)"
2,Abee,6,Valid,EH4,107000.0,Fell,1952.0,54.21667,-113.0,"(54.216670, -113.000000)"
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.883330, -99.900000)"
4,Achiras,370,Valid,L6,780.0,Fell,1902.0,-33.16667,-64.95,"(-33.166670, -64.950000)"


## Data Profiling

In [4]:
# Writing the profiling report to 'data-profiling.html' and rendering it inline
profile_data(
    df,
    file_name='data-profiling',
    report_title='Meteorite Landings Dataset',
    show=True,
)

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:07,  1.13it/s][A
100%|██████████| 10/10 [00:01<00:00,  6.90it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

