In [4]:
import random
import numpy as np
import pandas as pd

def simulate_meteorite_dataset(years, start_year=2024, seed=None):
    """Simulate yearly meteorite falls and return one row per meteorite.

    For every simulated year a meteorite count is drawn from a normal
    distribution (``mean_rate``, ``sd_rate``), truncated to an integer and
    floored at 0. Each meteorite is assigned a mass group with probability
    proportional to ``meteor_percentage`` and then a mass drawn from a normal
    distribution centred on the midpoint of the group's range, with a standard
    deviation of one quarter of the range width. Negative draws are clamped
    to 0. Because the normal distribution is unbounded, a simulated mass can
    fall outside the nominal range of its group.

    Parameters
    ----------
    years : int
        Number of consecutive years to simulate. ``0`` yields an empty frame.
    start_year : int, default 2024
        Calendar year assigned to the first simulated year.
    seed : int or None, default None
        When given, the simulation uses private generators seeded with this
        value, so the result is reproducible and global RNG state is left
        untouched. When ``None``, draws come from the global ``random`` and
        ``numpy.random`` state.

    Returns
    -------
    pandas.DataFrame
        Columns ``year`` (int64) and ``mass`` (float64), one row per
        simulated meteorite, ordered by year.
    """
    # Percentage of total meteorites by mass group. Data obtained in R
    meteor_percentage = {"Pebble": 0.845185066,
                         "Very Small": 0.045969805,
                         "Small": 0.077391444,
                         "Medium": 0.021022272,
                         "Large": 0.009004967,
                         "Very Large": 0.001506169,
                         "Enormous": 0.000192277}

    # Meteorite sizes in each mass group, as [lower, upper] bounds
    meteor_sizes = {"Pebble": [0, 1],
                    "Very Small": [1, 2],
                    "Small": [2, 20],
                    "Medium": [20, 100],
                    "Large": [100, 1000],
                    "Very Large": [1000, 10000],
                    "Enormous": [10000, 10000000]}

    # Rate per year that meteorites land on Earth. Data obtained in R
    mean_rate = 738.65
    sd_rate = 665.831446677472

    # With no seed, fall back to the module-level generators; the `random`
    # and `np.random` modules expose the same `choices` / `normal` methods
    # as the private generator objects used when a seed is supplied.
    if seed is None:
        py_rng, np_rng = random, np.random
    else:
        py_rng, np_rng = random.Random(seed), np.random.RandomState(seed)

    # Loop-invariant lookups, computed once instead of once per meteorite.
    group_names = list(meteor_percentage)
    group_weights = list(meteor_percentage.values())
    mass_mean = {name: (low + high) / 2 for name, (low, high) in meteor_sizes.items()}
    mass_sd = {name: (high - low) / 4 for name, (low, high) in meteor_sizes.items()}

    # The rate's standard deviation is close to its mean, so negative draws
    # are common; they are floored at zero meteorites for that year.
    meteor_rate = np_rng.normal(mean_rate, sd_rate, years)
    meteor_count = [max(0, int(rate)) for rate in meteor_rate]

    year_parts = []
    mass_parts = []
    for offset, num_meteors in enumerate(meteor_count):
        # Pick a mass group for every meteorite that falls in this year
        groups = py_rng.choices(group_names, weights=group_weights, k=num_meteors)
        means = np.array([mass_mean[name] for name in groups], dtype="float64")
        sds = np.array([mass_sd[name] for name in groups], dtype="float64")

        # Draw all masses for the year at once; ensure they are non-negative
        masses = np.maximum(0.0, np_rng.normal(means, sds))

        year_parts.append(np.full(num_meteors, start_year + offset, dtype="int64"))
        mass_parts.append(masses)

    # Build the frame once: repeatedly concatenating onto an empty frame
    # copies all previous rows on every iteration.
    if year_parts:
        year_column = np.concatenate(year_parts)
        mass_column = np.concatenate(mass_parts)
    else:
        year_column = np.empty(0, dtype="int64")
        mass_column = np.empty(0, dtype="float64")

    return pd.DataFrame({"year": year_column, "mass": mass_column})


In [5]:
# simulate_meteorite_dataset draws from both the global `random` and
# `np.random` generators, so seed both to make the exported CSV reproducible
# on Restart & Run All.
SEED = 42
N_YEARS = 39  # number of consecutive years to simulate
random.seed(SEED)
np.random.seed(SEED)

data = simulate_meteorite_dataset(N_YEARS)
data.to_csv("meteorite_data3.csv", index=False)