In [1]:
import pandas as pd

In [3]:
# Load the raw waste-generation records into a DataFrame. The path is relative,
# so it is resolved against the kernel's current working directory.
dataset = pd.read_csv("waste_generation_data.csv")

In [8]:
# Normalize column headers to lower snake_case. Surrounding whitespace is
# stripped *before* spaces are replaced, so a padded header such as
# " Latitude " becomes "latitude" rather than "_latitude_".
dataset.columns = [name.strip().lower().replace(" ", "_") for name in dataset.columns]

In [10]:
# Persist the frame with normalized headers. index=False keeps the default
# row index out of the file, so reading it back does not produce a spurious
# "Unnamed: 0" column.
dataset.to_csv("waste_generation.csv", index=False)

In [11]:
import math

def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance in kilometers between two points on
    the earth (specified in decimal degrees).

    :param lat1: latitude of the first point, in decimal degrees
    :param lon1: longitude of the first point, in decimal degrees
    :param lat2: latitude of the second point, in decimal degrees
    :param lon2: longitude of the second point, in decimal degrees
    :return: distance in kilometers on a sphere of radius 6371 km
             (NaN if any input is NaN)
    """
    # convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
    # haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` a hair above 1.0 for (near-)antipodal
    # points, which would make asin() raise "math domain error". A comparison is
    # used instead of min() so that a NaN value still propagates as NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * math.asin(math.sqrt(a))
    r = 6371  # Radius of Earth in kilometers. Use 3956 for miles
    return c * r



def top_n_nearest_rows(lat, lon, n, dataset):
    """
    Find the top n nearest rows in the dataset to the given lat and lon.

    Rows are ranked by haversine distance from (lat, lon). Rows at exactly the
    same distance keep their original dataset order.

    :param lat: latitude of the point to compare
    :param lon: longitude of the point to compare
    :param n: number of nearest points to find
    :param dataset: DataFrame with "latitude" and "longitude" columns
    :return: list of the top n nearest rows, each as a dict keyed by column name
    """
    scored_rows = []

    for _, row in dataset.iterrows():
        item_lat, item_lon = float(row["latitude"]), float(row["longitude"])
        distance = haversine(lat, lon, item_lat, item_lon)
        scored_rows.append((distance, row.to_dict()))  # keep the whole row alongside its distance

    # Sort on the distance only. Sorting the raw tuples would fall back to
    # comparing the row dicts whenever two distances are equal, and dicts do
    # not support "<", so a tie would raise TypeError.
    scored_rows.sort(key=lambda pair: pair[0])

    # Drop the distances and return just the n closest rows
    return [row for _, row in scored_rows[:n]]


In [12]:
# Query point, in decimal degrees
lat = 30.0827
lon = 80.2707

# `dataset` is the DataFrame loaded earlier in this notebook
top_n = top_n_nearest_rows(lat=lat, lon=lon, n=5, dataset=dataset)  # five closest rows
print(f"The top 5 nearest rows to ({lat}, {lon}) are \n {top_n}")




The top 5 nearest rows to (30.0827, 80.2707) are 
 [{'entry_id': 128, 'waste_amount_(ton)': 194.61, 'latitude': 27.081429, 'longitude': 78.537563, 'date_of_record': '2023-05-30', 'waste_type': 'Hazardous', 'source': 'Residential', 'disposal_method': 'Landfill', 'comments': nan}, {'entry_id': 519, 'waste_amount_(ton)': 25.55, 'latitude': 28.35782, 'longitude': 86.826768, 'date_of_record': '2023-09-17', 'waste_type': 'Hazardous', 'source': 'Commercial', 'disposal_method': 'Composted', 'comments': nan}, {'entry_id': 602, 'waste_amount_(ton)': 2.69, 'latitude': 28.6476315, 'longitude': 72.495275, 'date_of_record': '2023-04-05', 'waste_type': 'Hazardous', 'source': 'Agricultural', 'disposal_method': 'Landfill', 'comments': nan}, {'entry_id': 196, 'waste_amount_(ton)': 205.33, 'latitude': 40.216736, 'longitude': 80.801225, 'date_of_record': '2023-08-19', 'waste_type': 'Electronic', 'source': 'Agricultural', 'disposal_method': 'Landfill', 'comments': nan}, {'entry_id': 662, 'waste_amount_(ton

In [14]:
# Mean of "waste_amount_(ton)" over the rows held in `top_n`
avg_waste_amount_ton = sum(entry["waste_amount_(ton)"] for entry in top_n) / len(top_n)

In [15]:
# Display the mean waste amount computed from `top_n`
avg_waste_amount_ton

162.44

In [16]:
def get_avg_waste(query_lat=None, query_lon=None, n=10, csv_path="waste_generation.csv"):
    """
    Average "waste_amount_(ton)" over the n rows of a saved CSV that lie
    nearest to a query point.

    Calling it with no arguments behaves as before: the notebook-level `lat`
    and `lon` are used as the query point, n is 10, and the data is read from
    "waste_generation.csv".

    :param query_lat: latitude of the query point; falls back to the
                      notebook-level `lat` when None
    :param query_lon: longitude of the query point; falls back to the
                      notebook-level `lon` when None
    :param n: number of nearest rows to average over
    :param csv_path: path of the CSV file to read
    :return: mean of "waste_amount_(ton)" over the selected rows
    :raises ZeroDivisionError: if no rows are selected (e.g. the file has no
                               data rows or n is 0)
    """
    # Resolve the query point at call time so later changes to the notebook
    # globals are still picked up when no explicit point is given.
    if query_lat is None:
        query_lat = lat
    if query_lon is None:
        query_lon = lon

    dataset = pd.read_csv(csv_path)
    top_n = top_n_nearest_rows(query_lat, query_lon, n, dataset)
    avg_waste_amount_ton = sum(row["waste_amount_(ton)"] for row in top_n) / len(top_n)
    return avg_waste_amount_ton

In [17]:
# Compute the average via get_avg_waste(), which reads the saved CSV itself
value = get_avg_waste()

In [18]:
# Display the result returned by get_avg_waste()
value

170.061