### **Krit's Notebook**

### **The Best Neighborhood**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Multiplier applied to each dataset's per-neighborhood count when the
# datasets are combined into a single score (larger value = counts for more)
weight_factors = dict(
    green_spaces=2,
    greenways=3,
    recycling_centers=2,
    smart_trash_containers=1,
)

# CSV file that backs each dataset, keyed by dataset name.
# NOTE(review): 'recyling' is how the recycling-centers file name is spelled
# here; confirm it matches the file on disk before "correcting" it.
dataset_files = {
    'green_spaces': 'green-spaces-locations-pgh.csv',
    'greenways': 'greenways-locations-pgh.csv',
    'recycling_centers': 'recyling-centers-locations-pgh.csv',
    'smart_trash_containers': 'smart-trash-locations-pgh.csv',
}

# Load every CSV into a DataFrame, keeping the same keys and order
datasets = {name: pd.read_csv(path) for name, path in dataset_files.items()}

# Build, for every dataset, a Series of "number of entries per neighborhood".
# The neighborhood column has a different name in each file, so each dataset
# needs its own handling.
counts = {}
for dataset_name, dataset in datasets.items():
    if dataset_name == 'green_spaces':
        # Keep only the rows that have a Facility value, then tally them
        # by neighborhood
        green_spaces = dataset.loc[dataset['Facility'].notna()]
        tally = green_spaces['Neighborhood'].value_counts()
    elif dataset_name == 'greenways':
        # Tally greenways by their 'nhood' column
        tally = dataset['nhood'].value_counts()
    elif dataset_name in ('recycling_centers', 'smart_trash_containers'):
        # Both files use a 'neighborhood' column; count the rows in each group
        tally = dataset.groupby('neighborhood').size()
    else:
        # Unrecognized dataset names get no entry in counts
        continue
    counts[dataset_name] = tally

# Calculate weighted scores for each neighborhood.
# A neighborhood's score is the sum, over all datasets, of
#     (dataset weight) x (number of entries that dataset has for the neighborhood).
# Accumulating dataset by dataset scores every neighborhood that appears in
# any dataset, not only those with a green space; a neighborhood missing
# from a dataset simply contributes nothing for that dataset.
weighted_scores = {}
for dataset_name, dataset_count in counts.items():
    # Datasets without an explicit weight factor default to a weight of 1
    weight = weight_factors.get(dataset_name, 1)
    for neighborhood, count in dataset_count.items():
        weighted_scores[neighborhood] = weighted_scores.get(neighborhood, 0) + weight * count

# Pick the (neighborhood, score) pair with the highest score.
# On a tie, the neighborhood that was inserted into weighted_scores first wins.
best_neighborhood, best_score = max(weighted_scores.items(), key=lambda entry: entry[1])

# Data Visualization - horizontal bar chart of every scored neighborhood
# Turn the score mapping into a two-column table ordered from the highest
# score to the lowest
neighborhood_scores_df = pd.DataFrame(
    list(weighted_scores.items()),
    columns=['Neighborhood', 'Weighted Score'],
).sort_values(by='Weighted Score', ascending=False)

# Draw the chart through explicit figure/axes objects
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(
    neighborhood_scores_df['Neighborhood'],
    neighborhood_scores_df['Weighted Score'],
    color='skyblue',
)
ax.set_xlabel('Weighted Score')
ax.set_ylabel('Neighborhood')
ax.set_title('Weighted Scores for Each Neighborhood')
ax.grid(axis='x')  # grid lines for the score axis only
fig.tight_layout()
plt.show()

# Report the winning neighborhood and its score, one "label value" line each
summary_lines = (
    ("The best neighborhood overall based on the weighted factors is:", best_neighborhood),
    ("Weighted score (overall):", best_score),
)
for label, value in summary_lines:
    print(label, value)