From cade4132f4e9eeea56cacd2e8c5599a6717f9ee1 Mon Sep 17 00:00:00 2001 From: oldoc63 Date: Fri, 22 Oct 2021 15:52:59 -0400 Subject: [PATCH] Proportions are more informative than value_counts() #199 --- categorical/script.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/categorical/script.py b/categorical/script.py index df21202..9c05e0c 100644 --- a/categorical/script.py +++ b/categorical/script.py @@ -1,24 +1,8 @@ import pandas as pd -import numpy as np -# Read NYC Trees Data -nyc_trees = pd.read_csv("nyc_tree_census2.csv") +# Read NYC Trees data +nyc_trees = pd.read_csv("./nyc_tree_census.csv") -size_labels_ordered = ['Small (0-3in)', 'Medium (3-10in)', 'Medium-Large (10-18in)', 'Large (18-24in)','Very large (>24in)'] - -nyc_trees.tree_diam_category = pd.Categorical(nyc_trees.tree_diam_category, size_labels_ordered, ordered=True) - -# Calculate 25th Percentile Category -p25_tree_diam_index = np.percentile(nyc_trees.tree_diam_category.cat.codes, 25) -print(p25_tree_diam_index) - -p25_tree_diam_category = size_labels_ordered[int(p25_tree_diam_index)] -print(p25_tree_diam_category) - -# Calculate 75th Percentile Category -p75_tree_diam_index = np.percentile(nyc_trees.tree_diam_category.cat.codes, 75) -print(p75_tree_diam_index) - -p75_tree_diam_category = size_labels_ordered[int(p75_tree_diam_index)] -print(p75_tree_diam_category) +# Table of proportions for status +tree_status_proportions = nyc_trees.status.value_counts()/len(nyc_trees['status'])