From 4c021de75b27832b965bc4b70a008f1107f6ff1f Mon Sep 17 00:00:00 2001 From: oldoc63 Date: Wed, 27 Oct 2021 12:58:06 -0400 Subject: [PATCH] Summarizin an ordinal category #211 --- summAutoEval/autoEval.ipynb | 84 +++++++++++++++++++++++++++++-------- summAutoEval/script.py | 13 ++++++ 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/summAutoEval/autoEval.ipynb b/summAutoEval/autoEval.ipynb index 4582000..56cac88 100644 --- a/summAutoEval/autoEval.ipynb +++ b/summAutoEval/autoEval.ipynb @@ -3,17 +3,18 @@ { "cell_type": "code", "execution_count": 1, - "id": "f961e22b", + "id": "3892cc84", "metadata": {}, "outputs": [], "source": [ - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "eec8cecf", + "execution_count": 2, + "id": "56982b85", "metadata": {}, "outputs": [], "source": [ @@ -22,8 +23,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "4d1c9e0c", + "execution_count": 3, + "id": "46cc7da7", "metadata": {}, "outputs": [ { @@ -52,8 +53,8 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "da5fed0d", + "execution_count": 4, + "id": "eb961155", "metadata": {}, "outputs": [], "source": [ @@ -62,8 +63,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "319136c5", + "execution_count": 5, + "id": "06be8102", "metadata": {}, "outputs": [ { @@ -87,8 +88,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "496ac1d4", + "execution_count": 6, + "id": "2628a909", "metadata": {}, "outputs": [], "source": [ @@ -97,8 +98,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "03b74363", + "execution_count": 7, + "id": "54a2862a", "metadata": {}, "outputs": [ { @@ -115,8 +116,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "57684edc", + "execution_count": 8, + "id": "c65115f2", "metadata": {}, "outputs": [ { @@ -137,10 +138,59 @@ "print(manufacturer_country_proportions.head())" ] }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a61c8ee1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['vhigh' 'med' 'low' 'high']\n" + ] + } + ], + "source": [ + "print(car_eval.buying_cost.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ee72e36c", + "metadata": {}, + "outputs": [], + "source": [ + "buying_cost_categories = ['low', 'med', 'high', 'vhigh']\n", + "car_eval['buying_cost'] = pd.Categorical(car_eval['buying_cost'], buying_cost_categories, ordered=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1ef72999", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0\n" + ] + } + ], + "source": [ + "buying_cost_median = np.median(car_eval['buying_cost'].cat.codes)\n", + "\n", + "print(buying_cost_median)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "af9a3594", + "id": "719fa00f", "metadata": {}, "outputs": [], "source": [] diff --git a/summAutoEval/script.py b/summAutoEval/script.py index cfc4637..623482c 100644 --- a/summAutoEval/script.py +++ b/summAutoEval/script.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np car_eval = pd.read_csv('car_eval_dataset.csv') print(car_eval.head()) @@ -11,3 +12,15 @@ manufacturer_country_proportions = car_eval.manufacturer_country.value_counts(normalize=True) print(manufacturer_country_proportions.head()) + +print(car_eval.buying_cost.unique()) + +buying_cost_categories = ['low', 'med', 'high', 'vhigh'] + +car_eval['buying_cost'] = pd.Categorical(car_eval['buying_cost'], buying_cost_categories, ordered=True) + +buying_cost_median = np.median(car_eval['buying_cost'].cat.codes) + +print(buying_cost_median) + +