From 9fd9026eaae5a1f0605e66e5413b8026c5a393a6 Mon Sep 17 00:00:00 2001 From: oldoc63 Date: Wed, 27 Oct 2021 11:58:10 -0400 Subject: [PATCH] Summarizing a nominal var with frequencies and proportions #209 --- .../autoEval-checkpoint.ipynb | 83 +++++++++ summAutoEval/autoEval.ipynb | 170 ++++++++++++++++++ summAutoEval/script.py | 9 + 3 files changed, 262 insertions(+) create mode 100644 summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb create mode 100644 summAutoEval/autoEval.ipynb diff --git a/summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb b/summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb new file mode 100644 index 0000000..98e5055 --- /dev/null +++ b/summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "22d044d3", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "07497e4d", + "metadata": {}, + "outputs": [], + "source": [ + "car_eval = pd.read_csv('car_eval_dataset.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0be19bea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " buying_cost maintenance_cost doors capacity luggage safety acceptability \\\n", + "0 vhigh low 4 4 small med unacc \n", + "1 vhigh med 3 4 small high acc \n", + "2 med high 3 2 med high unacc \n", + "3 low med 4 more big low unacc \n", + "4 low high 2 more med high acc \n", + "\n", + " manufacturer_country \n", + "0 China \n", + "1 France \n", + "2 United States \n", + "3 United States \n", + "4 South Korea \n" + ] + } + ], + "source": [ + "print(car_eval.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37008d81", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/summAutoEval/autoEval.ipynb b/summAutoEval/autoEval.ipynb new file mode 100644 index 0000000..4582000 --- /dev/null +++ b/summAutoEval/autoEval.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f961e22b", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "eec8cecf", + "metadata": {}, + "outputs": [], + "source": [ + "car_eval = pd.read_csv('car_eval_dataset.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4d1c9e0c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " buying_cost maintenance_cost doors capacity luggage safety acceptability \\\n", + "0 vhigh low 4 4 small med unacc \n", + "1 vhigh med 3 4 small high acc \n", + "2 med high 3 2 med high unacc \n", + "3 low med 4 more big low unacc \n", + "4 low high 2 more med high acc \n", + "\n", + " manufacturer_country \n", + "0 China \n", + "1 France \n", + "2 United States \n", + "3 United States \n", + "4 South Korea \n" + ] + } + ], + "source": [ + "print(car_eval.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "da5fed0d", + "metadata": {}, + "outputs": [], + "source": [ + "manufacturer_country_counts = car_eval.manufacturer_country.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "319136c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Japan 228\n", + "Germany 218\n", + "South Korea 159\n", + "United States 138\n", + "Italy 97\n", + "France 87\n", + "China 73\n", + "Name: manufacturer_country, dtype: int64\n" + ] + } + ], + "source": [ + "print(manufacturer_country_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "496ac1d4", + "metadata": {}, + "outputs": [], + "source": [ + "fourth_manufacturer_country = manufacturer_country_counts.index[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "03b74363", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "United States\n" + ] + } + ], + "source": [ + "print(fourth_manufacturer_country)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "57684edc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Japan 0.228\n", + "Germany 0.218\n", + "South Korea 0.159\n", + "United States 0.138\n", + "Italy 0.097\n", + "Name: manufacturer_country, dtype: float64\n" + ] + } + ], + "source": [ + "manufacturer_country_proportions = car_eval.manufacturer_country.value_counts(normalize=True)\n", + "print(manufacturer_country_proportions.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af9a3594", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/summAutoEval/script.py b/summAutoEval/script.py index aec51e0..cfc4637 100644 --- a/summAutoEval/script.py +++ b/summAutoEval/script.py @@ -2,3 +2,12 @@ car_eval = pd.read_csv('car_eval_dataset.csv') print(car_eval.head()) + +manufacturer_country_counts = car_eval.manufacturer_country.value_counts() +print(manufacturer_country_counts) + +fourth_manufacturer_country = manufacturer_country_counts.index[3] +print(fourth_manufacturer_country) + +manufacturer_country_proportions = car_eval.manufacturer_country.value_counts(normalize=True) +print(manufacturer_country_proportions.head())