From 9fd9026eaae5a1f0605e66e5413b8026c5a393a6 Mon Sep 17 00:00:00 2001
From: oldoc63 <leopoldo.olmos@gmail.com>
Date: Wed, 27 Oct 2021 11:58:10 -0400
Subject: [PATCH] Summarizing a nominal var with frequencies and proportions
 #209

---
 .../autoEval-checkpoint.ipynb                 |  83 +++++++++
 summAutoEval/autoEval.ipynb                   | 170 ++++++++++++++++++
 summAutoEval/script.py                        |   9 +
 3 files changed, 262 insertions(+)
 create mode 100644 summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb
 create mode 100644 summAutoEval/autoEval.ipynb

diff --git a/summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb b/summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb
new file mode 100644
index 0000000..98e5055
--- /dev/null
+++ b/summAutoEval/.ipynb_checkpoints/autoEval-checkpoint.ipynb
@@ -0,0 +1,83 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "22d044d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "07497e4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "car_eval = pd.read_csv('car_eval_dataset.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "0be19bea",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  buying_cost maintenance_cost doors capacity luggage safety acceptability  \\\n",
+      "0       vhigh              low     4        4   small    med         unacc   \n",
+      "1       vhigh              med     3        4   small   high           acc   \n",
+      "2         med             high     3        2     med   high         unacc   \n",
+      "3         low              med     4     more     big    low         unacc   \n",
+      "4         low             high     2     more     med   high           acc   \n",
+      "\n",
+      "  manufacturer_country  \n",
+      "0                China  \n",
+      "1               France  \n",
+      "2        United States  \n",
+      "3        United States  \n",
+      "4          South Korea  \n"
+     ]
+    }
+   ],
+   "source": [
+    "print(car_eval.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "37008d81",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/summAutoEval/autoEval.ipynb b/summAutoEval/autoEval.ipynb
new file mode 100644
index 0000000..4582000
--- /dev/null
+++ b/summAutoEval/autoEval.ipynb
@@ -0,0 +1,170 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "f961e22b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "eec8cecf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "car_eval = pd.read_csv('car_eval_dataset.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4d1c9e0c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  buying_cost maintenance_cost doors capacity luggage safety acceptability  \\\n",
+      "0       vhigh              low     4        4   small    med         unacc   \n",
+      "1       vhigh              med     3        4   small   high           acc   \n",
+      "2         med             high     3        2     med   high         unacc   \n",
+      "3         low              med     4     more     big    low         unacc   \n",
+      "4         low             high     2     more     med   high           acc   \n",
+      "\n",
+      "  manufacturer_country  \n",
+      "0                China  \n",
+      "1               France  \n",
+      "2        United States  \n",
+      "3        United States  \n",
+      "4          South Korea  \n"
+     ]
+    }
+   ],
+   "source": [
+    "print(car_eval.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "da5fed0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "manufacturer_country_counts = car_eval.manufacturer_country.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "319136c5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Japan            228\n",
+      "Germany          218\n",
+      "South Korea      159\n",
+      "United States    138\n",
+      "Italy             97\n",
+      "France            87\n",
+      "China             73\n",
+      "Name: manufacturer_country, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(manufacturer_country_counts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "496ac1d4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fourth_manufacturer_country = manufacturer_country_counts.index[3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "03b74363",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "United States\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(fourth_manufacturer_country)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "57684edc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Japan            0.228\n",
+      "Germany          0.218\n",
+      "South Korea      0.159\n",
+      "United States    0.138\n",
+      "Italy            0.097\n",
+      "Name: manufacturer_country, dtype: float64\n"
+     ]
+    }
+   ],
+   "source": [
+    "manufacturer_country_proportions = car_eval.manufacturer_country.value_counts(normalize=True)\n",
+    "print(manufacturer_country_proportions.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af9a3594",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/summAutoEval/script.py b/summAutoEval/script.py
index aec51e0..cfc4637 100644
--- a/summAutoEval/script.py
+++ b/summAutoEval/script.py
@@ -2,3 +2,12 @@
 
 car_eval = pd.read_csv('car_eval_dataset.csv')
 print(car_eval.head())
+
+manufacturer_country_counts = car_eval.manufacturer_country.value_counts()  # rows per country, most frequent first
+print(manufacturer_country_counts)
+
+fourth_manufacturer_country = manufacturer_country_counts.index[3]  # label at position 3: the fourth most frequent country
+print(fourth_manufacturer_country)
+
+manufacturer_country_proportions = car_eval.manufacturer_country.value_counts(normalize=True)  # relative frequencies instead of counts
+print(manufacturer_country_proportions.head())  # head() limits the printout to the first five entries