From d5b424f4c0f75b271be2a560842431adccee8012 Mon Sep 17 00:00:00 2001 From: Sanober Khoso Date: Fri, 13 Oct 2023 22:56:19 +0200 Subject: [PATCH] SanoberKhoso --- your-code/main.ipynb | 720 +++++++++++++++++++++++++++---------------- 1 file changed, 459 insertions(+), 261 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 8a76302..2732ebc 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -1,261 +1,459 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Introduction to Pandas Lab\n", - "\n", - "Complete the following set of exercises to solidify your knowledge of Pandas fundamentals." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Import Numpy and Pandas and alias them to `np` and `pd` respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Create a Pandas Series containing the elements of the list below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lst = [5.7, 75.2, 74.4, 84.0, 66.5, 66.3, 55.8, 75.7, 29.1, 43.7]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Use indexing to return the third value in the Series above.\n", - "\n", - "*Hint: Remember that indexing begins at 0.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Create a Pandas DataFrame from the list of lists below. Each sublist should be represented as a row." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", - " [61.3, 40.8, 30.8, 37.8, 87.6],\n", - " [20.6, 73.2, 44.2, 14.6, 91.8],\n", - " [57.4, 0.1, 96.1, 4.2, 69.5],\n", - " [83.6, 20.5, 85.4, 22.8, 35.9],\n", - " [49.0, 69.0, 0.1, 31.8, 89.1],\n", - " [23.3, 40.7, 95.0, 83.8, 26.9],\n", - " [27.6, 26.4, 53.8, 88.8, 68.5],\n", - " [96.6, 96.4, 53.4, 72.4, 50.1],\n", - " [73.7, 39.0, 43.2, 81.6, 34.7]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Rename the data frame columns based on the names in the list below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. Create a subset of this data frame that contains only the Score 1, 3, and 5 columns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. From the original data frame, calculate the average Score_3 value." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 8. From the original data frame, calculate the maximum Score_4 value." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 9. From the original data frame, calculate the median Score 2 value." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 10. Create a Pandas DataFrame from the dictionary of product orders below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "orders = {'Description': ['LUNCH BAG APPLE DESIGN',\n", - " 'SET OF 60 VINTAGE LEAF CAKE CASES ',\n", - " 'RIBBON REEL STRIPES DESIGN ',\n", - " 'WORLD WAR 2 GLIDERS ASSTD DESIGNS',\n", - " 'PLAYING CARDS JUBILEE UNION JACK',\n", - " 'POPCORN HOLDER',\n", - " 'BOX OF VINTAGE ALPHABET BLOCKS',\n", - " 'PARTY BUNTING',\n", - " 'JAZZ HEARTS ADDRESS BOOK',\n", - " 'SET OF 4 SANTA PLACE SETTINGS'],\n", - " 'Quantity': [1, 24, 1, 2880, 2, 7, 1, 4, 10, 48],\n", - " 'UnitPrice': [1.65, 0.55, 1.65, 0.18, 1.25, 0.85, 11.95, 4.95, 0.19, 1.25],\n", - " 'Revenue': [1.65, 13.2, 1.65, 518.4, 2.5, 5.95, 11.95, 19.8, 1.9, 60.0]}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 11. Calculate the total quantity ordered and revenue generated from these orders." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 12. Obtain the prices of the most expensive and least expensive items ordered and print the difference." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction to Pandas Lab\n", + "\n", + "Complete the following set of exercises to solidify your knowledge of Pandas fundamentals." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Import Numpy and Pandas and alias them to `np` and `pd` respectively." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Create a Pandas Series containing the elements of the list below." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "lst = [5.7, 75.2, 74.4, 84.0, 66.5, 66.3, 55.8, 75.7, 29.1, 43.7]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lst_series = pd.Series(lst)\n", + "lst_series" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Use indexing to return the third value in the Series above.\n", + "\n", + "*Hint: Remember that indexing begins at 0.*" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74.4" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lst_series[2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Create a Pandas DataFrame from the list of lists below. Each sublist should be represented as a row." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", + " [61.3, 40.8, 30.8, 37.8, 87.6],\n", + " [20.6, 73.2, 44.2, 14.6, 91.8],\n", + " [57.4, 0.1, 96.1, 4.2, 69.5],\n", + " [83.6, 20.5, 85.4, 22.8, 35.9],\n", + " [49.0, 69.0, 0.1, 31.8, 89.1],\n", + " [23.3, 40.7, 95.0, 83.8, 26.9],\n", + " [27.6, 26.4, 53.8, 88.8, 68.5],\n", + " [96.6, 96.4, 53.4, 72.4, 50.1],\n", + " [73.7, 39.0, 43.2, 81.6, 34.7]]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7\n" + ] + } + ], + "source": [ + "df = pd.DataFrame(b)\n", + "print(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Rename the data frame columns based on the names in the list below." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7\n" + ] + } + ], + "source": [ + "df.columns = colnames\n", + "print(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Create a subset of this data frame that contains only the Score 1, 3, and 5 columns." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7\n" + ] + } + ], + "source": [ + "df_subset = df[['Score_1', 'Score_3', 'Score_5']]\n", + "print(df_subset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. From the original data frame, calculate the average Score_3 value." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "56.95000000000001" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.mean(df['Score_3'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 8. From the original data frame, calculate the maximum Score_4 value." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "88.8" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "max(df['Score_4'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9. From the original data frame, calculate the median Score 2 value." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "40.75" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.median(df['Score_2'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 10. Create a Pandas DataFrame from the dictionary of product orders below." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "orders = {'Description': ['LUNCH BAG APPLE DESIGN',\n", + " 'SET OF 60 VINTAGE LEAF CAKE CASES ',\n", + " 'RIBBON REEL STRIPES DESIGN ',\n", + " 'WORLD WAR 2 GLIDERS ASSTD DESIGNS',\n", + " 'PLAYING CARDS JUBILEE UNION JACK',\n", + " 'POPCORN HOLDER',\n", + " 'BOX OF VINTAGE ALPHABET BLOCKS',\n", + " 'PARTY BUNTING',\n", + " 'JAZZ HEARTS ADDRESS BOOK',\n", + " 'SET OF 4 SANTA PLACE SETTINGS'],\n", + " 'Quantity': [1, 24, 1, 2880, 2, 7, 1, 4, 10, 48],\n", + " 'UnitPrice': [1.65, 0.55, 1.65, 0.18, 1.25, 0.85, 11.95, 4.95, 0.19, 1.25],\n", + " 'Revenue': [1.65, 13.2, 1.65, 518.4, 2.5, 5.95, 11.95, 19.8, 1.9, 60.0]}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00\n" + ] + } + ], + "source": [ + "orders_df = pd.DataFrame(orders)\n", + "print(orders_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 11. Calculate the total quantity ordered and revenue generated from these orders." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Quantity: 2978 \n", + "Total Revenue: 637.0\n" + ] + } + ], + "source": [ + "total_qty = sum(orders_df['Quantity'])\n", + "total_revenue = sum(orders_df['Revenue'])\n", + "\n", + "print(f'Total Quantity: {total_qty} \\nTotal Revenue: {total_revenue}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 12. Obtain the prices of the most expensive and least expensive items ordered and print the difference." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The least expensive item costs 0.18 and the max expensive item costs 11.95 and the difference is 11.77\n" + ] + } + ], + "source": [ + "min_price = min(orders_df['UnitPrice'])\n", + "max_price = max(orders_df['UnitPrice'])\n", + "diff_price = max_price - min_price\n", + "\n", + "print(f'The least expensive item costs {min_price} and the max expensive item costs {max_price} and the difference is {diff_price}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}