diff --git a/your-code/notebooks/main.ipynb b/your-code/notebooks/main.ipynb
index 07928c1..507550e 100755
--- a/your-code/notebooks/main.ipynb
+++ b/your-code/notebooks/main.ipynb
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
@@ -60,11 +60,169 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 119,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Size | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Price | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Photo Editor & Candy Camera & Grid & ScrapBook | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.1 | \n",
+ " 159 | \n",
+ " 19M | \n",
+ " 10,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " January 7, 2018 | \n",
+ " 1.0.0 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Coloring book moana | \n",
+ " ART_AND_DESIGN | \n",
+ " 3.9 | \n",
+ " 967 | \n",
+ " 14M | \n",
+ " 500,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design;Pretend Play | \n",
+ " January 15, 2018 | \n",
+ " 2.0.0 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " U Launcher Lite – FREE Live Cool Themes, Hide ... | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.7 | \n",
+ " 87510 | \n",
+ " 8.7M | \n",
+ " 5,000,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " August 1, 2018 | \n",
+ " 1.2.4 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Sketch - Draw & Paint | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.5 | \n",
+ " 215644 | \n",
+ " 25M | \n",
+ " 50,000,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Teen | \n",
+ " Art & Design | \n",
+ " June 8, 2018 | \n",
+ " Varies with device | \n",
+ " 4.2 and up | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Pixel Draw - Number Art Coloring Book | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.3 | \n",
+ " 967 | \n",
+ " 2.8M | \n",
+ " 100,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design;Creativity | \n",
+ " June 20, 2018 | \n",
+ " 1.1 | \n",
+ " 4.4 and up | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating \\\n",
+ "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN 4.1 \n",
+ "1 Coloring book moana ART_AND_DESIGN 3.9 \n",
+ "2 U Launcher Lite – FREE Live Cool Themes, Hide ... ART_AND_DESIGN 4.7 \n",
+ "3 Sketch - Draw & Paint ART_AND_DESIGN 4.5 \n",
+ "4 Pixel Draw - Number Art Coloring Book ART_AND_DESIGN 4.3 \n",
+ "\n",
+ " Reviews Size Installs Type Price Content Rating \\\n",
+ "0 159 19M 10,000+ Free 0 Everyone \n",
+ "1 967 14M 500,000+ Free 0 Everyone \n",
+ "2 87510 8.7M 5,000,000+ Free 0 Everyone \n",
+ "3 215644 25M 50,000,000+ Free 0 Teen \n",
+ "4 967 2.8M 100,000+ Free 0 Everyone \n",
+ "\n",
+ " Genres Last Updated Current Ver \\\n",
+ "0 Art & Design January 7, 2018 1.0.0 \n",
+ "1 Art & Design;Pretend Play January 15, 2018 2.0.0 \n",
+ "2 Art & Design August 1, 2018 1.2.4 \n",
+ "3 Art & Design June 8, 2018 Varies with device \n",
+ "4 Art & Design;Creativity June 20, 2018 1.1 \n",
+ "\n",
+ " Android Ver \n",
+ "0 4.0.3 and up \n",
+ "1 4.0.3 and up \n",
+ "2 4.0.3 and up \n",
+ "3 4.2 and up \n",
+ "4 4.4 and up "
+ ]
+ },
+ "execution_count": 119,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the app catalogue via a path relative to this notebook so it runs on any machine.\n",
+ "google_play = pd.read_csv('../../data/googleplaystore.csv')\n",
+ "google_play.head()\n"
]
},
{
@@ -76,11 +234,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 120,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App object\n",
+ "Category object\n",
+ "Rating float64\n",
+ "Reviews object\n",
+ "Size object\n",
+ "Installs object\n",
+ "Type object\n",
+ "Price object\n",
+ "Content Rating object\n",
+ "Genres object\n",
+ "Last Updated object\n",
+ "Current Ver object\n",
+ "Android Ver object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 120,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "google_play.dtypes"
]
},
{
@@ -92,11 +275,168 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 121,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Size | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Price | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Photo Editor & Candy Camera & Grid & ScrapBook | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.1 | \n",
+ " 159 | \n",
+ " 19M | \n",
+ " 10,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " January 7, 2018 | \n",
+ " 1.0.0 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Coloring book moana | \n",
+ " ART_AND_DESIGN | \n",
+ " 3.9 | \n",
+ " 967 | \n",
+ " 14M | \n",
+ " 500,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design;Pretend Play | \n",
+ " January 15, 2018 | \n",
+ " 2.0.0 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " U Launcher Lite – FREE Live Cool Themes, Hide ... | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.7 | \n",
+ " 87510 | \n",
+ " 8.7M | \n",
+ " 5,000,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " August 1, 2018 | \n",
+ " 1.2.4 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Sketch - Draw & Paint | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.5 | \n",
+ " 215644 | \n",
+ " 25M | \n",
+ " 50,000,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Teen | \n",
+ " Art & Design | \n",
+ " June 8, 2018 | \n",
+ " Varies with device | \n",
+ " 4.2 and up | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Pixel Draw - Number Art Coloring Book | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.3 | \n",
+ " 967 | \n",
+ " 2.8M | \n",
+ " 100,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design;Creativity | \n",
+ " June 20, 2018 | \n",
+ " 1.1 | \n",
+ " 4.4 and up | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating \\\n",
+ "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN 4.1 \n",
+ "1 Coloring book moana ART_AND_DESIGN 3.9 \n",
+ "2 U Launcher Lite – FREE Live Cool Themes, Hide ... ART_AND_DESIGN 4.7 \n",
+ "3 Sketch - Draw & Paint ART_AND_DESIGN 4.5 \n",
+ "4 Pixel Draw - Number Art Coloring Book ART_AND_DESIGN 4.3 \n",
+ "\n",
+ " Reviews Size Installs Type Price Content Rating \\\n",
+ "0 159 19M 10,000+ Free 0 Everyone \n",
+ "1 967 14M 500,000+ Free 0 Everyone \n",
+ "2 87510 8.7M 5,000,000+ Free 0 Everyone \n",
+ "3 215644 25M 50,000,000+ Free 0 Teen \n",
+ "4 967 2.8M 100,000+ Free 0 Everyone \n",
+ "\n",
+ " Genres Last Updated Current Ver \\\n",
+ "0 Art & Design January 7, 2018 1.0.0 \n",
+ "1 Art & Design;Pretend Play January 15, 2018 2.0.0 \n",
+ "2 Art & Design August 1, 2018 1.2.4 \n",
+ "3 Art & Design June 8, 2018 Varies with device \n",
+ "4 Art & Design;Creativity June 20, 2018 1.1 \n",
+ "\n",
+ " Android Ver \n",
+ "0 4.0.3 and up \n",
+ "1 4.0.3 and up \n",
+ "2 4.0.3 and up \n",
+ "3 4.2 and up \n",
+ "4 4.4 and up "
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "google_play.head(5)"
]
},
{
@@ -112,11 +452,46 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Coerce Reviews to numbers in one vectorized call; unparsable entries become NaN.\n",
+ "google_play[\"Reviews_numeric\"] = pd.to_numeric(google_play[\"Reviews\"], errors=\"coerce\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App object\n",
+ "Category object\n",
+ "Rating float64\n",
+ "Reviews object\n",
+ "Size object\n",
+ "Installs object\n",
+ "Type object\n",
+ "Price object\n",
+ "Content Rating object\n",
+ "Genres object\n",
+ "Last Updated object\n",
+ "Current Ver object\n",
+ "Android Ver object\n",
+ "Reviews_numeric float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 123,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "google_play.dtypes"
]
},
{
@@ -128,11 +503,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play[\"Reviews_isnull\"] = google_play[\"Reviews_numeric\"].isnull()\n"
]
},
{
@@ -148,11 +524,89 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 125,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Size | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Price | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ " Reviews_numeric | \n",
+ " Reviews_isnull | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10472 | \n",
+ " Life Made WI-Fi Touchscreen Photo Frame | \n",
+ " 1.9 | \n",
+ " 19.0 | \n",
+ " 3.0M | \n",
+ " 1,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " NaN | \n",
+ " February 11, 2018 | \n",
+ " 1.0.19 | \n",
+ " 4.0 and up | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating Reviews \\\n",
+ "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3.0M \n",
+ "\n",
+ " Size Installs Type Price Content Rating Genres \\\n",
+ "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n",
+ "\n",
+ " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n",
+ "10472 1.0.19 4.0 and up NaN NaN True "
+ ]
+ },
+ "execution_count": 125,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "google_play[google_play[\"Reviews_isnull\"]]\n"
]
},
{
@@ -172,13 +626,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 126,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 126,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here\n",
"\n",
"def convert_string_to_numeric(s):\n",
+ " \n",
" \"\"\"\n",
" Convert a string value to numeric. If the last character of the string is `M`, obtain the \n",
" numeric part of the string, multiply it with 1,000,000, then return the result. Otherwise, \n",
@@ -190,8 +656,14 @@
" Returns:\n",
" The correct numeric value of the Reviews score.\n",
" \"\"\"\n",
- " return np.NaN\n",
- "\n",
+ "    try:\n",
+ "        text = str(s).strip()  # accept non-string input and stray whitespace\n",
+ "        scale = 1_000_000 if text[-1:].upper() == \"M\" else 1  # slicing is safe on ''\n",
+ "        digits = text[:-1] if scale != 1 else text\n",
+ "        return float(digits) * scale\n",
+ "    except ValueError:  # float() rejects empty or non-numeric text\n",
+ "        return np.nan  # lowercase spelling; the np.NaN alias was removed in NumPy 2.0\n",
+ " \n",
"test_string = '4.0M'\n",
"\n",
"convert_string_to_numeric(test_string) == 4000000"
@@ -206,11 +678,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play[\"Reviews\"] = google_play[\"Reviews\"].apply(convert_string_to_numeric)\n"
]
},
{
@@ -224,11 +697,89 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 128,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Size | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Price | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ " Reviews_numeric | \n",
+ " Reviews_isnull | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10472 | \n",
+ " Life Made WI-Fi Touchscreen Photo Frame | \n",
+ " 1.9 | \n",
+ " 19.0 | \n",
+ " 3000000.0 | \n",
+ " 1,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " NaN | \n",
+ " February 11, 2018 | \n",
+ " 1.0.19 | \n",
+ " 4.0 and up | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating Reviews \\\n",
+ "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3000000.0 \n",
+ "\n",
+ " Size Installs Type Price Content Rating Genres \\\n",
+ "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n",
+ "\n",
+ " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n",
+ "10472 1.0.19 4.0 and up NaN NaN True "
+ ]
+ },
+ "execution_count": 128,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Your code here\n",
+ "google_play[google_play[\"Reviews_isnull\"]]\n"
]
},
{
@@ -240,11 +791,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 129,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App object\n",
+ "Category object\n",
+ "Rating float64\n",
+ "Reviews float64\n",
+ "Size object\n",
+ "Installs object\n",
+ "Type object\n",
+ "Price object\n",
+ "Content Rating object\n",
+ "Genres object\n",
+ "Last Updated object\n",
+ "Current Ver object\n",
+ "Android Ver object\n",
+ "Reviews_numeric float64\n",
+ "Reviews_isnull bool\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 129,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "google_play.dtypes"
]
},
{
@@ -258,11 +836,79 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 130,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['19M', '14M', '8.7M', '25M', '2.8M', '5.6M', '29M', '33M', '3.1M',\n",
+ " '28M', '12M', '20M', '21M', '37M', '2.7M', '5.5M', '17M', '39M',\n",
+ " '31M', '4.2M', '7.0M', '23M', '6.0M', '6.1M', '4.6M', '9.2M',\n",
+ " '5.2M', '11M', '24M', 'Varies with device', '9.4M', '15M', '10M',\n",
+ " '1.2M', '26M', '8.0M', '7.9M', '56M', '57M', '35M', '54M', '201k',\n",
+ " '3.6M', '5.7M', '8.6M', '2.4M', '27M', '2.5M', '16M', '3.4M',\n",
+ " '8.9M', '3.9M', '2.9M', '38M', '32M', '5.4M', '18M', '1.1M',\n",
+ " '2.2M', '4.5M', '9.8M', '52M', '9.0M', '6.7M', '30M', '2.6M',\n",
+ " '7.1M', '3.7M', '22M', '7.4M', '6.4M', '3.2M', '8.2M', '9.9M',\n",
+ " '4.9M', '9.5M', '5.0M', '5.9M', '13M', '73M', '6.8M', '3.5M',\n",
+ " '4.0M', '2.3M', '7.2M', '2.1M', '42M', '7.3M', '9.1M', '55M',\n",
+ " '23k', '6.5M', '1.5M', '7.5M', '51M', '41M', '48M', '8.5M', '46M',\n",
+ " '8.3M', '4.3M', '4.7M', '3.3M', '40M', '7.8M', '8.8M', '6.6M',\n",
+ " '5.1M', '61M', '66M', '79k', '8.4M', '118k', '44M', '695k', '1.6M',\n",
+ " '6.2M', '18k', '53M', '1.4M', '3.0M', '5.8M', '3.8M', '9.6M',\n",
+ " '45M', '63M', '49M', '77M', '4.4M', '4.8M', '70M', '6.9M', '9.3M',\n",
+ " '10.0M', '8.1M', '36M', '84M', '97M', '2.0M', '1.9M', '1.8M',\n",
+ " '5.3M', '47M', '556k', '526k', '76M', '7.6M', '59M', '9.7M', '78M',\n",
+ " '72M', '43M', '7.7M', '6.3M', '334k', '34M', '93M', '65M', '79M',\n",
+ " '100M', '58M', '50M', '68M', '64M', '67M', '60M', '94M', '232k',\n",
+ " '99M', '624k', '95M', '8.5k', '41k', '292k', '11k', '80M', '1.7M',\n",
+ " '74M', '62M', '69M', '75M', '98M', '85M', '82M', '96M', '87M',\n",
+ " '71M', '86M', '91M', '81M', '92M', '83M', '88M', '704k', '862k',\n",
+ " '899k', '378k', '266k', '375k', '1.3M', '975k', '980k', '4.1M',\n",
+ " '89M', '696k', '544k', '525k', '920k', '779k', '853k', '720k',\n",
+ " '713k', '772k', '318k', '58k', '241k', '196k', '857k', '51k',\n",
+ " '953k', '865k', '251k', '930k', '540k', '313k', '746k', '203k',\n",
+ " '26k', '314k', '239k', '371k', '220k', '730k', '756k', '91k',\n",
+ " '293k', '17k', '74k', '14k', '317k', '78k', '924k', '902k', '818k',\n",
+ " '81k', '939k', '169k', '45k', '475k', '965k', '90M', '545k', '61k',\n",
+ " '283k', '655k', '714k', '93k', '872k', '121k', '322k', '1.0M',\n",
+ " '976k', '172k', '238k', '549k', '206k', '954k', '444k', '717k',\n",
+ " '210k', '609k', '308k', '705k', '306k', '904k', '473k', '175k',\n",
+ " '350k', '383k', '454k', '421k', '70k', '812k', '442k', '842k',\n",
+ " '417k', '412k', '459k', '478k', '335k', '782k', '721k', '430k',\n",
+ " '429k', '192k', '200k', '460k', '728k', '496k', '816k', '414k',\n",
+ " '506k', '887k', '613k', '243k', '569k', '778k', '683k', '592k',\n",
+ " '319k', '186k', '840k', '647k', '191k', '373k', '437k', '598k',\n",
+ " '716k', '585k', '982k', '222k', '219k', '55k', '948k', '323k',\n",
+ " '691k', '511k', '951k', '963k', '25k', '554k', '351k', '27k',\n",
+ " '82k', '208k', '913k', '514k', '551k', '29k', '103k', '898k',\n",
+ " '743k', '116k', '153k', '209k', '353k', '499k', '173k', '597k',\n",
+ " '809k', '122k', '411k', '400k', '801k', '787k', '237k', '50k',\n",
+ " '643k', '986k', '97k', '516k', '837k', '780k', '961k', '269k',\n",
+ " '20k', '498k', '600k', '749k', '642k', '881k', '72k', '656k',\n",
+ " '601k', '221k', '228k', '108k', '940k', '176k', '33k', '663k',\n",
+ " '34k', '942k', '259k', '164k', '458k', '245k', '629k', '28k',\n",
+ " '288k', '775k', '785k', '636k', '916k', '994k', '309k', '485k',\n",
+ " '914k', '903k', '608k', '500k', '54k', '562k', '847k', '957k',\n",
+ " '688k', '811k', '270k', '48k', '329k', '523k', '921k', '874k',\n",
+ " '981k', '784k', '280k', '24k', '518k', '754k', '892k', '154k',\n",
+ " '860k', '364k', '387k', '626k', '161k', '879k', '39k', '970k',\n",
+ " '170k', '141k', '160k', '144k', '143k', '190k', '376k', '193k',\n",
+ " '246k', '73k', '658k', '992k', '253k', '420k', '404k', '1,000+',\n",
+ " '470k', '226k', '240k', '89k', '234k', '257k', '861k', '467k',\n",
+ " '157k', '44k', '676k', '67k', '552k', '885k', '1020k', '582k',\n",
+ " '619k'], dtype=object)"
+ ]
+ },
+ "execution_count": 130,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "google_play[\"Size\"].unique()"
]
},
{
@@ -278,11 +924,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 131,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The porportion of its occurence to the total number of records is 0.15635089013928605\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "size= google_play[\"Size\"]\n",
+ "porportion = google_play[\"Size\"][size == \"Varies with device\"].count()/len(google_play)\n",
+ "print(\"The porportion of its occurence to the total number of records is\", porportion )\n"
]
},
{
@@ -298,11 +955,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play.drop(\"Size\", axis=1, inplace=True)"
]
},
{
@@ -318,11 +976,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 133,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App 0\n",
+ "Category 0\n",
+ "Rating 1474\n",
+ "Reviews 0\n",
+ "Installs 0\n",
+ "Type 1\n",
+ "Price 0\n",
+ "Content Rating 1\n",
+ "Genres 0\n",
+ "Last Updated 0\n",
+ "Current Ver 8\n",
+ "Android Ver 3\n",
+ "Reviews_numeric 1\n",
+ "Reviews_isnull 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 133,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "google_play.isna().sum()\n"
]
},
{
@@ -338,11 +1022,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 134,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The ratio is: 13.6 %\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "r_ratin= google_play[\"Rating\"].isna().sum() /len(google_play)\n",
+ "\n",
+ "print(\"The ratio is:\", round(r_ratin*100, 2),\"%\")"
]
},
{
@@ -364,11 +1059,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 135,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App 0\n",
+ "Category 0\n",
+ "Rating 0\n",
+ "Reviews 0\n",
+ "Installs 0\n",
+ "Type 0\n",
+ "Price 0\n",
+ "Content Rating 0\n",
+ "Genres 0\n",
+ "Last Updated 0\n",
+ "Current Ver 0\n",
+ "Android Ver 0\n",
+ "Reviews_numeric 0\n",
+ "Reviews_isnull 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 135,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Drop incomplete rows; .copy() detaches the result so later column writes do not warn.\n",
+ "google_missing_removed = google_play.dropna().copy()\n",
+ "google_missing_removed.isna().sum()"
]
},
{
@@ -384,11 +1106,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 136,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Ana\\AppData\\Local\\Temp\\ipykernel_20316\\1626989971.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " google_missing_removed[\"Last Updated\"]= pd.to_datetime(google_missing_removed[\"Last Updated\"])\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed[\"Last Updated\"]= pd.to_datetime(google_missing_removed[\"Last Updated\"])"
]
},
{
@@ -402,11 +1138,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 137,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['0', '$4.99', '$3.99', '$6.99', '$7.99', '$5.99', '$2.99', '$3.49',\n",
+ " '$1.99', '$9.99', '$7.49', '$0.99', '$9.00', '$5.49', '$10.00',\n",
+ " '$24.99', '$11.99', '$79.99', '$16.99', '$14.99', '$29.99',\n",
+ " '$12.99', '$2.49', '$10.99', '$1.50', '$19.99', '$15.99', '$33.99',\n",
+ " '$39.99', '$3.95', '$4.49', '$1.70', '$8.99', '$1.49', '$3.88',\n",
+ " '$399.99', '$17.99', '$400.00', '$3.02', '$1.76', '$4.84', '$4.77',\n",
+ " '$1.61', '$2.50', '$1.59', '$6.49', '$1.29', '$299.99', '$379.99',\n",
+ " '$37.99', '$18.99', '$389.99', '$8.49', '$1.75', '$14.00', '$2.00',\n",
+ " '$3.08', '$2.59', '$19.40', '$3.90', '$4.59', '$15.46', '$3.04',\n",
+ " '$13.99', '$4.29', '$3.28', '$4.60', '$1.00', '$2.95', '$2.90',\n",
+ " '$1.97', '$2.56', '$1.20'], dtype=object)"
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "google_missing_removed[\"Price\"].unique()"
]
},
{
@@ -422,11 +1180,45 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 138,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Ana\\AppData\\Local\\Temp\\ipykernel_20316\\969255384.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " google_missing_removed[\"Price Numerical\"] = google_missing_removed[\"Price\"].str.replace('$', '')\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array(['0', '4.99', '3.99', '6.99', '7.99', '5.99', '2.99', '3.49',\n",
+ " '1.99', '9.99', '7.49', '0.99', '9.00', '5.49', '10.00', '24.99',\n",
+ " '11.99', '79.99', '16.99', '14.99', '29.99', '12.99', '2.49',\n",
+ " '10.99', '1.50', '19.99', '15.99', '33.99', '39.99', '3.95',\n",
+ " '4.49', '1.70', '8.99', '1.49', '3.88', '399.99', '17.99',\n",
+ " '400.00', '3.02', '1.76', '4.84', '4.77', '1.61', '2.50', '1.59',\n",
+ " '6.49', '1.29', '299.99', '379.99', '37.99', '18.99', '389.99',\n",
+ " '8.49', '1.75', '14.00', '2.00', '3.08', '2.59', '19.40', '3.90',\n",
+ " '4.59', '15.46', '3.04', '13.99', '4.29', '3.28', '4.60', '1.00',\n",
+ " '2.95', '2.90', '1.97', '2.56', '1.20'], dtype=object)"
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Strip the currency symbol literally; regex=False keeps '$' from acting as a regex anchor.\n",
+ "google_missing_removed[\"Price Numerical\"] = google_missing_removed[\"Price\"].str.replace('$', '', regex=False)\n",
+ "google_missing_removed[\"Price Numerical\"].unique()"
]
},
{
@@ -438,11 +1230,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 139,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Ana\\AppData\\Local\\Temp\\ipykernel_20316\\1645924248.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " google_missing_removed[\"Price Numerical\"] = pd.to_numeric(google_missing_removed[\"Price Numerical\"], errors = \"coerce\")\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed[\"Price Numerical\"] = pd.to_numeric(google_missing_removed[\"Price Numerical\"], errors = \"coerce\")"
]
},
{
@@ -454,11 +1260,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 140,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Ana\\AppData\\Local\\Temp\\ipykernel_20316\\2114075409.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " google_missing_removed.drop(\"Price\", axis=1, inplace=True)\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed.drop(\"Price\", axis=1, inplace=True)"
]
},
{
@@ -474,11 +1293,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 141,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App object\n",
+ "Category object\n",
+ "Rating float64\n",
+ "Reviews float64\n",
+ "Installs object\n",
+ "Type object\n",
+ "Content Rating object\n",
+ "Genres object\n",
+ "Last Updated datetime64[ns]\n",
+ "Current Ver object\n",
+ "Android Ver object\n",
+ "Reviews_numeric float64\n",
+ "Reviews_isnull bool\n",
+ "Price Numerical float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 141,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "# Your code here\n",
+ "google_missing_removed.dtypes"
]
},
{
@@ -497,11 +1342,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 142,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Load the user reviews via a path relative to this notebook so it runs on any machine.\n",
+ "google_reviews = pd.read_csv('../../data/googleplaystore_user_reviews.csv')"
]
},
{
@@ -515,11 +1361,106 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 143,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Translated_Review | \n",
+ " Sentiment | \n",
+ " Sentiment_Polarity | \n",
+ " Sentiment_Subjectivity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10 Best Foods for You | \n",
+ " I like eat delicious food. That's I'm cooking ... | \n",
+ " Positive | \n",
+ " 1.00 | \n",
+ " 0.533333 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 10 Best Foods for You | \n",
+ " This help eating healthy exercise regular basis | \n",
+ " Positive | \n",
+ " 0.25 | \n",
+ " 0.288462 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 10 Best Foods for You | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 10 Best Foods for You | \n",
+ " Works great especially going grocery store | \n",
+ " Positive | \n",
+ " 0.40 | \n",
+ " 0.875000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 10 Best Foods for You | \n",
+ " Best idea us | \n",
+ " Positive | \n",
+ " 1.00 | \n",
+ " 0.300000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Translated_Review \\\n",
+ "0 10 Best Foods for You I like eat delicious food. That's I'm cooking ... \n",
+ "1 10 Best Foods for You This help eating healthy exercise regular basis \n",
+ "2 10 Best Foods for You NaN \n",
+ "3 10 Best Foods for You Works great especially going grocery store \n",
+ "4 10 Best Foods for You Best idea us \n",
+ "\n",
+ " Sentiment Sentiment_Polarity Sentiment_Subjectivity \n",
+ "0 Positive 1.00 0.533333 \n",
+ "1 Positive 0.25 0.288462 \n",
+ "2 NaN NaN NaN \n",
+ "3 Positive 0.40 0.875000 \n",
+ "4 Positive 1.00 0.300000 "
+ ]
+ },
+ "execution_count": 143,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Your code here\n",
+ "google_reviews.head()"
]
},
{
@@ -546,11 +1487,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 144,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "review_missing_removed = google_reviews.dropna()\n"
]
},
{
@@ -562,11 +1504,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 145,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App\n",
+ "Bowmasters 312\n",
+ "Helix Jump 273\n",
+ "Angry Birds Classic 273\n",
+ "Calorie Counter - MyFitnessPal 254\n",
+ "Duolingo: Learn Languages Free 240\n",
+ " ... \n",
+ "Draw a Stickman: EPIC 2 1\n",
+ "HD Camera 1\n",
+ "Draw In 1\n",
+ "Draw A Stickman 1\n",
+ "Best Fiends - Free Puzzle Game 1\n",
+ "Name: count, Length: 865, dtype: int64"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "review_missing_removed[\"App\"].value_counts()"
]
},
{
@@ -595,11 +1561,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"# Your code below\n",
+ "import numpy as np\n",
"\n",
"def positive_function(x):\n",
" \"\"\"\n",
@@ -611,7 +1578,29 @@
" Returns:\n",
" The number of occurrences of `Positive` in the column data.\n",
" \"\"\"\n",
- " return 0"
+ " \n",
+ " y= np.where( x== \"Positive\")[0]\n",
+ " return len(y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 147,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "23998"
+ ]
+ },
+ "execution_count": 147,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "positive_function(review_missing_removed)"
]
},
{
@@ -635,11 +1624,136 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 148,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Positive | \n",
+ " Total | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10 Best Foods for You | \n",
+ " 162 | \n",
+ " 194 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 | \n",
+ " 31 | \n",
+ " 40 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11st | \n",
+ " 23 | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1800 Contacts - Lens Store | \n",
+ " 64 | \n",
+ " 80 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1LINE – One Line with One Touch | \n",
+ " 27 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 860 | \n",
+ " Hotels.com: Book Hotel Rooms & Find Vacation D... | \n",
+ " 39 | \n",
+ " 68 | \n",
+ "
\n",
+ " \n",
+ " | 861 | \n",
+ " Hotspot Shield Free VPN Proxy & Wi-Fi Security | \n",
+ " 17 | \n",
+ " 34 | \n",
+ "
\n",
+ " \n",
+ " | 862 | \n",
+ " Hotstar | \n",
+ " 14 | \n",
+ " 32 | \n",
+ "
\n",
+ " \n",
+ " | 863 | \n",
+ " Hotwire Hotel & Car Rental App | \n",
+ " 16 | \n",
+ " 33 | \n",
+ "
\n",
+ " \n",
+ " | 864 | \n",
+ " Housing-Real Estate & Property | \n",
+ " 8 | \n",
+ " 21 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
865 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Positive Total\n",
+ "0 10 Best Foods for You 162 194\n",
+ "1 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 31 40\n",
+ "2 11st 23 39\n",
+ "3 1800 Contacts - Lens Store 64 80\n",
+ "4 1LINE – One Line with One Touch 27 38\n",
+ ".. ... ... ...\n",
+ "860 Hotels.com: Book Hotel Rooms & Find Vacation D... 39 68\n",
+ "861 Hotspot Shield Free VPN Proxy & Wi-Fi Security 17 34\n",
+ "862 Hotstar 14 32\n",
+ "863 Hotwire Hotel & Car Rental App 16 33\n",
+ "864 Housing-Real Estate & Property 8 21\n",
+ "\n",
+ "[865 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 148,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Per app: count the Positive reviews and the total number of reviews.\n",
+ "google_agg = review_missing_removed.groupby(['App']).agg({'Sentiment': positive_function, 'Sentiment_Polarity': 'count'}).reset_index()\n",
+ "\n",
+ "google_agg.rename(columns={\"Sentiment\": \"Positive\", \"Sentiment_Polarity\": \"Total\"}, inplace=True)\n",
+ "\n",
+ "google_agg"
]
},
{
@@ -651,11 +1765,87 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 149,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Positive | \n",
+ " Total | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10 Best Foods for You | \n",
+ " 162 | \n",
+ " 194 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 | \n",
+ " 31 | \n",
+ " 40 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11st | \n",
+ " 23 | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1800 Contacts - Lens Store | \n",
+ " 64 | \n",
+ " 80 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1LINE – One Line with One Touch | \n",
+ " 27 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Positive Total\n",
+ "0 10 Best Foods for You 162 194\n",
+ "1 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 31 40\n",
+ "2 11st 23 39\n",
+ "3 1800 Contacts - Lens Store 64 80\n",
+ "4 1LINE – One Line with One Touch 27 38"
+ ]
+ },
+ "execution_count": 149,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Show the first five rows of the per-app aggregate.\n",
+ "google_agg.head(5)"
]
},
{
@@ -669,11 +1859,94 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 150,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Positive | \n",
+ " Total | \n",
+ " Positive Ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10 Best Foods for You | \n",
+ " 162 | \n",
+ " 194 | \n",
+ " 0.835052 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 | \n",
+ " 31 | \n",
+ " 40 | \n",
+ " 0.775000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11st | \n",
+ " 23 | \n",
+ " 39 | \n",
+ " 0.589744 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1800 Contacts - Lens Store | \n",
+ " 64 | \n",
+ " 80 | \n",
+ " 0.800000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1LINE – One Line with One Touch | \n",
+ " 27 | \n",
+ " 38 | \n",
+ " 0.710526 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Positive Total Positive Ratio\n",
+ "0 10 Best Foods for You 162 194 0.835052\n",
+ "1 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 31 40 0.775000\n",
+ "2 11st 23 39 0.589744\n",
+ "3 1800 Contacts - Lens Store 64 80 0.800000\n",
+ "4 1LINE – One Line with One Touch 27 38 0.710526"
+ ]
+ },
+ "execution_count": 150,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Positive Ratio = Positive / Total, set to 0 where Total is 0.\n",
+ "google_agg[\"Positive Ratio\"] = np.where(google_agg[\"Total\"] == 0, 0, google_agg[\"Positive\"] / google_agg[\"Total\"])\n",
+ "google_agg.head()"
]
},
{
@@ -685,11 +1958,120 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 151,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Positive Ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10 Best Foods for You | \n",
+ " 0.835052 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 | \n",
+ " 0.775000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11st | \n",
+ " 0.589744 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1800 Contacts - Lens Store | \n",
+ " 0.800000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1LINE – One Line with One Touch | \n",
+ " 0.710526 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 860 | \n",
+ " Hotels.com: Book Hotel Rooms & Find Vacation D... | \n",
+ " 0.573529 | \n",
+ "
\n",
+ " \n",
+ " | 861 | \n",
+ " Hotspot Shield Free VPN Proxy & Wi-Fi Security | \n",
+ " 0.500000 | \n",
+ "
\n",
+ " \n",
+ " | 862 | \n",
+ " Hotstar | \n",
+ " 0.437500 | \n",
+ "
\n",
+ " \n",
+ " | 863 | \n",
+ " Hotwire Hotel & Car Rental App | \n",
+ " 0.484848 | \n",
+ "
\n",
+ " \n",
+ " | 864 | \n",
+ " Housing-Real Estate & Property | \n",
+ " 0.380952 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
865 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Positive Ratio\n",
+ "0 10 Best Foods for You 0.835052\n",
+ "1 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 0.775000\n",
+ "2 11st 0.589744\n",
+ "3 1800 Contacts - Lens Store 0.800000\n",
+ "4 1LINE – One Line with One Touch 0.710526\n",
+ ".. ... ...\n",
+ "860 Hotels.com: Book Hotel Rooms & Find Vacation D... 0.573529\n",
+ "861 Hotspot Shield Free VPN Proxy & Wi-Fi Security 0.500000\n",
+ "862 Hotstar 0.437500\n",
+ "863 Hotwire Hotel & Car Rental App 0.484848\n",
+ "864 Housing-Real Estate & Property 0.380952\n",
+ "\n",
+ "[865 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 151,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Drop the raw count columns in place, keeping App and Positive Ratio.\n",
+ "google_agg.drop([\"Positive\", \"Total\"], axis=1, inplace=True)\n"
]
},
{
@@ -703,11 +2085,81 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 155,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Positive Ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10 Best Foods for You | \n",
+ " 0.835052 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 | \n",
+ " 0.775000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 11st | \n",
+ " 0.589744 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1800 Contacts - Lens Store | \n",
+ " 0.800000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1LINE – One Line with One Touch | \n",
+ " 0.710526 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Positive Ratio\n",
+ "0 10 Best Foods for You 0.835052\n",
+ "1 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 0.775000\n",
+ "2 11st 0.589744\n",
+ "3 1800 Contacts - Lens Store 0.800000\n",
+ "4 1LINE – One Line with One Touch 0.710526"
+ ]
+ },
+ "execution_count": 155,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Confirm that only App and Positive Ratio are left.\n",
+ "google_agg.head(5)"
]
},
{
@@ -723,11 +2175,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 156,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Inner-join google_missing_removed with google_agg on the App column.\n",
+ "google =google_missing_removed.merge(google_agg, on = \"App\")\n"
]
},
{
@@ -741,11 +2194,180 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 157,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ " Reviews_numeric | \n",
+ " Reviews_isnull | \n",
+ " Price Numerical | \n",
+ " Positive Ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Coloring book moana | \n",
+ " ART_AND_DESIGN | \n",
+ " 3.9 | \n",
+ " 967.0 | \n",
+ " 500,000+ | \n",
+ " Free | \n",
+ " Everyone | \n",
+ " Art & Design;Pretend Play | \n",
+ " 2018-01-15 | \n",
+ " 2.0.0 | \n",
+ " 4.0.3 and up | \n",
+ " 967.0 | \n",
+ " False | \n",
+ " 0.0 | \n",
+ " 0.590909 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Coloring book moana | \n",
+ " FAMILY | \n",
+ " 3.9 | \n",
+ " 974.0 | \n",
+ " 500,000+ | \n",
+ " Free | \n",
+ " Everyone | \n",
+ " Art & Design;Pretend Play | \n",
+ " 2018-01-15 | \n",
+ " 2.0.0 | \n",
+ " 4.0.3 and up | \n",
+ " 974.0 | \n",
+ " False | \n",
+ " 0.0 | \n",
+ " 0.590909 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Garden Coloring Book | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.4 | \n",
+ " 13791.0 | \n",
+ " 1,000,000+ | \n",
+ " Free | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " 2017-09-20 | \n",
+ " 2.9.2 | \n",
+ " 3.0 and up | \n",
+ " 13791.0 | \n",
+ " False | \n",
+ " 0.0 | \n",
+ " 0.711111 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " FlipaClip - Cartoon animation | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.3 | \n",
+ " 194216.0 | \n",
+ " 5,000,000+ | \n",
+ " Free | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " 2018-08-03 | \n",
+ " 2.2.5 | \n",
+ " 4.0.3 and up | \n",
+ " 194216.0 | \n",
+ " False | \n",
+ " 0.0 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Boys Photo Editor - Six Pack & Men's Suit | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.1 | \n",
+ " 654.0 | \n",
+ " 100,000+ | \n",
+ " Free | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " 2018-03-20 | \n",
+ " 1.1 | \n",
+ " 4.0.3 and up | \n",
+ " 654.0 | \n",
+ " False | \n",
+ " 0.0 | \n",
+ " 0.605263 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating \\\n",
+ "0 Coloring book moana ART_AND_DESIGN 3.9 \n",
+ "1 Coloring book moana FAMILY 3.9 \n",
+ "2 Garden Coloring Book ART_AND_DESIGN 4.4 \n",
+ "3 FlipaClip - Cartoon animation ART_AND_DESIGN 4.3 \n",
+ "4 Boys Photo Editor - Six Pack & Men's Suit ART_AND_DESIGN 4.1 \n",
+ "\n",
+ " Reviews Installs Type Content Rating Genres \\\n",
+ "0 967.0 500,000+ Free Everyone Art & Design;Pretend Play \n",
+ "1 974.0 500,000+ Free Everyone Art & Design;Pretend Play \n",
+ "2 13791.0 1,000,000+ Free Everyone Art & Design \n",
+ "3 194216.0 5,000,000+ Free Everyone Art & Design \n",
+ "4 654.0 100,000+ Free Everyone Art & Design \n",
+ "\n",
+ " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \\\n",
+ "0 2018-01-15 2.0.0 4.0.3 and up 967.0 False \n",
+ "1 2018-01-15 2.0.0 4.0.3 and up 974.0 False \n",
+ "2 2017-09-20 2.9.2 3.0 and up 13791.0 False \n",
+ "3 2018-08-03 2.2.5 4.0.3 and up 194216.0 False \n",
+ "4 2018-03-20 1.1 4.0.3 and up 654.0 False \n",
+ "\n",
+ " Price Numerical Positive Ratio \n",
+ "0 0.0 0.590909 \n",
+ "1 0.0 0.590909 \n",
+ "2 0.0 0.711111 \n",
+ "3 0.0 1.000000 \n",
+ "4 0.0 0.605263 "
+ ]
+ },
+ "execution_count": 157,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first five rows of the merged table.\n",
+ "google.head(5)"
]
}
],
@@ -765,7 +2387,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.2"
+ "version": "3.10.9"
}
},
"nbformat": 4,