In [4]:
import pandas as pd
import numpy as np
df = pd.read_csv('playstore.csv')
df = df.drop('Unnamed: 0', axis=1)
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [6]:
df.shape[0], df.shape[1]

(10840, 13)

**Описание данных**
- App — название приложения
- Category — категория, к которой относится приложение
- Rating — рейтинг пользователей
- Reviews — количество отзывов пользователей о приложении
- Size — размер приложения
- Installs — количество загрузок/установок приложения пользователями
- Type — платное или бесплатное приложение
- Price — цена приложения
- Content Rating — возрастная группа, на которую ориентировано приложение
- Genres — принадлежность приложения к нескольким жанрам
- Last Updated — дата последнего обновления приложения в Play Store
- Current Ver — текущая версия приложения в Play Store
- Android Ver — минимальная требуемая версия Android

Какое количество уникальных приложений есть у нас в наших данных (колонка App).

In [5]:
len(df['App'].unique())

9659

Для дальнейшего анализа нам необходимо убрать дубликаты приложений (колонка App) и сбросить индекс. Сохраните полученный результат в виде датафрейма в переменную unique_playstore

In [18]:
unique_playstore = df.copy()
unique_playstore['App'] = unique_playstore['App'].drop_duplicates()

In [22]:
unique_playstore = unique_playstore.set_index('App')

In [23]:
unique_playstore

Unnamed: 0_level_0,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
App,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
"U Launcher Lite – FREE Live Cool Themes, Hide Apps",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...
Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
Parkinson Exercices FR,MEDICAL,,3,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


В этом задании названия колонок необходимо привести к стандартному виду — все буквы должны быть нижнего регистра, а пробелы должны быть заменены на нижние подчеркивания. 

In [24]:
df = unique_playstore.copy()

In [25]:
df.columns = [col.lower().replace(' ', '_') for col in df.columns]

In [26]:
df

Unnamed: 0_level_0,category,rating,reviews,size,installs,type,price,content_rating,genres,last_updated,current_ver,android_ver
App,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
"U Launcher Lite – FREE Live Cool Themes, Hide Apps",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...
Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
Parkinson Exercices FR,MEDICAL,,3,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


Давайте посмотрим на долю платных и бесплатных приложений.

In [28]:
df[df['type'] == 'Free'].shape[0] / df.shape[0]

0.9261070110701107

In [29]:
df[df['type'] != 'Free'].shape[0] / df.shape[0]

0.0738929889298893

Отберите из датафрейма только те приложения, которые относятся к образовательной категории  (EDUCATION) и с количеством отзывов пользователей более 1000.

In [30]:
df[(df['category'] == 'EDUCATION') & (df['reviews'] > 1000)]

Unnamed: 0_level_0,category,rating,reviews,size,installs,type,price,content_rating,genres,last_updated,current_ver,android_ver
App,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Duolingo: Learn Languages Free,EDUCATION,4.7,6289924,Varies with device,"100,000,000+",Free,0,Everyone,Education;Education,"August 1, 2018",Varies with device,Varies with device
TED,EDUCATION,4.6,181893,18M,"10,000,000+",Free,0,Everyone 10+,Education,"July 27, 2018",3.2.5,4.1 and up
English Communication - Learn English for Chinese (Learn English for Chinese),EDUCATION,4.7,2544,18M,"100,000+",Free,0,Everyone,Education,"December 29, 2017",3.1,4.0 and up
Khan Academy,EDUCATION,4.6,85375,21M,"5,000,000+",Free,0,Everyone,Education,"July 27, 2018",5.0.0,4.1 and up
Learn English with Wlingua,EDUCATION,4.7,314299,3.3M,"10,000,000+",Free,0,Everyone,Education,"May 2, 2018",1.94.9,4.0 and up
...,...,...,...,...,...,...,...,...,...,...,...,...
SoloLearn: Learn to Code for Free,EDUCATION,4.8,256079,7.6M,"1,000,000+",Free,0,Teen,Education,"July 12, 2018",2.2.4,4.0.3 and up
Kids Learn Languages by Mondly,EDUCATION,4.4,2078,Varies with device,"100,000+",Free,0,Everyone,Education;Education,"December 24, 2017",1.0.2,4.1 and up
Blinkist - Nonfiction Books,EDUCATION,4.1,16103,13M,"1,000,000+",Free,0,Everyone,Education,"July 31, 2018",5.7.1,4.1 and up
Toca Life: City,EDUCATION,4.7,31085,24M,"500,000+",Paid,$3.99,Everyone,Education;Pretend Play,"July 6, 2018",1.5-play,4.4 and up


Трансформируйте данные о времени последнего обновления из датафрейма playstore (сохранен в ЛМС) в колонке last_updated в формат времени.

In [31]:
df['last_updated'] = pd.to_datetime(df['last_updated'])

Давайте уберем лишние символы из колонки с ценой (price), чтобы далее с ней было удобнее работать, и переведем данные в тип float.

In [32]:
df['price'].value_counts()

0         10040
$0.99       148
$2.99       129
$1.99        73
$4.99        72
          ...  
$19.90        1
$1.75         1
$14.00        1
$4.85         1
$1.04         1
Name: price, Length: 92, dtype: int64

In [34]:
df['price'] = df['price'].apply(lambda x: x.replace('$', '')).astype(np.float64)

Рассчитаем медиану и среднее значение price и сохраним это в переменные price_median и price_mean, округлив значения до 2-х знаков после запятой

In [35]:
price_median = round(df['price'].median(), 2)
price_mean = round(df['price'].mean(), 2)
price_median, price_mean

(0.0, 1.03)