# Imports

In [30]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os

# Importing the Data Set and Defining its Dimensions

In [31]:
# Read the Google Play Store apps CSV into a DataFrame.
google_data = pd.read_csv('../input/google-play-store-apps/googleplaystore.csv')

# DataFrame.shape is a (row count, column count) tuple.
nRow = google_data.shape[0]
nCol = google_data.shape[1]
print(f'There are {nRow} rows and {nCol} columns')

# Gathering the Amount of Data Points and Variable Types

In [32]:
# Print a summary of the DataFrame: column names, non-null counts, dtypes and memory usage.
google_data.info()

# Describing the Rating Values

In [33]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
google_data.describe()

# Checking for Outliers Using Boxplot and Histogram

In [34]:
# Box plot of the DataFrame's numeric columns, used here to spot out-of-range values.
google_data.boxplot()

In [35]:
# Histogram of each numeric column, used here to inspect the spread of values.
google_data.hist()

# Based on the value ranges shown in the histogram and the boxplot, we know there is a rating value that is greater than 5. Since the maximum shown in the boxplot is above 17.5, there is an outlier that needs to be cleaned out of the data.

# Checking the Amount of Null Values

In [36]:
# Count the missing values in each column (isna is the alias of isnull).
google_data.isna().sum()

# Check for Rating Values Greater than 5

In [37]:
# Show the rows whose Rating is above the 5-star maximum.
google_data.loc[google_data['Rating'] > 5]

# Since there is a rating greater than 5, we are going to drop that app from the data set.

In [38]:
# Drop every row whose Rating exceeds the 5-star maximum. Selecting the rows by
# condition (rather than by a hard-coded index label) keeps this cell safe to
# re-run: once the rows are gone the selection is empty and drop() is a no-op.
invalid_rating_rows = google_data[google_data['Rating'] > 5].index
google_data.drop(index=invalid_rating_rows, inplace=True)

# Redisplaying the boxplot and histogram to show the updated rating values without the outlier:

In [39]:
# Redraw the box plot of the numeric columns now that the out-of-range row has been dropped.
google_data.boxplot()

In [40]:
# Redraw the histograms of the numeric columns now that the out-of-range row has been dropped.
google_data.hist()

# **Data Manipulation**

Since the "Rating" variable has over 1,000 null entries, we are going to fill in these values with the median of the non-null "Rating" values.

In [41]:
def calc_median(series):
    """Return ``series`` with its missing values replaced by the series median.

    The median is computed by ``series.median()`` and passed to
    ``series.fillna``; the filled result is returned.
    """
    median_value = series.median()
    filled = series.fillna(median_value)
    return filled

In [42]:
# Fill the missing ratings with the column median. calc_median operates on a
# whole Series, so it is called directly instead of going through .transform,
# and the column is assigned with bracket notation, which pandas recommends
# over attribute-style assignment.
google_data['Rating'] = calc_median(google_data['Rating'])

In [43]:
# Re-count the missing values in each column (isna is the alias of isnull).
google_data.isna().sum()

# As seen in the above table, there are no more null values for the "Rating" variable.

We are now going to print the mode values for the non-numeric variables, starting with "Type".

In [44]:
# Show the most frequent value(s) of "Type". mode() has to be called on the
# Series inside print(): print() returns None, so chaining .mode() onto it
# raises AttributeError.
print(google_data['Type'].mode())

Next, "Current Version".

In [None]:
# Show the most frequent value(s) of "Current Ver". mode() has to be called on
# the Series inside print(): print() returns None, so chaining .mode() onto it
# raises AttributeError.
print(google_data['Current Ver'].mode())

Lastly, "Android Version".

In [None]:
# Show the most frequent value(s) of "Android Ver". mode() has to be called on
# the Series inside print(): print() returns None, so chaining .mode() onto it
# raises AttributeError.
print(google_data['Android Ver'].mode())

We are now going to fill in all the remaining "null" values with the modes for their respective variables.

In [None]:
# Replace the remaining missing values in each categorical column with that
# column's most frequent value. The result is assigned back to the column
# instead of calling fillna(inplace=True) on a column selection: that chained
# in-place form is deprecated and does not update the DataFrame when pandas
# copy-on-write is enabled.
for column in ['Type', 'Current Ver', 'Android Ver']:
    # mode() returns a Series sorted by value; take its first entry.
    most_common = str(google_data[column].mode().iloc[0])
    google_data[column] = google_data[column].fillna(most_common)