In [12]:
import pandas as pd
import numpy as np

In [13]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
google_play_path = '/content/drive/My Drive/Colab Notebooks/Google-Playstore.csv'  # Update the path
app_store_path = '/content/drive/My Drive/Colab Notebooks/appleAppData.csv'

In [15]:
google_play_data = pd.read_csv(google_play_path)

In [16]:
app_store_data = pd.read_csv(app_store_path)

In [18]:
google_play_data = google_play_data.drop_duplicates(subset='App Name')  # Remove duplicate apps
google_play_data = google_play_data.dropna(subset=['Rating', 'Installs'])  # Remove rows with missing values

In [19]:
google_play_data['Installs'] = google_play_data['Installs'].str.replace('[+,]', '', regex=True).astype(int)

In [28]:
google_play_data['Size'] = pd.to_numeric(google_play_data['Size'].str.replace('M', '').str.replace('k', '').replace('Varies with device', np.nan), errors='coerce')

In [29]:
app_store_data = app_store_data.dropna(subset=['Average_User_Rating', 'Reviews', 'Size_Bytes', 'Price'])  # Drop rows with missing values
app_store_data['Size_MB'] = app_store_data['Size_Bytes'] / (1024 ** 2)  # Convert bytes to MB

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app_store_data['Size_MB'] = app_store_data['Size_Bytes'] / (1024 ** 2)  # Convert bytes to MB


In [20]:
app_store_data = app_store_data.dropna(subset=['Average_User_Rating'])  # Remove rows with missing user ratings

In [32]:
def analyze_google_play(data):
    print("\nGoogle Play Store Analysis")
    category_stats = data.groupby('Category').agg({
        'Installs': 'mean',
        'Rating': 'mean',
        'Rating Count': 'mean',
        'Size': 'mean',
        'Price': 'mean'
    }).sort_values(by='Installs', ascending=False)

    print("\nCategory Statistics (Google Play):")
    print(category_stats)
    return category_stats

In [33]:
def analyze_app_store(data):
    print("\nApp Store Analysis")
    genre_stats = data.groupby('Primary_Genre').agg({
        'Average_User_Rating': 'mean',
        'Reviews': 'mean',
        'Size_MB': 'mean',
        'Price': 'mean'
    }).sort_values(by='Average_User_Rating', ascending=False)

    print("\nGenre Statistics (App Store):")
    print(genre_stats)
    return genre_stats

In [34]:
google_play_stats = analyze_google_play(google_play_data)
app_store_stats = analyze_app_store(app_store_data)


Google Play Store Analysis

Category Statistics (Google Play):
                             Installs    Rating  Rating Count       Size  \
Category                                                                   
Video Players & Editors  1.344494e+06  2.887002  18885.424247  32.897878   
Communication            9.047840e+05  2.116807  10077.768169  26.521100   
Racing                   8.710739e+05  2.969274  16362.054835  45.220440   
Action                   6.574906e+05  2.725329  21347.510356  43.957904   
Photography              5.603846e+05  2.513765   6393.654430  22.321482   
Simulation               5.156109e+05  3.223337  10230.654715  46.842337   
Strategy                 5.118699e+05  2.911381  29498.437221  48.067671   
Role Playing             4.906358e+05  3.393324  14369.326220  53.185379   
Tools                    4.890459e+05  2.335021   3212.983823  32.221158   
Social                   4.016063e+05  2.396545  11088.713121  24.662565   
Weather                 

In [35]:
def recommend_apps(google_stats, app_stats):
    print("\nRecommendations:")

    # Google Play Insights
    top_google_category = google_stats.index[0]
    top_google_installs = google_stats.loc[top_google_category, 'Installs']
    print(f"Google Play - Most Popular Category: {top_google_category} with {top_google_installs:,.0f} average installs.")

    # App Store Insights
    top_app_genre = app_stats.index[0]
    top_app_rating = app_stats.loc[top_app_genre, 'Average_User_Rating']
    print(f"App Store - Most Popular Genre: {top_app_genre} with an average user rating of {top_app_rating:.2f}.")

    # Custom Recommendations
    print("\nAdditional Recommendations:")
    print("1. Focus on categories with high installs and ratings on Google Play.")
    print("2. Optimize app size to attract more users, as smaller apps are often more appealing.")
    print("3. For the App Store, consider genres with high user ratings and reviews to maximize engagement.")

recommend_apps(google_play_stats, app_store_stats)


Recommendations:
Google Play - Most Popular Category: Video Players & Editors with 1,344,494 average installs.
App Store - Most Popular Genre: Weather with an average user rating of 2.59.

Additional Recommendations:
1. Focus on categories with high installs and ratings on Google Play.
2. Optimize app size to attract more users, as smaller apps are often more appealing.
3. For the App Store, consider genres with high user ratings and reviews to maximize engagement.
