# Pràctica II - Creació de la visualització i lliurament del projecte

## Ingestió de dades - Apps Google Play Store (2018)

In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.subplots as sp

In [None]:
# Path of the cleaned Google Play Store CSV. Held in its own variable so the
# location can be changed in one place.
# NOTE(review): the '/content/drive' prefix looks like a Colab Google Drive
# mount, which would have to exist before this cell runs — confirm.
csv_path = '/content/drive/MyDrive/3r Quadrimestre/Visualització de Dades/Google_data_cleaned.csv'
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to sanity-check the columns parsed from the CSV.
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,app,category,rating,reviews,installs,type,price,content_rating,genres,current_ver,android_ver,size(kb),update_month,update_year
0,0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,10000,0,0.0,Everyone,Art & Design,1.0.0,4.0.3,19000.0,1,2018
1,1,Coloring book moana,ART_AND_DESIGN,3.9,967,500000,0,0.0,Everyone,Art & Design;Pretend Play,2.0.0,4.0.3,14000.0,1,2018
2,2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,5000000,0,0.0,Everyone,Art & Design,1.2.4,4.0.3,8.7,8,2018
3,3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,50000000,0,0.0,Teen,Art & Design,,4.2,25000.0,6,2018
4,4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,100000,0,0.0,Everyone,Art & Design;Creativity,1.1,4.4,2.8,6,2018


## Anàlisi general Apps Google Play Store

In [None]:


# Stand-alone figures for the store-wide overview. Each figure is built and
# titled in a single chained expression; the dashboard cell code further down
# reuses the first trace of several of them.

# Number of apps listed under each category.
category_counts = data['category'].value_counts()
fig1 = go.Figure(
    data=go.Bar(x=category_counts.index, y=category_counts)
).update_layout(title='App Distribution by Category')

# Histogram of the app ratings.
fig2 = go.Figure(
    data=go.Histogram(x=data['rating'])
).update_layout(title='App Ratings Distribution')

# One marker per app: review count on x, rating on y.
fig3 = go.Figure(
    data=go.Scatter(x=data['reviews'], y=data['rating'], mode='markers')
).update_layout(title='Reviews vs. Ratings')

# Total installs per category, largest first.
category_installs = (
    data.groupby('category')['installs']
    .sum()
    .sort_values(ascending=False)
)
fig4 = go.Figure(
    data=go.Bar(x=category_installs.index, y=category_installs)
).update_layout(title='Installs by Category')

# Share of paid vs. free apps. A 'type' value greater than 0 is labelled
# 'Paid'; anything else is labelled 'Free'.
type_counts = data['type'].value_counts()
labels = list(map(lambda val: 'Paid' if val > 0 else 'Free', type_counts.index))
fig5 = go.Figure(
    data=go.Pie(labels=labels, values=type_counts)
).update_layout(title='Paid vs. Free Apps')

# Histogram of the app prices.
fig6 = go.Figure(
    data=go.Histogram(x=data['price'])
).update_layout(title='Pricing Distribution')

# Number of apps per content rating.
content_rating_counts = data['content_rating'].value_counts()
fig7 = go.Figure(
    data=go.Bar(x=content_rating_counts.index, y=content_rating_counts)
).update_layout(title='Content Rating Analysis')

# Take the ten apps with the most reviews, then order those ten by rating
# (highest first) and show them as a two-column table.
top_apps_by_reviews = (
    data.nlargest(10, 'reviews')
    .sort_values('rating', ascending=False)
)
top_rated_apps = top_apps_by_reviews[['app', 'rating']]
fig8 = go.Figure(
    data=go.Table(
        header={'values': ['App', 'Rating']},
        cells={'values': [top_rated_apps[column] for column in ('app', 'rating')]},
    )
).update_layout(title='Top Rated Apps from the Top 10 Apps with the Largest Number of Reviews')



# Build the 3x2 dashboard grid. Subplot titles are given in row-major order:
# (1,1), (1,2), (2,1), (2,2), (3,1), (3,2).
panel_titles = [
    'Reviews vs. Ratings',
    'App Ratings Distribution',
    'Content Rating Analysis',
    'Pricing Distribution',
    'App Distribution by Category',
    'Installations by Category',
]
fig = sp.make_subplots(rows=3, cols=2, subplot_titles=panel_titles)

# The pie chart and the table are shown as separate figures rather than as
# grid panels; only their size is adjusted here.
# What is the split between paid and free apps?
fig5.update_layout(height=500, width=1000)
# Which apps in the store have the best ratings?
fig8.update_layout(height=450, width=1000)

# (source figure, row, col) for every grid panel, listed in the order the
# traces are inserted into the dashboard.
panel_placements = [
    # Is there any correlation between the number of reviews and the rating?
    (fig3, 1, 1),
    # How are the app ratings distributed?
    (fig2, 1, 2),
    # Which content-rating classes exist and how many apps fall in each?
    (fig7, 2, 1),
    # How are app prices distributed?
    (fig6, 2, 2),
    # Which categories have the highest number of installs?
    (fig4, 3, 2),
    # Which categories hold the most apps?
    (fig1, 3, 1),
]
for source_fig, panel_row, panel_col in panel_placements:
    # Only the first trace of each stand-alone figure is copied into the grid.
    fig.add_trace(source_fig.data[0], row=panel_row, col=panel_col)

fig.update_layout(height=800, width=1000, title='Google Play Store Dashboard', showlegend=False)
# Log-scale the count axis of the pricing histogram (row 2, col 2).
fig.update_yaxes(type='log', row=2, col=2)

# Display order: ratings table, paid/free pie, then the dashboard grid.
for figure_to_show in (fig8, fig5, fig):
    figure_to_show.show()

## Anàlisi jocs Google Play Store

In [None]:
# Games-only subset of the store data, built in this cell so it runs on a fresh
# kernel (Restart & Run All) instead of relying on a variable left over from
# another cell. `.assign` returns a new DataFrame, so adding the 'revenue'
# column does not write into a slice of `data` and no SettingWithCopyWarning
# is raised.
# Revenue is estimated as installs * price.
# NOTE(review): 'GAME' is assumed to be the label used for games in the
# 'category' column — confirm against data['category'].unique().
games_category = (
    data.loc[data['category'] == 'GAME']
    .assign(revenue=lambda games: games['installs'] * games['price'])
)

# NOTE(review): fig3, fig4 and fig5 reuse the names of figures built in the
# overview cell and replace them once this cell has run; the names are kept
# unchanged here so any later cell that refers to them keeps working.

# Most downloaded paid games vs. most downloaded free games
# ('type' == 1 selects paid apps, 'type' == 0 selects free apps).
paid_games = games_category[games_category['type'] == 1].nlargest(10, 'installs')
free_games = games_category[games_category['type'] == 0].nlargest(10, 'installs')

fig3 = go.Figure()
fig3.add_trace(go.Bar(x=paid_games['app'], y=paid_games['installs'], name='Paid Games'))
fig3.add_trace(go.Bar(x=free_games['app'], y=free_games['installs'], name='Free Games'))
fig3.update_layout(
    title='Most Downloaded Games (Paid vs. Free)',
    xaxis_title='Game',
    yaxis_title='Number of Downloads',
    barmode='stack',
)
# Log-scale the download axis.
fig3.update_yaxes(type='log')

# Best rated games: the ten games with the most reviews, ordered by rating
# (highest first).
top_rated_games = games_category.nlargest(10, 'reviews').sort_values('rating', ascending=False)
fig4 = go.Figure(data=[go.Bar(x=top_rated_games['app'], y=top_rated_games['rating'])])
fig4.update_layout(title='Top Rated Games', xaxis_title='Game', yaxis_title='Rating')

# Games with the highest estimated revenue (installs * price).
top_revenue_games = games_category.nlargest(10, 'revenue')
fig5 = go.Figure(data=[go.Bar(x=top_revenue_games['app'], y=top_revenue_games['revenue'])])
fig5.update_layout(title='Games with Highest Revenue', xaxis_title='Game', yaxis_title='Revenue')

# Display the visualizations
fig3.show()
fig4.show()
fig5.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

