In [46]:
import numpy as np
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Grid, Line, Pie

In [47]:
# Load the video-game sales table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='vgsales.csv')
# Print each column's dtype and non-null count to reveal missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16598 entries, 0 to 16597
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Name          16598 non-null  object 
 1   Platform      16598 non-null  object 
 2   Year          16327 non-null  object 
 3   Genre         16598 non-null  object 
 4   Publisher     16540 non-null  object 
 5   NA_Sales      16598 non-null  float64
 6   EU_Sales      16598 non-null  float64
 7   JP_Sales      16598 non-null  float64
 8   Other_Sales   16598 non-null  float64
 9   Global_Sales  16596 non-null  float64
dtypes: float64(5), object(5)
memory usage: 1.3+ MB


In [48]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric sales columns.
data.describe()

Unnamed: 0,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
count,16598.0,16598.0,16598.0,16598.0,16596.0
mean,0.264667,0.146659,0.077774,0.04807,0.537498
std,0.816683,0.50535,0.309292,0.188588,1.555113
min,0.0,0.0,0.0,0.0,0.01
25%,0.0,0.0,0.0,0.0,0.06
50%,0.08,0.02,0.0,0.01,0.17
75%,0.24,0.11,0.04,0.04,0.47
max,41.49,29.02,10.22,10.57,82.74


In [90]:
# Impute missing values with each column's most frequent value (its mode).
# The filled frame is assigned back instead of calling
# data[col].fillna(..., inplace=True): that form mutates a temporary Series
# selected from the frame, which pandas reports as chained assignment and
# which leaves `data` unchanged once Copy-on-Write is enabled.
mode_fill = {
    column: data[column].mode()[0]
    for column in ('Year', 'Publisher', 'Global_Sales')
}
data = data.fillna(value=mode_fill)

In [50]:
# Count the missing values left in every column (isna is the same check as isnull).
data.isna().sum()

Name            0
Platform        0
Year            0
Genre           0
Publisher       0
NA_Sales        0
EU_Sales        0
JP_Sales        0
Other_Sales     0
Global_Sales    0
dtype: int64

# 电子游戏市场分析

In [51]:
# Number of games released on each platform, most frequent platform first.
data_Platform = data.loc[:, 'Platform'].value_counts()

In [52]:
# Ring-shaped pie chart: number of released games per platform.
# Each data item is a [platform, count] pair built from the value_counts result.
platform_pairs = [
    [platform, count]
    for platform, count in zip(data_Platform.index.tolist(), data_Platform.values.tolist())
]

platform_pie = Pie()
platform_pie.add(
    "平台",
    platform_pairs,
    radius=["40%", "65%"],
    center=["40%", "50%"],
)
platform_pie.set_global_opts(
    title_opts=opts.TitleOpts(title="各个平台发布游戏数量对比"),
    legend_opts=opts.LegendOpts(orient="vertical", pos_left="80%", type_="scroll"),
)
platform_pie.set_series_opts(
    label_opts=opts.LabelOpts(formatter="{a} {b}: {c} ({d}%)")
)
# render() writes the HTML file; its return value stays bound to `c` as before.
c = platform_pie.render("各个平台发布游戏数量对比.html")


In [53]:
# Global sales trend: sum Global_Sales over all games sharing the same Year.
# The result is a one-column DataFrame indexed by Year.
year_sales_glo = data[['Year', 'Global_Sales']].groupby('Year').sum()

In [54]:
# Line chart of total global sales per year.
# year_sales_glo is a one-column DataFrame, so `.values.tolist()` would yield
# nested single-item lists ([[v1], [v2], ...]). add_yaxis is given a flat list
# of numbers instead by selecting the Global_Sales column before converting.
c = (
    Line()
    .add_xaxis(year_sales_glo.index.tolist())
    .add_yaxis("全球销售额", year_sales_glo['Global_Sales'].tolist(), is_smooth=False)
    .set_global_opts(title_opts=opts.TitleOpts(title="游戏销售额发展趋势"))
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    .render("全球游戏销售额发展趋势.html")
)

In [55]:
# Sales trend by region: sum every regional sales column per Year, then
# unpack each column into a plain Python list for plotting.
region_columns = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
year_sales = data[['Year'] + region_columns].groupby('Year').sum()
year_area = year_sales.index.tolist()
NA_Sales, EU_Sales, JP_Sales, Other_Sales = (
    year_sales[column].tolist() for column in region_columns
)

In [56]:
# Multi-series line chart: yearly sales of each region on a shared year axis.
region_line = Line()
region_line.add_xaxis(year_area)
region_line.add_yaxis("北美销售额", NA_Sales)
region_line.add_yaxis("欧洲销售额", EU_Sales, is_smooth=False)
region_line.add_yaxis("日本销售额", JP_Sales, is_smooth=False)
region_line.add_yaxis("其他地区销售额", Other_Sales, is_smooth=False)

region_title = opts.TitleOpts(title="不同地区的游戏销售额", subtitle='总销售额（百万美元）')
region_line.set_global_opts(title_opts=region_title)
# Hide the per-point value labels so the four lines stay readable.
region_line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
# render() writes the HTML file; its return value stays bound to `c` as before.
c = region_line.render("不同地区的游戏销售额发展趋势.html")

In [83]:
# Most popular game genres: total and average global sales per genre.
# Grid is imported together with the other pyecharts charts in the top import cell.
pop_genre_mean = data.groupby('Genre')['Global_Sales'].mean().round(2)
pop_genre_sum = data.groupby('Genre')['Global_Sales'].sum().round(2)

# Left panel: total global sales per genre (x-axis labels shown).
bar = (
    Bar()
    .add_xaxis(pop_genre_sum.index.tolist())
    .add_yaxis("不同类型的总销售额", pop_genre_sum.values.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="受欢迎的游戏类型"),
        legend_opts=opts.LegendOpts(is_show=False),
    )
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)

# Right panel: average global sales per genre (x-axis hidden).
bar1 = (
    Bar()
    .add_xaxis(pop_genre_mean.index.tolist())
    .add_yaxis("不同类型的平均销售额", pop_genre_mean.values.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="受欢迎的游戏类型"),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False),
    )
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)

# Place the two bar charts side by side: each one takes half of the canvas.
grid = Grid()
grid.add(bar, grid_opts=opts.GridOpts(pos_right="50%"))
grid.add(bar1, grid_opts=opts.GridOpts(pos_left="50%"))
grid.render('受欢迎的游戏类型对比.html')



'F:\\project\\qin\\其他\\电子游戏销售分析\\受欢迎的游戏类型对比.html'

In [115]:
# Sales by release platform: total sales, average sales and number of games.
pop_platform_sum = data.groupby(by='Platform')['Global_Sales'].sum().round(2)
pop_platform_mean = data.groupby(by='Platform')['Global_Sales'].mean().round(2)
# Counts are integers, so no rounding is applied to them.
pop_platform_count = data.groupby(by='Platform')['Platform'].count()


def platform_bar(categories, series_name, values):
    """Build one panel of the platform comparison figure.

    Args:
        categories: platform names used as the x-axis categories.
        series_name: name of the bar series.
        values: one number per platform, in the same order as ``categories``.

    Returns:
        An 800x600 ``Bar`` chart with the shared title and with the legend,
        x-axis and value labels hidden.
    """
    return (
        Bar(init_opts=opts.InitOpts(width='800px', height='600px'))
        .add_xaxis(categories)
        .add_yaxis(series_name, values)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="不同发行平台的销售量"),
            legend_opts=opts.LegendOpts(is_show=False),
            xaxis_opts=opts.AxisOpts(is_show=False),
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    )


# The first series name previously read "每个戏平台…" (missing "游"); it now
# matches the wording of the average-sales series.
bar = platform_bar(
    pop_platform_sum.index.tolist(),
    "每个游戏平台全球范围内总的销售额",
    pop_platform_sum.values.tolist(),
)
bar1 = platform_bar(
    pop_platform_mean.index.tolist(),
    "每个游戏平台全球范围内的平均销售额",
    pop_platform_mean.values.tolist(),
)
bar2 = platform_bar(
    pop_platform_count.index.tolist(),
    "每个平台上发行的游戏数量",
    pop_platform_count.values.tolist(),
)

# Lay the three panels out left / center / right on one canvas.
grid = Grid()
grid.add(bar, grid_opts=opts.GridOpts(pos_right="73%"))
grid.add(bar1, grid_opts=opts.GridOpts(pos_left="center"))
grid.add(bar2, grid_opts=opts.GridOpts(pos_left="73%"))
grid.render('不同发行平台的销售量对比.html')


'F:\\project\\qin\\其他\\电子游戏销售分析\\不同发行平台的销售量对比.html'