# Looking into game sales - at a regional level

In [49]:
# Import CSV and convert to a dataframe
import pandas as pd
import numpy as np
from scipy.stats import chisquare
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.linear_model import LinearRegression

sales = pd.read_csv("vgsales_20072016.csv")

sales[0:3]

Unnamed: 0,Rank,Name,Platform,Release_Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,3,Mario Kart Wii,Wii,2008,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
1,4,Wii Sports Resort,Wii,2009,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
2,9,New Super Mario Bros. Wii,Wii,2009,Platform,Nintendo,14.59,7.06,4.7,2.26,28.62


## Genre - Market share

Finding from Tableau: Action, shooter, sports are ranked at 1st through 3rd of most selling games except in Japan
* Global: Action > Shooter > Sports > Misc > Role-Palying
* NA: Action > Shooter > Sports
* EU: Action > Shooter > Sports
* JP: Role-Palying > Action > Misc
* Other: Action > Sports > Shooter

Null hypothesis: 3 major genres take up same portion of the total sum amount of sale in 3 regions

In [50]:
major_g_s = sales[(sales.Genre =='Action') | (sales.Genre =='Sports') | (sales.Genre =='Shooter')]

print("% of 3 major genres Globally (sum of sales): ", round(sum(major_g_s.Global_Sales)/sum(sales.Global_Sales)*100,2), "%")
print("% of 3 major genres in North America (sum of sales): ", round(sum(major_g_s.NA_Sales)/sum(sales.NA_Sales)*100,2), "%")
print("% of 3 major genres in Europe (sum of sales): ", round(sum(major_g_s.EU_Sales)/sum(sales.EU_Sales)*100,2), "%")
print("% of 3 major genres in Japan (sum of sales): ", round(sum(major_g_s.JP_Sales)/sum(sales.JP_Sales)*100,2), "%")
print("% of 3 major genres in Other region (sum of sales): ", round(sum(major_g_s.Other_Sales)/sum(sales.Other_Sales)*100,2), "%")

% of 3 major genres Globally (sum of sales):  53.06 %
% of 3 major genres in North America (sum of sales):  54.56 %
% of 3 major genres in Europe (sum of sales):  56.58 %
% of 3 major genres in Japan (sum of sales):  31.56 %
% of 3 major genres in Other region (sum of sales):  57.77 %


In [51]:
# Run chi-square test of independence among 3 regions for the portion of 3 major generes in total sales

g_s = np.array(([sum(major_g_s.NA_Sales),sum(sales.NA_Sales)],[sum(major_g_s.EU_Sales),sum(sales.EU_Sales)],
                [sum(major_g_s.Other_Sales),sum(sales.Other_Sales)]))
stats.chi2_contingency(g_s) 

# p-value: 0.70922423161140546

(0.68716707567401725,
 0.70922423161140546,
 2,
 array([[ 1168.71166259,  2099.85833741],
        [  770.0030528 ,  1383.4869472 ],
        [  284.57528461,   511.30471539]]))

## Genre - Product performance

Finding from Tableau: In all regions, shooter, platform, sport, racing are most profitable genres per game except in Japan  
* Popular genres vary per region as follows (avg of sales):
* Global: Shooter > Platform  > Sports > Racing > Role-Palying
* NA: Shooter > Platform  > Sports
* EU: Shooter > Platform  > Racing
* JP: Role-Palying > Platform > Fighting > Action
* Other: Shooter > Platform  > Racing

Null hypothesis: The ratio of average sales of 4 major genres to the rest of the genres is the same throughout the region except Japan

In [52]:
major_g_a = sales[(sales.Genre =='Platform') | (sales.Genre =='Shooter') | 
              (sales.Genre =='Sports')| (sales.Genre =='Racing')]
minor_g_a = sales[(sales.Genre !='Platform') & (sales.Genre !='Shooter') & 
              (sales.Genre !='Sports')& (sales.Genre !='Racing')]

print("The ratio of 4 major genres to the rest of genres Globally (avg of sales): ", 
      round(np.mean(major_g_a.Global_Sales)/np.mean(minor_g_a.Global_Sales)*100,2), "%")
print("The ratio of 4 major genres to the rest of genres in North America (avg of sales): ", 
      round(np.mean(major_g_a.NA_Sales)/np.mean(minor_g_a.NA_Sales)*100,2), "%")
print("The ratio of 4 major genres to the rest of genres in Europe (avg of sales): ", 
      round(np.mean(major_g_a.EU_Sales)/np.mean(minor_g_a.EU_Sales)*100,2), "%")
print("The ratio of 4 major genres to the rest of genres in Japan (avg of sales): ", 
      round(np.mean(major_g_a.JP_Sales)/np.mean(minor_g_a.JP_Sales)*100,2), "%")
print("The ratio of 4 major genres to the rest of genres in Other region (avg of sales): ", 
      round(np.mean(major_g_a.Other_Sales)/np.mean(minor_g_a.Other_Sales)*100,2), "%")

The ratio of 4 major genres to the rest of genres Globally (avg of sales):  179.26 %
The ratio of 4 major genres to the rest of genres in North America (avg of sales):  189.34 %
The ratio of 4 major genres to the rest of genres in Europe (avg of sales):  211.1 %
The ratio of 4 major genres to the rest of genres in Japan (avg of sales):  64.6 %
The ratio of 4 major genres to the rest of genres in Other region (avg of sales):  206.99 %


In [53]:
# Run chi-square test of independence among 3 regions 
# for the ratio of 4 major genres to the rest of genres in average sales

g_a = np.array(([np.mean(major_g_a.NA_Sales),np.mean(minor_g_a.NA_Sales)],
                  [np.mean(major_g_a.EU_Sales),np.mean(minor_g_a.EU_Sales)],
                 [np.mean(major_g_a.Other_Sales),np.mean(minor_g_a.Other_Sales)]))
stats.chi2_contingency(g_a) 

# p-value: 0.99969357605034559

(0.00061294181413150796,
 0.99969357605034559,
 2,
 array([[ 0.35477834,  0.17843619],
        [ 0.23665963,  0.11902825],
        [ 0.08640879,  0.0434594 ]]))

## Genre - Japan vs. rest of market

Finding from Tableau: Japan doesn’t share the popular genres in terms of sum of sales (Role-Palying > Action > Misc) and average of sales (Role-Palying > Platform > Fighting > Action) with the rest of regions. 

Null hypothesis: The portion of 3 major genres of Japan in the Japanese market and the portion of global 3 major genres in the rest of the market are the same. 

In [54]:
major_g_s_jp = sales[(sales.Genre =='Role-Playing') | (sales.Genre =='Misc') | (sales.Genre =='Action')]

print("% of global 3 major genres in the NA, EU, Other regions (sum of sales): ", 
      round((sum(major_g_s.Global_Sales)-sum(major_g_s.JP_Sales))/(sum(sales.Global_Sales)-sum(sales.JP_Sales))*100,2), "%")
print("% of 3 major genres of Japan in the Japanese market (sum of sales): ", 
      round(sum(major_g_s_jp.JP_Sales)/sum(sales.JP_Sales)*100,2), "%")

% of global 3 major genres in the NA, EU, Other regions (sum of sales):  55.65 %
% of 3 major genres of Japan in the Japanese market (sum of sales):  60.41 %


In [55]:
# Run chi-square test of independence to compare the portion of main 3 genres are the same 
# in Japan and the rest of markets

g_s_jp = np.array(([sum(major_g_s_jp.JP_Sales),sum(sales.JP_Sales)],
                   [sum(major_g_s.NA_Sales+major_g_s.EU_Sales+major_g_s.Other_Sales),
                   sum(sales.NA_Sales+sales.EU_Sales+sales.Other_Sales)]))
                      
stats.chi2_contingency(g_s_jp) 

# p-values: 0.3179101094447937

(0.99752507416689462,
 0.3179101094447937,
 1,
 array([[  277.6006066,   494.2393934],
        [ 2236.3493934,  3981.5906066]]))

In [56]:
major_g_a_jp = sales[(sales.Genre =='Role-Palying') | (sales.Genre =='Platform') | 
              (sales.Genre =='Fighting')| (sales.Genre =='Action')]
minor_g_a_jp = sales[(sales.Genre !='Role-Palying') & (sales.Genre !='Platform') & 
              (sales.Genre !='Fighting')& (sales.Genre !='Action')]

print("The ratio of 4 major genres to the rest of genres in the NA, EU, Other regions: ", 
      round(np.mean(major_g_a.Global_Sales-major_g_a.JP_Sales)/np.mean(minor_g_a.Global_Sales-minor_g_a.JP_Sales)*100,2), "%")
print("The ratio of 4 major genres to the rest of genres in Japan: ", 
      round(np.mean(major_g_a_jp.JP_Sales)/np.mean(minor_g_a_jp.JP_Sales)*100,2), "%")

The ratio of 4 major genres to the rest of genres in the NA, EU, Other regions:  198.74 %
The ratio of 4 major genres to the rest of genres in Japan:  100.81 %


In [57]:
# Run chi-square test of independence to see whether the ratio of average sales of 4 most profitable genres in Japan 
# to the rest of the genres is as high as the ratio of average sales of 4 most profitable genres globally 
# in the rest of the world to other genres

g_a_jp = np.array(([np.mean(major_g_a_jp.JP_Sales),np.mean(minor_g_a_jp.JP_Sales)],
                   [np.mean(major_g_a.NA_Sales+major_g_a.EU_Sales+major_g_a.Other_Sales),
                   np.mean(minor_g_a.NA_Sales+minor_g_a.EU_Sales+minor_g_a.Other_Sales)]))
                      
stats.chi2_contingency(g_a_jp) 

# p-value: 0.00098025125039657946

(10.864501662786338,
 0.00098025125039657946,
 1,
 array([[ 0.06810032,  0.03664856],
        [ 0.66233265,  0.35643795]]))

## Publisher - Market share

Finding from Tableau: In terms of sum of sales, a few publishers such as  Nintendo, Electronic Arts, Activision do well throughout the regions except Japan -  Electronic Arts and Activision are not in the forefront in Japan unlike elsewhere.

* Global Top publishers, In terms of total sales: Nitendo > Electronic Arts > Activision
* Top publishers in North America, In terms of total sales: Activision > Electronic Arts > Nintendo
* Top publishers in Europe, In terms of total sales: Electronic Arts > Nintendo > Activision
* Top publishers in Japan, In terms of total sales: Nitendo >  Namco Bandai Games > Capcom
* Top publishers in other region, In terms of total sales: Electronic Arts >  Activision > Nintendo

Null hypothesis: 3 major publishers take up the same portion in the sum of sales in 3 regions

In [58]:
# Run chi-square test of independence among 3 regions for the portion of 3 major publishers in total sales

major_p_s = sales[(sales.Publisher =='Nintendo') | (sales.Publisher =='Electronic Arts') 
                  | (sales.Publisher =='Activision')]

print("% of 3 major pubslisher Globally (sum of sales): ", round(sum(major_p_s.Global_Sales)/sum(sales.Global_Sales)*100,2), "%")
print("% of 3 major pubslisher in North America (sum of sales): ", round(sum(major_p_s.NA_Sales)/sum(sales.NA_Sales)*100,2), "%")
print("% of 3 major pubslisher in Europe (sum of sales): ", round(sum(major_p_s.EU_Sales)/sum(sales.EU_Sales)*100,2), "%")
print("% of 3 major pubslisher in Japan (sum of sales): ", round(sum(major_p_s.JP_Sales)/sum(sales.JP_Sales)*100,2), "%")
print("% of 3 major pubslisher in Other region (sum of sales): ", round(sum(major_p_s.Other_Sales)/sum(sales.Other_Sales)*100,2), "%")

% of 3 major pubslisher Globally (sum of sales):  38.69 %
% of 3 major pubslisher in North America (sum of sales):  38.54 %
% of 3 major pubslisher in Europe (sum of sales):  40.44 %
% of 3 major pubslisher in Japan (sum of sales):  35.21 %
% of 3 major pubslisher in Other region (sum of sales):  37.98 %


In [59]:
# Run chi-square test of independence between 3 major publishers and the rest of publishers - in sum of sales, NA & EU

p_s = np.array(([sum(major_p_s.NA_Sales),sum(sales.NA_Sales)],
                [sum(major_p_s.EU_Sales),sum(sales.EU_Sales)],
                [sum(major_p_s.Other_Sales),sum(sales.Other_Sales)]))
stats.chi2_contingency(p_s) 

# p-value: 0.70833899021495483

(0.68966500027560074,
 0.70833899021495483,
 2,
 array([[  823.9211693 ,  2105.9588307 ],
        [  543.18032608,  1388.37967392],
        [  195.73850461,   500.31149539]]))

## Publisher - Product performance

Finding from Tableau: Different publishers outperfom in each region for average sales amount.  

* Top publishers globally in terms of average of sales: Nitendo > Microsoft Game Studios > Valve > Hello Games > Sony Computer Entertainment
* Top publishers in North America, In terms of average of sales: Microsoft Game Studios > RedOctane > Nintendo
* Top publishers in Europe, In terms of average of sales: Hello Games > Nintendo > Valve
* Top publishers in Japan, In terms of average of sales: Mixi > Nintendo > Level 5
* Top publishers in other region, In terms of average of sales: Hello Games > Valve > Sony Computer Entertainment Europe

Null hypothesis: Top 3 publishers for each region vary in every region but the ratio of average sales of 3 major publishers of each region to the rest of the publishers are the same throughout the region 

In [60]:
major_p_a_NA = sales[(sales.Publisher =='Microsoft Game Studios') | (sales.Publisher =='Nintendo') 
                  | (sales.Publisher =='RedOctane')]
minor_p_a_NA = sales[(sales.Publisher !='Microsoft Game Studios') & (sales.Publisher !='Nintendo') 
                  & (sales.Publisher !='RedOctane')]

major_p_a_EU = sales[(sales.Publisher =='Hello Games') | (sales.Publisher =='Nintendo') 
                  | (sales.Publisher =='Valve Software')]
minor_p_a_EU = sales[(sales.Publisher !='Hello Games') & (sales.Publisher !='Nintendo') 
                  & (sales.Publisher !='Valve Software')]

major_p_a_JP = sales[(sales.Publisher =='Nintendo') | (sales.Publisher =='mixi, Inc') 
                  | (sales.Publisher =='Level 5')]
minor_p_a_JP = sales[(sales.Publisher !='Enix Corporation') & (sales.Publisher !='mixi, Inc') 
                  & (sales.Publisher !='Level 5')]

major_p_a_OT = sales[(sales.Publisher =='Hello Games') | (sales.Publisher =='Valve Software') 
                  | (sales.Publisher =='Sony')]
minor_p_a_OT = sales[(sales.Publisher !='Hello Games') & (sales.Publisher !='Valve Software') 
                  & (sales.Publisher !='Sony Computer Entertainment Europe')]

print("Average sales amount of 3 major publishers in North America is", 
      round(np.mean(major_p_a_NA.NA_Sales)/np.mean(minor_p_a_NA.NA_Sales)*100,2), 
      "% larger than the average of the rest")
print("Average sales amount of 3 major publishers in Europe is", 
      round(np.mean(major_p_a_EU.EU_Sales)/np.mean(minor_p_a_EU.EU_Sales)*100,2), 
      "% larger than the average of the rest")
print("Average sales amount of 3 major publishers in Japan is", 
      round(np.mean(major_p_a_JP.JP_Sales)/np.mean(minor_p_a_JP.JP_Sales)*100,2), 
      "% larger than the average of the rest")
print("Average sales amount of 3 major publishers in Other region is", 
      round(np.mean(major_p_a_OT.Other_Sales)/np.mean(minor_p_a_OT.Other_Sales)*100,2), 
      "% larger than the average of the rest")

Average sales amount of 3 major publishers in North America is 489.6 % larger than the average of the rest
Average sales amount of 3 major publishers in Europe is 470.28 % larger than the average of the rest
Average sales amount of 3 major publishers in Japan is 1099.84 % larger than the average of the rest
Average sales amount of 3 major publishers in Other region is 265.93 % larger than the average of the rest


In [61]:
# Run chi-square test of independence among 4 regions 
# for the ratio of average sale of 3 major publishers of each region to the one of rest of publishers in each region 

p_a = np.array(([np.mean(major_p_a_NA.NA_Sales),np.mean(minor_p_a_NA.NA_Sales)],
                  [np.mean(major_p_a_EU.EU_Sales),np.mean(minor_p_a_EU.EU_Sales)],
                [np.mean(major_p_a_JP.JP_Sales),np.mean(minor_p_a_JP.JP_Sales)],
                 [np.mean(major_p_a_OT.Other_Sales),np.mean(minor_p_a_OT.Other_Sales)]))

stats.chi2_contingency(p_a) 

# p-value: 0.99723334529765295

(0.048113181700491954,
 0.99723334529765295,
 3,
 array([[ 0.98501062,  0.18692837],
        [ 0.64385639,  0.12218652],
        [ 0.51519222,  0.09776954],
        [ 0.16770079,  0.03182507]]))

## Publisher - Japan vs. rest of market

Finding from Tableau: Japan doesn’t share the popular publishers in terms of sum of sales (Nitendo >  Namco Bandai Games > Capcom) 

Null hypothesis: The portion of 3 major publisher of Japan in the Japanese market and the portion of global 3 major publishers in the rest of the market are the same. 

In [62]:
major_p_s_jp = sales[(sales.Publisher =='Nintendo') | (sales.Publisher =='Namco Bandai Games') 
                  | (sales.Publisher =='Capcom')]

print("% of global 3 major publisher in the NA, EU, Other regions (sum of sales): ", 
      round((sum(major_p_s.Global_Sales)-sum(major_p_s.JP_Sales))/(sum(sales.Global_Sales)-sum(sales.JP_Sales))*100,2), "%")
print("% of 3 major publisher of Japan in the Japanese market (sum of sales): ", 
      round(sum(major_p_s_jp.JP_Sales)/sum(sales.JP_Sales)*100,2), "%")

% of global 3 major publisher in the NA, EU, Other regions (sum of sales):  39.11 %
% of 3 major publisher of Japan in the Japanese market (sum of sales):  53.34 %


In [63]:
# Run chi-square test of independence to compare the portion of main 3 publishers are the same 
# in Japan and the rest of markets

p_s_jp = np.array(([sum(major_p_s_jp.JP_Sales),sum(sales.JP_Sales)],
                   [sum(major_p_s.NA_Sales+major_p_s.EU_Sales+major_p_s.Other_Sales),
                   sum(sales.NA_Sales+sales.EU_Sales+sales.Other_Sales)]))
                      
stats.chi2_contingency(p_s_jp) 

# p-values: 0.00020749325953488894

(13.761980215267526,
 0.00020749325953488894,
 1,
 array([[  213.26074439,   524.59925561],
        [ 1606.25925561,  3951.23074439]]))

## Platform - Market share

Finding from Tableau: Top 3 platforms in sum amount of sales are X360 > Wii > PS3  in all regions except Japan.  

* Top platforms globally, In terms of sum of sales: PS3 > X360 >Wii 
* Top platforms in North America, In terms of sum of sales:  X360 > Wii > PS3
* Top platforms in Europe, In terms of sum of sales: PS3 > X360 > Wii
* Top platforms in Japan, In terms of sum of sales:  DS > 3DS > PS3
* Top platforms in other region, In terms of sum of sales: PS3 > X360 > Wii

Null hypothesis: 3 major platforms take up the same portion in the sum of sales

In [64]:
major_pl_s = sales[(sales.Platform =='X360') | (sales.Platform =='PS3')|(sales.Platform =='Wii')]

print("% of 4 major platforms Globally (sum of sales): ", round(sum(major_pl_s.Global_Sales)/sum(sales.Global_Sales)*100,2), "%")
print("% of 4 major platforms in North America (sum of sales): ", round(sum(major_pl_s.NA_Sales)/sum(sales.NA_Sales)*100,2), "%")
print("% of 4 major platforms in Europe (sum of sales): ", round(sum(major_pl_s.EU_Sales)/sum(sales.EU_Sales)*100,2), "%")
print("% of 4 major platforms in Japan (sum of sales): ", round(sum(major_pl_s.JP_Sales)/sum(sales.JP_Sales)*100,2), "%")
print("% of 4 major platforms in other region (sum of sales): ", round(sum(major_pl_s.Other_Sales)/sum(sales.Other_Sales)*100,2), "%")

% of 4 major platforms Globally (sum of sales):  58.28 %
% of 4 major platforms in North America (sum of sales):  64.23 %
% of 4 major platforms in Europe (sum of sales):  59.61 %
% of 4 major platforms in Japan (sum of sales):  30.78 %
% of 4 major platforms in other region (sum of sales):  55.97 %


In [65]:
# Run chi-square test of independence among 3 regions for the portion of 3 major generes in total sales

pl_s = np.array(([sum(major_pl_s.NA_Sales),sum(sales.NA_Sales)],
                 [sum(major_pl_s.EU_Sales),sum(sales.EU_Sales)],
                [sum(major_pl_s.Other_Sales),sum(sales.Other_Sales)]))

stats.chi2_contingency(pl_s) 

# p-value: 0.15979194880824499

(3.6677652596927057,
 0.15979194880824499,
 2,
 array([[ 1323.93970939,  2149.30029061],
        [  836.79176994,  1358.45823006],
        [  299.91852067,   486.89147933]]))

## Platform - Product performance

Finding from Tableau: Top 3 platforms in average amount of sales differ across the region. 
* Top platforms globally, In terms of average sales: PS4 > X360 > PS3 > XOne > Wii
* Top platforms in North America, In terms of average sales: X360 > GBA > XOne
* Top platforms in Europe, In terms of average sales:  PS4 > PS3 > X360 
* Top platforms in Japan, In terms of average sales: 3DS > WiiU > PSP
* Top platforms in other region, In terms of average sales:  PS4 > PS3 > PS2

Null hypothesis: Top 3 platforms for each region vary in every region but the ratio of average sales of 3 major platforms of each region to the rest of the platforms are the same throughout the region 

In [66]:
major_pl_a_NA = sales[(sales.Platform =='X360') | (sales.Platform =='GBA') | (sales.Platform =='PS3')]
minor_pl_a_NA = sales[(sales.Platform !='X360') & (sales.Platform !='GBA') & (sales.Platform !='PS3')]

major_pl_a_EU = sales[(sales.Platform =='PS4') | (sales.Platform =='PS3') | (sales.Platform =='X360')]
minor_pl_a_EU = sales[(sales.Platform !='PS4') & (sales.Platform !='PS3') & (sales.Platform !='X360')]

major_pl_a_JP = sales[(sales.Platform =='3DS') | (sales.Platform =='WiiU') | (sales.Platform =='PSP')]
minor_pl_a_JP = sales[(sales.Platform !='3DS') & (sales.Platform !='WiiU') & (sales.Platform !='PSP')]

major_pl_a_OT = sales[(sales.Platform =='PS4') | (sales.Platform =='PS3') | (sales.Platform =='PS2')]
minor_pl_a_OT = sales[(sales.Platform !='PS4') & (sales.Platform !='PS3') & (sales.Platform !='PS2')]

print("Average sales amount of 3 major platforms in North America is", 
      round(np.mean(major_pl_a_NA.NA_Sales)/np.mean(minor_pl_a_NA.NA_Sales)*100,2), 
      "% larger than the average of the rest")
print("Average sales amount of 3 major platforms in Europe is", 
      round(np.mean(major_pl_a_EU.EU_Sales)/np.mean(minor_pl_a_EU.EU_Sales)*100,2), 
      "% larger than the average of the rest")
print("Average sales amount of 3 major platforms in Japan is", 
      round(np.mean(major_pl_a_JP.JP_Sales)/np.mean(minor_pl_a_JP.JP_Sales)*100,2), 
      "% larger than the average of the rest")
print("Average sales amount of 3 major platforms in Other region is", 
      round(np.mean(major_pl_a_OT.Other_Sales)/np.mean(minor_pl_a_OT.Other_Sales)*100,2), 
      "% larger than the average of the rest")

Average sales amount of 3 major platforms in North America is 223.64 % larger than the average of the rest
Average sales amount of 3 major platforms in Europe is 261.98 % larger than the average of the rest
Average sales amount of 3 major platforms in Japan is 288.23 % larger than the average of the rest
Average sales amount of 3 major platforms in Other region is 284.52 % larger than the average of the rest


In [67]:
# Run chi-square test of independence among 4 regions 
# for the ratio of average sale of 3 major platforms of each region to the one of rest of platforms in each region 

pl_a = np.array(([np.mean(major_pl_a_NA.NA_Sales),np.mean(minor_pl_a_NA.NA_Sales)],
                  [np.mean(major_pl_a_EU.EU_Sales),np.mean(minor_pl_a_EU.EU_Sales)],
                [np.mean(major_pl_a_JP.JP_Sales),np.mean(minor_pl_a_JP.JP_Sales)],
                 [np.mean(major_pl_a_OT.Other_Sales),np.mean(minor_pl_a_OT.Other_Sales)]))

stats.chi2_contingency(pl_a) 

# p-value: 0.99996252651521211

(0.0027091859532259923,
 0.99996252651521211,
 3,
 array([[ 0.40075822,  0.16126908],
        [ 0.26038183,  0.10478023],
        [ 0.11011458,  0.0443112 ],
        [ 0.10491447,  0.04221862]]))

## Platform - Japan vs. rest of market

Finding from Tableau: Japan doesn’t share the popular platforms in terms of sum of sales 

Null hypothesis: The portion of 3 major platforms of Japan in the Japanese market and the portion of global 3 major platforms in the rest of the market are the same. 

In [68]:
major_pl_s_jp = sales[(sales.Platform =='DS') | (sales.Platform =='PS3')| (sales.Platform =='3DS')]

print("% of global 4 major platform in the NA, EU, Other regions (sum of sales): ", 
      round((sum(major_pl_s.Global_Sales)-sum(major_pl_s.JP_Sales))/(sum(sales.Global_Sales)-sum(sales.JP_Sales))*100,2), "%")
print("% of 4 major platform of Japan in the Japanese market (sum of sales): ", 
      round(sum(major_pl_s_jp.JP_Sales)/sum(sales.JP_Sales)*100,2), "%")

% of global 4 major platform in the NA, EU, Other regions (sum of sales):  61.59 %
% of 4 major platform of Japan in the Japanese market (sum of sales):  57.85 %


In [69]:
# Run chi-square test of independence to compare the portion of main 3 platforms are the same 
# in Japan and the rest of markets

pl_s_jp = np.array(([sum(major_pl_s_jp.JP_Sales),sum(sales.JP_Sales)],
                   [sum(major_pl_s.NA_Sales+major_pl_s.EU_Sales+major_pl_s.Other_Sales),
                   sum(sales.NA_Sales+sales.EU_Sales+sales.Other_Sales)]))
                      
stats.chi2_contingency(pl_s_jp) 

# p-values: 0.45351646263207468

(0.56184837206753357,
 0.45351646263207468,
 1,
 array([[  288.35286125,   471.19713875],
        [ 2450.66713875,  4004.63286125]]))