In [1]:
import warnings
# Install a blanket 'ignore' filter: with no category or module given, every
# warning raised after this point is suppressed.
# NOTE(review): this also hides deprecation warnings from pandas/numpy —
# consider narrowing the filter to the specific category that is noisy.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
# Read the game-sales CSV into a DataFrame; the path is relative to the
# directory the notebook is run from.
df = pd.read_csv('./datasets/game_sales.csv')

In [3]:
# Shuffle the rows: sample(frac=1) returns every row exactly once, in random order.
# A fixed random_state pins that order, so the order-dependent outputs that
# follow (head(), cumsum(), cummax(), ...) are reproducible on a fresh run.
df = df.sample(frac=1, random_state=42)

In [4]:
# Preview the first five rows (head() defaults to n=5) of the shuffled frame.
df.head()

Unnamed: 0,name,platform,year,genre,publisher,na_sales,eu_sales,jp_sales,other_sales,global_sales,critic_score,critic_count,user_score,user_count,rating,date
5974,Spider-Man: Friend or Foe,X360,2007.0,Action,Activision,0.26,0.01,0.0,0.02,0.29,60.0,51.0,6.3,26.0,E10+,2007-03-15
8237,Pro Yaky? Spirits 5,PS2,2008.0,Sports,Konami Digital Entertainment,0.0,0.0,0.18,0.0,0.18,,,,,,2008-01-15
2312,MLB 2005,PS2,2004.0,Sports,Sony Computer Entertainment,0.44,0.35,0.0,0.12,0.9,78.0,36.0,7.6,9.0,E,2004-06-20
5019,Junior Classic Games,DS,2009.0,Misc,Avanquest,0.26,0.09,0.0,0.03,0.38,,,,,E,2009-07-15
2673,Resident Evil: Operation Raccoon City,X360,2012.0,Action,Capcom,0.55,0.14,0.03,0.06,0.77,52.0,56.0,4.9,331.0,M,2012-09-05


In [5]:
global_sales = df["global_sales"]

# Label / reducer pairs. Each reducer is called inside the loop, so the
# statistics are computed and printed one at a time, in the order listed.
summary_stats = (
    ("mean    :", global_sales.mean),
    ("median  :", global_sales.median),
    # Series.mode() returns all most-frequent values; [0] keeps the first one.
    ("mode    :", lambda: global_sales.mode()[0]),
    ("min     :", global_sales.min),
    ("max     :", global_sales.max),
    ("var     :", global_sales.var),
    ("std     :", global_sales.std),
    ("sum     :", global_sales.sum),
    # quantile() with no argument uses q=0.5, i.e. it reports the median again.
    ("quantile:", global_sales.quantile),
)
for label, compute in summary_stats:
    print(label, compute())

mean    : 0.5166946231617646
median  : 0.16
mode    : 0.01
min     : 0.01
max     : 82.54
var     : 2.33001499446296
std     : 1.5264386638391205
sum     : 8994.619999999999
quantile: 0.16


In [6]:
# Smallest value in the date column. The result is a plain string, so min()
# compares text; that matches chronological order only because the values
# use the zero-padded YYYY-MM-DD layout.
df['date'].min()

'1976-04-10'

In [7]:
# Largest value in the date column. The result is a plain string, so max()
# compares text; that matches chronological order only because the values
# use the zero-padded YYYY-MM-DD layout.
df['date'].max()

'2017-09-16'

In [8]:
def normalized_mean(col):
    """Return the mean of ``col`` divided by its value range (max - min).

    The mean is divided as-is; the minimum is not subtracted from it first.
    ``col`` is any object exposing ``mean()``, ``max()`` and ``min()``,
    such as a pandas Series.
    """
    value_range = col.max() - col.min()
    return col.mean() / value_range

def normalized_median(col):
    """Return the median of ``col`` divided by its value range (max - min).

    The median is divided as-is; the minimum is not subtracted from it first.
    ``col`` is any object exposing ``median()``, ``max()`` and ``min()``,
    such as a pandas Series.
    """
    value_range = col.max() - col.min()
    return col.median() / value_range

def iqr(column):
    """Return the interquartile range of ``column``.

    Computed as the 0.75 quantile minus the 0.25 quantile, using the
    ``quantile`` method of ``column`` (e.g. a pandas Series).
    """
    lower_quartile = column.quantile(0.25)
    upper_quartile = column.quantile(0.75)
    return upper_quartile - lower_quartile

In [9]:
# Pass the custom iqr reducer to agg on a one-column frame; the result holds
# one value per column.
df[['global_sales']].agg(iqr)

global_sales    0.4
dtype: float64

In [10]:
import numpy as np
# Aggregate with the string alias instead of np.median. Since pandas 2.1,
# passing a NumPy callable to agg is deprecated: pandas currently swaps it for
# its own (NaN-skipping) median and warns that this substitution will stop.
# The alias selects that same pandas method explicitly, so the result is unchanged.
df[['global_sales']].agg('median')

global_sales    0.16
dtype: float64

In [11]:
# Mean of global_sales divided by its range, via the custom normalized_mean reducer.
df[['global_sales']].agg(normalized_mean)

global_sales    0.006261
dtype: float64

In [12]:
# One reducer applied to two columns at once: the result has one value per column.
df[['critic_score', 'user_score']].agg(normalized_mean)

critic_score    0.810757
user_score      0.733717
dtype: float64

In [13]:
# A list of reducers on a Series returns a Series indexed by the function names.
df['global_sales'].agg([normalized_mean, normalized_median])

normalized_mean      0.006261
normalized_median    0.001939
Name: global_sales, dtype: float64

In [14]:
# A list of reducers on a DataFrame returns a frame: one row per function,
# one column per input column.
df[['critic_score', 'user_score']].agg([normalized_mean, normalized_median])

Unnamed: 0,critic_score,user_score
normalized_mean,0.810757,0.733717
normalized_median,0.835294,0.773196


In [15]:
# Running total: each entry is the sum of global_sales from the first row up to
# and including that row, in the frame's current (shuffled) row order.
df['global_sales'].cumsum() 

5974        0.29
8237        0.47
2312        1.37
5019        1.75
2673        2.52
          ...   
2052     8991.44
15476    8991.46
2736     8992.22
7044     8992.45
762      8994.62
Name: global_sales, Length: 17408, dtype: float64

In [16]:
# Running maximum: each entry is the largest global_sales value seen from the
# first row up to and including that row.
df['global_sales'].cummax()

5974      0.29
8237      0.29
2312      0.90
5019      0.90
2673      0.90
         ...  
2052     82.54
15476    82.54
2736     82.54
7044     82.54
762      82.54
Name: global_sales, Length: 17408, dtype: float64

In [17]:
# Running minimum: each entry is the smallest global_sales value seen from the
# first row up to and including that row.
df['global_sales'].cummin()

5974     0.29
8237     0.18
2312     0.18
5019     0.18
2673     0.18
         ... 
2052     0.01
15476    0.01
2736     0.01
7044     0.01
762      0.01
Name: global_sales, Length: 17408, dtype: float64

In [18]:
# Running product: each entry is the product of global_sales from the first row
# up to and including that row. Most values are below 1 (the median is 0.16),
# so over thousands of rows the product shrinks toward zero and displays as 0.000000.
df['global_sales'].cumprod()

5974     0.290000
8237     0.052200
2312     0.046980
5019     0.017852
2673     0.013746
           ...   
2052     0.000000
15476    0.000000
2736     0.000000
7044     0.000000
762      0.000000
Name: global_sales, Length: 17408, dtype: float64

In [19]:
# Build a small 4x4 integer frame for the axis demonstration.
# Note: this rebinds `df`, so the game-sales frame loaded earlier is no longer
# reachable under that name; re-run the loading cell before re-running earlier cells.
df = pd.DataFrame({"A":[5, 3, 6, 4], 
                   "B":[11, 2, 4, 3],
                   "C":[4, 3, 8, 5], 
                   "D":[5, 4, 2, 8]})
  
# Cumulative product along axis=1: values accumulate left to right within each
# row (A, A*B, A*B*C, A*B*C*D).
df.cumprod(axis = 1)

Unnamed: 0,A,B,C,D
0,5,55,220,1100
1,3,6,18,72
2,6,24,192,384
3,4,12,60,480
