In [9]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Show the installed pandas version so readers know which release produced the outputs below.
pd.__version__

'0.24.2'

# Series

In [3]:
# A pandas Series is a one-dimensional array of values, each paired with an index label.
# With no index given, the labels default to 0..n-1.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
)
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [5]:
# .values exposes the data held by the Series as a NumPy array (labels are not included).
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [6]:
# .index holds the labels; since none were supplied, it is a default RangeIndex.
data.index

RangeIndex(start=0, stop=4, step=1)

In [4]:
# Same values as before, but labelled with explicit string keys instead of 0..n-1.
data1 = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=['a', 'b', 'c', 'd'],
)

In [5]:
# Look up a single value by its index label.
data1['b']

0.5

In [6]:
# A Series can be built straight from a dict: the keys become the index
# labels and the values become the data.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

# Data Frame 

A DataFrame is a two-dimensional, labeled table made up of a collection of Series (one per column) that share the same index.


In [36]:
# A DataFrame can be created in several ways; here it is built from a dict
# that maps each column name to the list of values in that column.
data_dic = dict(
    apples=[3, 2, 0, 1],
    oranges=[0.0, 3, 7.5, 2.54],
)

df = pd.DataFrame(data=data_dic)
# df.dtypes  # uncomment to inspect the per-column dtypes
df

Unnamed: 0,apples,oranges
0,3,0.0
1,2,3.0
2,0,7.5
3,1,2.54


In [38]:
# Build a DataFrame from a list of rows, naming both the rows and the columns.
# Each inner list is one row; its items fall under column1 and column2 in order.
frame = pd.DataFrame(
    data=[[1, 2], ["Boris Yeltsin", "Mikhail Gorbachev"]],
    columns=["column1", "column2"],
    index=["row1", "row2"],
)
frame

Unnamed: 0,column1,column2
row1,1,2
row2,Boris Yeltsin,Mikhail Gorbachev


In [11]:
# Reuse data_dic, but label the rows with customer names instead of the
# default 0..n-1 index.
purchases = pd.DataFrame(
    data=data_dic,
    index=['June', 'Robert', 'Lily', 'David'],
)
purchases

Unnamed: 0,apples,oranges
June,3,0.0
Robert,2,3.0
Lily,0,7.5
David,1,2.54


In [16]:
# np.nan marks a missing value; with it present, the integer inputs are
# stored as float64 (see the dtype in the output).
a = pd.Series(
    data=[1, 2, np.nan],
)
a

0    1.0
1    2.0
2    NaN
dtype: float64

# more info
https://www.dataquest.io/blog/pandas-python-tutorial/

https://www.learndatasci.com/tutorials/python-pandas-tutorial-complete-introduction-for-beginners/

In [5]:
# Load the IGN reviews CSV; the path is relative to the notebook's working directory.
reviews = pd.read_csv("sample_data/ign.csv")

In [7]:
# Peek at 5 randomly chosen rows; no random_state is passed, so the rows differ on every run.
reviews.sample(5)

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform,score,genre,editors_choice,release_year,release_month,release_day
8557,8557,Good,Marvel: Ultimate Alliance,/games/marvel-ultimate-alliance/ps3-822967,PlayStation 3,7.9,"Action, RPG",N,2006.0,11.0,15.0
7076,7076,Okay,Metal Slug 4 & 5,/games/metal-slug-4-5/xbox-680607,Xbox,6.1,Shooter,N,2005.0,8.0,25.0
2269,2269,Okay,All-Star Baseball 2001,/games/all-star-baseball-2001/gbc-14204,Game Boy Color,6.0,Sports,N,2000.0,6.0,7.0
9196,9196,Okay,Luxor 2,/games/luxor-2/xbox-360-881249,Xbox 360,6.8,"Puzzle, Action",N,2007.0,4.0,5.0
15530,15530,Good,Major League Baseball 2K11,/games/major-league-baseball-2k11/ps3-93320,PlayStation 3,7.0,Sports,N,2011.0,3.0,15.0


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
reviews.describe()

Unnamed: 0.1,Unnamed: 0,score,release_year,release_month,release_day
count,15773.0,15772.0,15772.0,15772.0,15772.0
mean,7886.0,6.872312,2005.32862,7.212909,15.48212
std,4553.417233,1.729945,3.899812,3.509555,8.706131
min,0.0,0.5,1970.0,1.0,1.0
25%,3943.0,6.0,2002.0,4.0,8.0
50%,7886.0,7.2,2006.0,8.0,15.0
75%,11829.0,8.0,2008.0,10.0,23.0
max,15772.0,10.0,2013.0,12.0,31.0


In [14]:
# Histogram of the review scores. The one missing score is dropped first so
# the bin range is computed from real values only, and the axes are labelled
# so the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(reviews["score"].dropna())
ax.set(title="Distribution of review scores", xlabel="score", ylabel="number of reviews");

In [8]:
# Skewness of each numeric column (negative values indicate a longer left tail).
# The call is live so the output below is reproducible on a fresh run;
# numeric_only=True excludes the text columns explicitly, because newer pandas
# raises on them instead of silently skipping.
reviews.skew(numeric_only=True)

Unnamed: 0       0.000000
score           -0.827887
release_year    -0.443392
release_month   -0.304174
release_day      0.043981
dtype: float64

In [13]:
# Kurtosis of each numeric column.
# The call is live so the output below is reproducible on a fresh run;
# numeric_only=True excludes the text columns explicitly, because newer pandas
# raises on them instead of silently skipping.
reviews.kurtosis(numeric_only=True)

Unnamed: 0      -1.200000
score            0.215415
release_year    -0.414832
release_month   -1.265018
release_day     -1.173745
dtype: float64

In [23]:
# Non-null count per column; any column below the total row count has missing values.
# The call is live so the output below is reproducible on a fresh run.
reviews.count()

Unnamed: 0        15773
score_phrase      15773
title             15773
url               15773
platform          15772
score             15772
genre             15741
editors_choice    15772
release_year      15772
release_month     15772
release_day       15772
dtype: int64

In [25]:
# Three ways to preview a frame: sample(5) picks random rows, while the
# commented alternatives show the first / last 5 rows.
reviews.sample(5)
# reviews.head(5)
# reviews.tail(5)

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform,score,genre,editors_choice,release_year,release_month,release_day
1141,1141,Great,Uprising-X,/games/uprising-x/ps-2315,PlayStation,8.0,Action,N,1998.0,12.0,16.0
7252,7252,Good,Shrek SuperSlam,/games/madagascar-shrek-superslam-2-in-1-combo...,GameCube,7.0,Fighting,N,2005.0,10.0,28.0
2313,2313,Okay,Omikron: the Nomad Soul,/games/omikron-the-nomad-soul/dc-13732,Dreamcast,6.7,Adventure,N,2000.0,7.0,5.0
11301,11301,Bad,Chase H.Q.,/games/chase-hq/wii-14270155,Wii,4.5,Racing,N,2008.0,8.0,1.0
1435,1435,Bad,Neo Mystery Bonus,/games/neo-mystery-bonus/ngpc-11054,NeoGeo Pocket Color,4.0,Casino,N,1999.0,7.0,21.0


In [30]:
reviews.iloc[:,4] # positional selection: all rows of the column at integer position 4 (platform)

0        PlayStation Vita
1        PlayStation Vita
2                    iPad
3                Xbox 360
4           PlayStation 3
5               Macintosh
6                Xbox 360
7                      PC
8           PlayStation 3
9                      PC
10          PlayStation 3
11               Xbox 360
12                 iPhone
13               Xbox 360
14                     PC
15              Macintosh
16                     PC
17                 iPhone
18          PlayStation 3
19               Xbox 360
20          PlayStation 3
21               Xbox 360
22          PlayStation 3
23                     PC
24                     PC
25                     PC
26                   iPad
27                     PC
28                     PC
29          PlayStation 3
               ...       
15743                  PC
15744                 Wii
15745                  PC
15746            Xbox 360
15747       PlayStation 3
15748                 Wii
15749         Nintendo DS
15750       

In [35]:
reviews.loc[:,['score','platform']] # label-based selection: all rows, columns chosen by name
# reviews[['score','platform']]  # shorthand for the same column selection

Unnamed: 0,score,platform
0,9.0,PlayStation Vita
1,9.0,PlayStation Vita
2,8.5,iPad
3,8.5,Xbox 360
4,8.5,PlayStation 3
5,7.0,Macintosh
6,3.0,Xbox 360
7,9.0,PC
8,3.0,PlayStation 3
9,7.0,PC


In [34]:
# Single-bracket selection by column name returns a Series; the commented
# .loc form with a list of names returns a one-column DataFrame instead.
reviews['score'] 
# reviews.loc[:,['score']]

Unnamed: 0,score
0,9.0
1,9.0
2,8.5
3,8.5
4,8.5
5,7.0
6,3.0
7,9.0
8,3.0
9,7.0


In [39]:
# Column-wise mean (axis=0 aggregates down the rows).
# numeric_only=True restricts the calculation to numeric columns explicitly;
# relying on the text columns being dropped silently fails on newer pandas.
reviews.mean(axis=0, numeric_only=True)

Unnamed: 0       7886.000000
score               6.872312
release_year     2005.328620
release_month       7.212909
release_day        15.482120
dtype: float64

In [41]:
# Pairwise correlation between the numeric columns.
# The numeric columns are selected explicitly first: newer pandas raises when
# corr() meets text columns, and select_dtypes works on old and new releases alike.
reviews.select_dtypes(include="number").corr()

Unnamed: 0.1,Unnamed: 0,score,release_year,release_month,release_day
Unnamed: 0,1.0,-0.047105,0.830205,-0.087804,-0.014954
score,-0.047105,1.0,-0.011675,0.017116,0.009385
release_year,0.830205,-0.011675,1.0,-0.100548,-0.005688
release_month,-0.087804,0.017116,-0.100548,1.0,-0.061819
release_day,-0.014954,0.009385,-0.005688,-0.061819,1.0


In [42]:
# Standard deviation of each numeric column.
# numeric_only=True restricts the calculation to numeric columns explicitly;
# relying on the text columns being dropped silently fails on newer pandas.
reviews.std(numeric_only=True)

Unnamed: 0       4553.417233
score               1.729945
release_year        3.899812
release_month       3.509555
release_day         8.706131
dtype: float64

In [54]:
# Define the boolean mask here, before it is used, so this cell runs on a
# fresh kernel (Restart & Run All) instead of relying on leftover state.
score_filter = reviews["score"] < 1
# Boolean indexing keeps only the rows where the mask is True.
reviews[score_filter]

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform,score,genre,editors_choice,release_year,release_month,release_day
890,890,Disaster,Extreme PaintBrawl,/games/extreme-paintbrawl/pc-10455,PC,0.7,Action,N,1998.0,10.0,29.0
5242,5242,Disaster,Looney Tunes: Back in Action: Zany Race,/games/looney-tunes-back-in-action-zany-race/c...,Wireless,0.5,Racing,N,2003.0,10.0,28.0
12513,12513,Disaster,Action Girlz Racing,/games/action-girlz-racing/wii-889218,Wii,0.8,Racing,N,2009.0,2.0,11.0


In [None]:
# Boolean mask: True for rows whose score is below 1.
score_filter = reviews["score"] < 1

In [56]:
# Boolean mask: True for rows whose platform is exactly 'Wii'.
f = reviews["platform"] == 'Wii'

In [59]:
# (rows, columns) of the subset selected by the mask f, i.e. how many reviews matched.
reviews[f].shape

(1293, 11)

In [60]:
# Per-column dtype and non-null count: a quick way to spot columns with missing values.
reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15773 entries, 0 to 15772
Data columns (total 11 columns):
Unnamed: 0        15773 non-null int64
score_phrase      15773 non-null object
title             15773 non-null object
url               15773 non-null object
platform          15772 non-null object
score             15772 non-null float64
genre             15741 non-null object
editors_choice    15772 non-null object
release_year      15772 non-null float64
release_month     15772 non-null float64
release_day       15772 non-null float64
dtypes: float64(4), int64(1), object(6)
memory usage: 985.9+ KB


In [61]:
# Drop every row that has at least one missing value; the result is a new
# frame, and `reviews` itself is left unchanged.
r = reviews.dropna()

In [62]:
# Check the result: every column should now report the same non-null count.
r.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15741 entries, 0 to 15771
Data columns (total 11 columns):
Unnamed: 0        15741 non-null int64
score_phrase      15741 non-null object
title             15741 non-null object
url               15741 non-null object
platform          15741 non-null object
score             15741 non-null float64
genre             15741 non-null object
editors_choice    15741 non-null object
release_year      15741 non-null float64
release_month     15741 non-null float64
release_day       15741 non-null float64
dtypes: float64(4), int64(1), object(6)
memory usage: 1.1+ MB
