In [1]:
# IMPORTING LIBRARIES
import pandas as pd

In [2]:
# LOADING THE DATA FILE
# Reads "NintendoGames.csv" via a relative path, so the file must be in the
# notebook's working directory.
nintendo = pd.read_csv("NintendoGames.csv")

In [3]:
# LOOKING AT THE SHAPE OF THE DATASET -- (number of rows, number of columns)
nintendo.shape

(1094, 9)

In [4]:
# LOOKING AT THE COLUMN NAMES
# NOTE(review): the names are used exactly as they appear in the file,
# including 'Geners' (presumably "Genres") -- later cells must use that spelling.
nintendo.columns

Index(['Meta Score', 'Title', 'Platform', 'Date', 'User Score', 'Link',
       'Esrb Rating', 'Developers', 'Geners'],
      dtype='object')

In [5]:
# PREVIEWING THE FIRST FIVE ROWS OF THE DATASET
nintendo.head(n=5)

Unnamed: 0,Meta Score,Title,Platform,Date,User Score,Link,Esrb Rating,Developers,Geners
0,,Super Mario RPG,Switch,17-Nov-23,,/game/switch/super-mario-rpg,E,['Nintendo'],"['Role-Playing', 'Japanese-Style']"
1,,WarioWare: Move It!,Switch,3-Nov-23,,/game/switch/warioware-move-it!,RP,['Intelligent Systems'],"['Miscellaneous', 'Party / Minigame']"
2,,Super Mario Bros. Wonder,Switch,20-Oct-23,,/game/switch/super-mario-bros-wonder,E,['Nintendo'],"['Action', 'Platformer', '2D']"
3,,Detective Pikachu Returns,Switch,6-Oct-23,,/game/switch/detective-pikachu-returns,,['Creatures Inc.'],"['Adventure', '3D', 'Third-Person']"
4,,Fae Farm,Switch,8-Sep-23,,/game/switch/fae-farm,E10+,['Phoenix Labs'],"['Simulation', 'Virtual', 'Virtual Life']"


In [6]:
# INFORMATION ABOUT THE DATASET -- dtype and non-null count of every column
nintendo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1094 entries, 0 to 1093
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Meta Score   709 non-null    float64
 1   Title        1094 non-null   object 
 2   Platform     1094 non-null   object 
 3   Date         1094 non-null   object 
 4   User Score   856 non-null    float64
 5   Link         1094 non-null   object 
 6   Esrb Rating  972 non-null    object 
 7   Developers   1091 non-null   object 
 8   Geners       1094 non-null   object 
dtypes: float64(2), object(7)
memory usage: 77.1+ KB


In [7]:
# NUMBER OF NULL VALUES IN EACH COLUMN
nintendo.isnull().sum(axis=0)

Meta Score     385
Title            0
Platform         0
Date             0
User Score     238
Link             0
Esrb Rating    122
Developers       3
Geners           0
dtype: int64

In [8]:
# NULL VALUES PER COLUMN, AS A PERCENTAGE OF THE NUMBER OF ROWS
nintendo.isnull().sum().div(nintendo.shape[0]).mul(100)

Meta Score     35.191956
Title           0.000000
Platform        0.000000
Date            0.000000
User Score     21.755027
Link            0.000000
Esrb Rating    11.151737
Developers      0.274223
Geners          0.000000
dtype: float64

In [9]:
# COUNTING HOW OFTEN EACH ESRB RATING OCCURS, TO DECIDE HOW TO FILL ITS NULLS
# value_counts() leaves NaN out by default, so the missing ratings are not
# listed in the result.
nintendo['Esrb Rating'].value_counts()

Esrb Rating
E       660
T       150
E10+    142
M        15
RP        5
Name: count, dtype: int64

In [10]:
# FILLING NULL VALUES IN THE SCORE COLUMNS
# Missing scores are replaced with 0. Each filled column is assigned back to
# the frame instead of calling fillna(inplace=True) on nintendo[i]: that
# chained form operates on an intermediate object, raises a warning in
# pandas 2.2 and leaves the frame unchanged under Copy-on-Write (the default
# from pandas 3.0).
# NOTE(review): after this step 0 means "no score available"; it will pull
# down any average computed on these columns -- confirm that is intended.
x = ['Meta Score', 'User Score']
for i in x:
    nintendo[i] = nintendo[i].fillna(0)


In [11]:
# FILLING NULL VALUES IN THE RATING COLUMN
# Missing ratings are labelled "RP", a code that already occurs in the column.
# The result is assigned back rather than using fillna(inplace=True) on the
# selected column, which warns in pandas 2.2 and has no effect on the frame
# under Copy-on-Write (the default from pandas 3.0).
nintendo['Esrb Rating'] = nintendo['Esrb Rating'].fillna("RP")

In [12]:
# NULL PERCENTAGE PER COLUMN AFTER FILLING THE SCORES AND RATINGS
nintendo.isnull().sum().div(nintendo.shape[0]).mul(100)

Meta Score     0.000000
Title          0.000000
Platform       0.000000
Date           0.000000
User Score     0.000000
Link           0.000000
Esrb Rating    0.000000
Developers     0.274223
Geners         0.000000
dtype: float64

In [13]:
# DROPPING SOME VALUES AND COLUMNS
# Rows with no developer are removed, then the 'Link' column is removed.
# Written as one chained reassignment instead of two inplace=True calls;
# the row labels of the surviving rows are kept as they were (no index reset).
nintendo = (
    nintendo
    .dropna(subset=['Developers'])
    .drop(columns=['Link'])
)

In [14]:
# NULL PERCENTAGE PER COLUMN AFTER DROPPING ROWS AND COLUMNS
nintendo.isnull().sum().div(nintendo.shape[0]).mul(100)

Meta Score     0.0
Title          0.0
Platform       0.0
Date           0.0
User Score     0.0
Esrb Rating    0.0
Developers     0.0
Geners         0.0
dtype: float64

In [15]:
# COUNTING THE VALUES OF EACH ESRB RATING (not a percentage)
nintendo['Esrb Rating'].value_counts()

Esrb Rating
E       658
T       150
E10+    142
RP      126
M        15
Name: count, dtype: int64

In [16]:
# REPLACING THE ESRB CODES WITH READABLE LABELS
# Values that are not keys of the mapping are left untouched by replace().
# NOTE(review): "Matuares Only" looks like a typo for "Mature"; the label is
# kept as-is here because it is the value written into the data.
esrb_labels = {
    'E': 'Everyone',
    'T': "Teen",
    "E10+": "Everyone above 10",
    "RP": "Rating Pending",
    "M": "Matuares Only",
}
nintendo['Esrb Rating'] = nintendo['Esrb Rating'].replace(esrb_labels)

In [17]:
# COUNTING THE VALUES OF THE 'Esrb Rating' COLUMN AGAIN
nintendo['Esrb Rating'].value_counts()

Esrb Rating
Everyone             658
Teen                 150
Everyone above 10    142
Rating Pending       126
Matuares Only         15
Name: count, dtype: int64

In [18]:
# LOOKING AT THE SHAPE AGAIN -- (rows, columns), TO COMPARE WITH THE ORIGINAL
nintendo.shape

(1091, 8)

In [19]:
# GETTING THE INFO AGAIN -- dtypes and non-null counts before converting dtypes
nintendo.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1091 entries, 0 to 1093
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Meta Score   1091 non-null   float64
 1   Title        1091 non-null   object 
 2   Platform     1091 non-null   object 
 3   Date         1091 non-null   object 
 4   User Score   1091 non-null   float64
 5   Esrb Rating  1091 non-null   object 
 6   Developers   1091 non-null   object 
 7   Geners       1091 non-null   object 
dtypes: float64(2), object(6)
memory usage: 76.7+ KB


In [20]:
# CONVERTING THE SCORE COLUMNS TO INTEGERS
# Each column is converted once, indexed by the loop variable. The previous
# version indexed with the whole list inside the loop, so the full conversion
# ran once per list element and the loop variable was never used.
# NOTE(review): astype(int) discards any fractional part -- confirm that
# 'User Score' holds no decimals worth keeping. The width of `int` follows
# the platform default.
x = ['Meta Score', 'User Score']
for i in x:
    nintendo[i] = nintendo[i].astype(int)

In [21]:
# FINAL CHECK -- dtypes and non-null counts of every column
nintendo.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1091 entries, 0 to 1093
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Meta Score   1091 non-null   int32 
 1   Title        1091 non-null   object
 2   Platform     1091 non-null   object
 3   Date         1091 non-null   object
 4   User Score   1091 non-null   int32 
 5   Esrb Rating  1091 non-null   object
 6   Developers   1091 non-null   object
 7   Geners       1091 non-null   object
dtypes: int32(2), object(6)
memory usage: 68.2+ KB
